#include "TextCollection.h"
#include "TextStorage.h"
-#include "Tools.h" // Defines ulong and uchar.
-
-#include <string>
-#include <vector>
-#include <utility> // Defines std::pair.
-#include <cstring> // Defines std::strlen, added by Kim
-
-// Un-comment to compare BWT against a BWT generated from class dynFMI:
-//#define TCB_TEST_BWT
+#include "Tools.h"
// Default samplerate for suffix array samples
-#define TEXTCOLLECTION_DEFAULT_SAMPLERATE 64
+#define TEXTCOLLECTION_DEFAULT_SAMPLERATE 32
// Default input length, used to calculate the buffer size.
#define TEXTCOLLECTION_DEFAULT_INPUT_LENGTH (150 * 1024 * 1024)
namespace SXSI
{
- struct TCBuilderRep; // Pimpl
-
/**
- * Build an instance of the TextCollection class.
+ * Builder for an instance of the TextCollection class.
*/
class TextCollectionBuilder
{
public:
- explicit TextCollectionBuilder(unsigned samplerate = TEXTCOLLECTION_DEFAULT_SAMPLERATE,
- ulong estimatedInputLength = TEXTCOLLECTION_DEFAULT_INPUT_LENGTH);
- ~TextCollectionBuilder();
+ // Index type requested from create(); index_type_default is the FM-index.
+ // index_type_swcsa (SWCSA) can be used for natural language inputs.
+ // NB: Current SWCSA uses a lot of memory during construction!
+ enum index_type_t { index_type_default, index_type_swcsa, index_type_rlcsa };
+
+ static TextCollectionBuilder* create(unsigned samplerate = TEXTCOLLECTION_DEFAULT_SAMPLERATE,
+ index_type_t type = index_type_default,
+ ulong estimatedInputLength = TEXTCOLLECTION_DEFAULT_INPUT_LENGTH);
+
+
+ virtual ~TextCollectionBuilder() { }; // Virtual so a builder can be destroyed through a base-class pointer.
/**
* Insert text
* index also. If false, text is added only to the TextCollection
* and can not be searched for.
*/
- void InsertText(uchar const *, bool index = true);
+ virtual void InsertText(uchar const *, bool index = true) = 0; // Pure virtual: supplied by the concrete builder.
/**
* Make static
*
* TextStorage type defaults to TYPE_PLAIN_TEXT, another
* possible type is TYPE_LZ_INDEX.
*/
- TextCollection * InitTextCollection(char type = TextStorage::TYPE_PLAIN_TEXT);
+ virtual TextCollection * InitTextCollection(char type = TextStorage::TYPE_PLAIN_TEXT) = 0; // Pure virtual: supplied by the concrete builder.
- private:
- // Using Pimpl idiom to hide RLCSA implementation.
- struct TCBuilderRep * p_;
+ protected:
+ // Protected: only derived classes may construct; clients obtain a builder via the static TextCollectionBuilder::create().
+ TextCollectionBuilder() { };
+ private:
// No copy constructor or assignment
TextCollectionBuilder(TextCollectionBuilder const&);
TextCollectionBuilder& operator = (TextCollectionBuilder const&);
};
+
}
+
#endif