X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=TextCollectionBuilder.h;fp=TextCollectionBuilder.h;h=43d420459e61ec7b0f67b50b70bab3e851faaac3;hb=89dc22aee980ba16f757cd9a7f77478c2da50051;hp=6b3819a2ead0ff2aba0fa7ac9f3f350bd9ac556c;hpb=443151511a86083b21c1c06eb610f86b3aed35be;p=SXSI%2FTextCollection.git diff --git a/TextCollectionBuilder.h b/TextCollectionBuilder.h index 6b3819a..43d4204 100644 --- a/TextCollectionBuilder.h +++ b/TextCollectionBuilder.h @@ -23,18 +23,10 @@ #include "TextCollection.h" #include "TextStorage.h" -#include "Tools.h" // Defines ulong and uchar. - -#include -#include -#include // Defines std::pair. -#include // Defines std::strlen, added by Kim - -// Un-comment to compare BWT against a BWT generated from class dynFMI: -//#define TCB_TEST_BWT +#include "Tools.h" // Default samplerate for suffix array samples -#define TEXTCOLLECTION_DEFAULT_SAMPLERATE 64 +#define TEXTCOLLECTION_DEFAULT_SAMPLERATE 32 // Default input length, used to calculate the buffer size. #define TEXTCOLLECTION_DEFAULT_INPUT_LENGTH (150 * 1024 * 1024) @@ -42,17 +34,23 @@ namespace SXSI { - struct TCBuilderRep; // Pimpl - /** - * Build an instance of the TextCollection class. + * Builder for an instance of the TextCollection class. */ class TextCollectionBuilder { public: - explicit TextCollectionBuilder(unsigned samplerate = TEXTCOLLECTION_DEFAULT_SAMPLERATE, - ulong estimatedInputLength = TEXTCOLLECTION_DEFAULT_INPUT_LENGTH); - ~TextCollectionBuilder(); + // Index type defaults to FM-index. + // SWCSA can be used for natural language inputs. + // NB: Current SWCSA uses a lot of memory during construction! + enum index_type_t { index_type_default, index_type_swcsa }; + + static TextCollectionBuilder* create(unsigned samplerate = TEXTCOLLECTION_DEFAULT_SAMPLERATE, + index_type_t type = index_type_default, + ulong estimatedInputLength = TEXTCOLLECTION_DEFAULT_INPUT_LENGTH); + + + virtual ~TextCollectionBuilder() { }; /** * Insert text @@ -66,7 +64,7 @@ namespace SXSI * index also. If false, text is added only to the TextCollection * and can not be searched for. */ - void InsertText(uchar const *, bool index = true); + virtual void InsertText(uchar const *, bool index = true) = 0; /** * Make static * @@ -76,15 +74,18 @@ namespace SXSI * TextStorage type defaults to TYPE_PLAIN_TEXT, another * possible type is TYPE_LZ_INDEX. */ - TextCollection * InitTextCollection(char type = TextStorage::TYPE_PLAIN_TEXT); + virtual TextCollection * InitTextCollection(char type = TextStorage::TYPE_PLAIN_TEXT) = 0; - private: - // Using Pimpl idiom to hide RLCSA implementation. - struct TCBuilderRep * p_; + protected: + // Protected constructor; use the static method TextCollectionBuilder::create() + TextCollectionBuilder() { }; + private: // No copy constructor or assignment TextCollectionBuilder(TextCollectionBuilder const&); TextCollectionBuilder& operator = (TextCollectionBuilder const&); }; + } + #endif