X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=TextCollectionBuilder.h;h=1eb0275550fb05145972dfd0ad5399cf3400cdd0;hb=d660b6ec5cd55019d17188810b783a2e3a94fa49;hp=3c725124cc715ffdcd0e2b6e22c64f17f926c7fe;hpb=40ddf9aca842bdc081b6350a4ebfe36b066c94c9;p=SXSI%2FTextCollection.git diff --git a/TextCollectionBuilder.h b/TextCollectionBuilder.h index 3c72512..1eb0275 100644 --- a/TextCollectionBuilder.h +++ b/TextCollectionBuilder.h @@ -22,9 +22,21 @@ #define _SXSI_TextCollectionBuilder_h_ #include "TextCollection.h" +#include "TextStorage.h" #include "Tools.h" // Defines ulong and uchar. #include #include <utility> // Defines std::pair. +#include <cstring> // Defines std::strlen, added by Kim + +// Un-comment to compare BWT against a BWT generated from class dynFMI: +//#define TCB_TEST_BWT + +// Default samplerate for suffix array samples +#define TEXTCOLLECTION_DEFAULT_SAMPLERATE 64 + +// Default input length, used to calculate the buffer size. +#define TEXTCOLLECTION_DEFAULT_INPUT_LENGTH (150 * 1024 * 1024) + namespace SXSI { @@ -36,7 +48,8 @@ namespace SXSI class TextCollectionBuilder { public: - explicit TextCollectionBuilder(unsigned); + explicit TextCollectionBuilder(unsigned samplerate = TEXTCOLLECTION_DEFAULT_SAMPLERATE, + ulong estimatedInputLength = TEXTCOLLECTION_DEFAULT_INPUT_LENGTH); ~TextCollectionBuilder(); /** @@ -51,16 +64,18 @@ namespace SXSI /** * Make static * - * Convert to a static collection; reduces space and time complexities. + * Convert to a static collection. * New texts can not be inserted after this operation. + * + * TextStorage type defaults to TYPE_PLAIN_TEXT, another + * possible type is TYPE_LZ_INDEX. */ - TextCollection * InitTextCollection(); + TextCollection * InitTextCollection(char type = TextStorage::TYPE_PLAIN_TEXT); private: struct TCBuilderRep * p_; // No copy constructor or assignment - TextCollectionBuilder(); TextCollectionBuilder(TextCollectionBuilder const&); TextCollectionBuilder& operator = (TextCollectionBuilder const&); };