projects
/
SXSI
/
TextCollection.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
LZ index support
[SXSI/TextCollection.git]
/
TextCollectionBuilder.h
diff --git a/TextCollectionBuilder.h b/TextCollectionBuilder.h
index 13734d2..1eb0275 100644 (file)
--- a/TextCollectionBuilder.h
+++ b/TextCollectionBuilder.h
@@ -22,6 +22,7 @@
#define _SXSI_TextCollectionBuilder_h_
#include "TextCollection.h"
#define _SXSI_TextCollectionBuilder_h_
#include "TextCollection.h"
+#include "TextStorage.h"
#include "Tools.h" // Defines ulong and uchar.
#include <vector>
#include <utility> // Defines std::pair.
#include "Tools.h" // Defines ulong and uchar.
#include <vector>
#include <utility> // Defines std::pair.
@@ -33,6 +34,8 @@
// Default samplerate for suffix array samples
#define TEXTCOLLECTION_DEFAULT_SAMPLERATE 64
// Default samplerate for suffix array samples
#define TEXTCOLLECTION_DEFAULT_SAMPLERATE 64
+// Default input length, used to calculate the buffer size.
+#define TEXTCOLLECTION_DEFAULT_INPUT_LENGTH (150 * 1024 * 1024)
namespace SXSI
namespace SXSI
@@ -45,7 +48,8 @@ namespace SXSI
class TextCollectionBuilder
{
public:
class TextCollectionBuilder
{
public:
- explicit TextCollectionBuilder(unsigned samplerate = TEXTCOLLECTION_DEFAULT_SAMPLERATE);
+ explicit TextCollectionBuilder(unsigned samplerate = TEXTCOLLECTION_DEFAULT_SAMPLERATE,
+ ulong estimatedInputLength = TEXTCOLLECTION_DEFAULT_INPUT_LENGTH);
~TextCollectionBuilder();
/**
~TextCollectionBuilder();
/**
@@ -60,10 +64,13 @@ namespace SXSI
/**
* Make static
*
/**
* Make static
*
- * Convert to a static collection; reduces space and time complexities.
+ * Convert to a static collection.
* New texts can not be inserted after this operation.
* New texts can not be inserted after this operation.
+ *
+ * TextStorage type defaults to TYPE_PLAIN_TEXT, another
+ * possible type is TYPE_LZ_INDEX.
*/
*/
- TextCollection * InitTextCollection();
+ TextCollection * InitTextCollection(char type = TextStorage::TYPE_PLAIN_TEXT);
private:
struct TCBuilderRep * p_;
private:
struct TCBuilderRep * p_;