projects
/
SXSI
/
TextCollection.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Added support for non-indexed texts
[SXSI/TextCollection.git]
/
TextCollectionBuilder.h
diff --git
a/TextCollectionBuilder.h
b/TextCollectionBuilder.h
index
13734d2
..
6b3819a
100644
(file)
--- a/
TextCollectionBuilder.h
+++ b/
TextCollectionBuilder.h
@@ -22,7 +22,10 @@
#define _SXSI_TextCollectionBuilder_h_
#include "TextCollection.h"
#define _SXSI_TextCollectionBuilder_h_
#include "TextCollection.h"
+#include "TextStorage.h"
#include "Tools.h" // Defines ulong and uchar.
#include "Tools.h" // Defines ulong and uchar.
+
+#include <string>
#include <vector>
#include <utility> // Defines std::pair.
#include <cstring> // Defines std::strlen, added by Kim
#include <vector>
#include <utility> // Defines std::pair.
#include <cstring> // Defines std::strlen, added by Kim
@@ -33,6 +36,8 @@
// Default samplerate for suffix array samples
#define TEXTCOLLECTION_DEFAULT_SAMPLERATE 64
// Default samplerate for suffix array samples
#define TEXTCOLLECTION_DEFAULT_SAMPLERATE 64
+// Default input length, used to calculate the buffer size.
+#define TEXTCOLLECTION_DEFAULT_INPUT_LENGTH (150 * 1024 * 1024)
namespace SXSI
namespace SXSI
@@ -45,7 +50,8 @@
namespace SXSI
class TextCollectionBuilder
{
public:
class TextCollectionBuilder
{
public:
- explicit TextCollectionBuilder(unsigned samplerate = TEXTCOLLECTION_DEFAULT_SAMPLERATE);
+ explicit TextCollectionBuilder(unsigned samplerate = TEXTCOLLECTION_DEFAULT_SAMPLERATE,
+ ulong estimatedInputLength = TEXTCOLLECTION_DEFAULT_INPUT_LENGTH);
~TextCollectionBuilder();
/**
~TextCollectionBuilder();
/**
@@ -55,17 +61,25 @@
namespace SXSI
* Can not be called after makeStatic().
* The i'th text insertion gets an identifier value i-1.
* In other words, document identifiers start from 0.
* Can not be called after makeStatic().
* The i'th text insertion gets an identifier value i-1.
* In other words, document identifiers start from 0.
+ *
+ * Second parameter tells if the text will be added to the
+ * index also. If false, text is added only to the TextCollection
+ * and can not be searched for.
*/
*/
- void InsertText(uchar const *);
+ void InsertText(uchar const *, bool index = true);
/**
* Make static
*
/**
* Make static
*
- * Convert to a static collection; reduces space and time complexities.
+ * Convert to a static collection.
* New texts can not be inserted after this operation.
* New texts can not be inserted after this operation.
+ *
+ * TextStorage type defaults to TYPE_PLAIN_TEXT, another
+ * possible type is TYPE_LZ_INDEX.
*/
*/
- TextCollection * InitTextCollection();
+ TextCollection * InitTextCollection(char type = TextStorage::TYPE_PLAIN_TEXT);
private:
private:
+ // Using Pimpl idiom to hide RLCSA implementation.
struct TCBuilderRep * p_;
// No copy constructor or assignment
struct TCBuilderRep * p_;
// No copy constructor or assignment