// Type for text position (FIXME ulong or long?)
typedef ulong TextPosition;
+ // Mode flag taken by Load() as its third parameter: index_mode_default includes both the index and the "naive" text
+ enum index_mode_t { index_mode_default, index_mode_text_only }; // a third value, index_mode_index_only, is commented out here
+
/**
* Load from a file
*
+ * The second parameter is a prefix to be used when there are
+ * multiple save files. (SWCSAWrapper uses multiple save files!)
+ *
+ * The third parameter is the index mode (see index_mode_t);
+ * it defaults to index_mode_default.
+ *
* New samplerate can be given, otherwise will use the one specified in the save file!
- * Note: This is not a static method; call InitTextCollection() first to get the object handle.
*
* Throws an exception if std::fread() fails.
*
*/
- static TextCollection* Load(FILE *, unsigned samplerate = 0);
+ static TextCollection* Load(FILE *, char const *, index_mode_t = index_mode_default, unsigned samplerate = 0);
/**
* Save data structure into a file
- *
+ *
+ * The second parameter is a prefix to be used when there are
+ * multiple save files. (SWCSAWrapper uses multiple save files!)
+ *
* Throws an exception if std::fwrite() fails.
*/
- virtual void Save(FILE *) const = 0;
+ virtual void Save(FILE *, char const *) const = 0;
/**
* Virtual destructor
*
* Returns the i'th text in the collection.
* The numbering starts from 0.
+ *
+ * Call DeleteText() for each pointer returned by GetText()
+ * to avoid possible memory leaks.
*/
virtual uchar* GetText(DocId) const = 0;
+ virtual void DeleteText(uchar *text) const = 0; // to be called for each pointer returned by GetText(), to avoid possible memory leaks
+
+ /**
+ * Returns a pointer to the beginning of texts i, i+1, ..., j.
+ * Consecutive texts are separated by a '\0' byte.
+ *
+ * Call DeleteText() for each pointer returned by this overload
+ * of GetText() as well, to avoid possible memory leaks.
+ */
+ virtual uchar * GetText(DocId i, DocId j) const = 0;
+
/**
* Returns substring [i, j] of k'th text
*
virtual document_result Equal(uchar const *) const = 0;
virtual document_result Contains(uchar const *) const = 0;
virtual document_result LessThan(uchar const *) const = 0;
-
+ virtual document_result KMismaches(uchar const *, unsigned) const = 0; // NOTE(review): spelled "KMismaches", unlike FullKMismatches; a rename would have to touch every override and caller
+ virtual document_result KErrors(uchar const *, unsigned) const = 0; // presumably the unsigned argument is the allowed number of errors (k) -- TODO confirm
+
/**
* Document reporting queries for given DocId interval.
*/
// Full reporting query for given DocId interval
virtual full_result FullContains(uchar const *, DocId, DocId) const = 0;
+ virtual full_result FullKMismatches(uchar const *, unsigned) const = 0; // NOTE(review): takes no DocId interval, unlike FullContains
+ virtual full_result FullKErrors(uchar const *, unsigned) const = 0; // NOTE(review): takes no DocId interval either
+
+
+ virtual TextPosition getLength() const // virtual but not pure: this default body only reports the call as unsupported and exits
+ {
+ std::cerr << "TextCollection::getLength() is unsupported! Use RLCSA instead." << std::endl;
+ std::exit(2); // ends the program with exit status 2
+ return 0; // placed after std::exit(); presumably only there so the function has a return statement
+ }
+
+ virtual TextPosition LF(uchar c, TextPosition i) const // virtual but not pure: this default body ignores c and i, reports the call as unsupported and exits
+ {
+ std::cerr << "TextCollection::LF() is unsupported! Use RLCSA instead." << std::endl;
+ std::exit(2); // ends the program with exit status 2
+ return 0; // placed after std::exit(); presumably only there so the function has a return statement
+ }
+
+ virtual uchar* getSuffix(TextPosition pos, unsigned l) const // virtual but not pure: this default body ignores pos and l, reports the call as unsupported and exits
+ {
+ std::cerr << "TextCollection::getSuffix() is unsupported! Use RLCSA instead." << std::endl;
+ std::exit(2); // ends the program with exit status 2
+ return 0; // null pointer, placed after std::exit(); presumably only there so the function has a return statement
+ }
+
+ virtual DocId getDoc(TextPosition i) const // virtual but not pure: this default body ignores i, reports the call as unsupported and exits
+ {
+ std::cerr << "TextCollection::getDoc() is unsupported! Use RLCSA instead." << std::endl;
+ std::exit(2); // ends the program with exit status 2
+ return 0; // placed after std::exit(); presumably only there so the function has a return statement
+ }
+
+
protected:
// Protected constructor; use TextCollectionBuilder
TextCollection() { };
+// index_mode_t indexMode; // NOTE(review): member is commented out, so no index mode is stored here
+
// No copy constructor or assignment
TextCollection(TextCollection const&);
TextCollection& operator = (TextCollection const&);