X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=TextCollection.h;h=dbc79822fe5bb33f7752fe65be84fc2d056b3f6c;hb=9042e7b1b181a50b15e8ec1b79d5a5c2f913bfff;hp=f41084da633e1557c06c451e13c190f1f381c40a;hpb=6a9b42550a6eb6922d0a5f5d30f5dbc18a1e0b66;p=SXSI%2FTextCollection.git diff --git a/TextCollection.h b/TextCollection.h index f41084d..dbc7982 100644 --- a/TextCollection.h +++ b/TextCollection.h @@ -25,11 +25,9 @@ #include #include // Defines std::pair. -// Default samplerate for suffix array samples -#define TEXTCOLLECTION_DEFAULT_SAMPLERATE 64 - namespace SXSI { + /** * General interface for a text collection * @@ -44,12 +42,6 @@ namespace SXSI // Type for text position (FIXME ulong or long?) typedef ulong TextPosition; - /** - * Init an instance of a text collection object - * - * Returns a pointer to an object implementing this interface. - */ - static TextCollection * InitTextCollection(unsigned samplerate = TEXTCOLLECTION_DEFAULT_SAMPLERATE); /** * Load from a file * @@ -59,38 +51,23 @@ namespace SXSI * Throws an exception if std::fread() fails. * */ - virtual void Load(FILE *, unsigned samplerate = 0) = 0; + static TextCollection* Load(FILE *, unsigned samplerate = 0); + /** * Save data structure into a file * * Throws an exception if std::fwrite() fails. */ virtual void Save(FILE *) const = 0; + /** * Virtual destructor */ virtual ~TextCollection() { }; - /** - * Insert text - * - * Must be a zero-terminated string from alphabet [1,255]. - * Can not be called after makeStatic(). - * The i'th text insertion gets an identifier value i-1. - * In other words, document identifiers start from 0. - */ - virtual void InsertText(uchar const *) = 0; - /** - * Make static - * - * Convert to a static collection; reduces space and time complexities. - * New texts can not be inserted after this operation. - */ - virtual void MakeStatic() = 0; - + /** - tests if the string pointed to by DocId is empty - */ - + * Tests if the string pointed to by DocId is empty + */ virtual bool EmptyText(DocId) const = 0; /** @@ -98,8 +75,22 @@ namespace SXSI * * Returns the i'th text in the collection. * The numbering starts from 0. + * + * Call DeleteText() for each pointer returned by GetText() + * to avoid possible memory leaks. */ virtual uchar* GetText(DocId) const = 0; + virtual void DeleteText(uchar *text) const = 0; + + /** + * Returns a pointer to the beginning of texts i, i+1, ..., j. + * Texts are separated by a '\0' byte. + * + * Call DeleteText() for each pointer returned by GetText() + * to avoid possible memory leaks. + */ + virtual uchar * GetText(DocId i, DocId j) const = 0; + /** * Returns substring [i, j] of k'th text * @@ -176,7 +167,9 @@ namespace SXSI virtual document_result Equal(uchar const *) const = 0; virtual document_result Contains(uchar const *) const = 0; virtual document_result LessThan(uchar const *) const = 0; - + virtual document_result KMismaches(uchar const *, unsigned) const = 0; + virtual document_result KErrors(uchar const *, unsigned) const = 0; + /** * Document reporting queries for given DocId interval. */ @@ -197,8 +190,11 @@ namespace SXSI // Full reporting query for given DocId interval virtual full_result FullContains(uchar const *, DocId, DocId) const = 0; + virtual full_result FullKMismatches(uchar const *, unsigned) const = 0; + virtual full_result FullKErrors(uchar const *, unsigned) const = 0; + protected: - // Protected constructor; call the static function InitTextCollection(). + // Protected constructor; use TextCollectionBuilder TextCollection() { }; // No copy constructor or assignment