X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=TextCollection.h;h=dad5c880ad4e7db431d0d6819cf49ecefe8c0105;hb=ee8a3e526fe7f39cdc075263824faf6c17389297;hp=37d5cb5ab37edb66a39e5ed8c4a36026b322cc8c;hpb=663cd2f6cc5e3796d001e8c527de0aea8c8bbf68;p=SXSI%2FTextCollection.git diff --git a/TextCollection.h b/TextCollection.h index 37d5cb5..dad5c88 100644 --- a/TextCollection.h +++ b/TextCollection.h @@ -30,6 +30,7 @@ namespace SXSI { + /** * General interface for a text collection * @@ -44,12 +45,6 @@ namespace SXSI // Type for text position (FIXME ulong or long?) typedef ulong TextPosition; - /** - * Init an instance of a text collection object - * - * Returns a pointer to an object implementing this interface. - */ - static TextCollection * InitTextCollection(unsigned samplerate = TEXTCOLLECTION_DEFAULT_SAMPLERATE); /** * Load from a file * @@ -59,38 +54,23 @@ namespace SXSI * Throws an exception if std::fread() fails. * */ - virtual void Load(FILE *, unsigned samplerate = 0) = 0; + static TextCollection* Load(FILE *, unsigned samplerate = 0); + /** * Save data structure into a file * * Throws an exception if std::fwrite() fails. */ virtual void Save(FILE *) const = 0; + /** * Virtual destructor */ virtual ~TextCollection() { }; - /** - * Insert text - * - * Must be a zero-terminated string from alphabet [1,255]. - * Can not be called after makeStatic(). - * The i'th text insertion gets an identifier value i-1. - * In other words, document identifiers start from 0. - */ - virtual void InsertText(uchar const *) = 0; - /** - * Make static - * - * Convert to a static collection; reduces space and time complexities. - * New texts can not be inserted after this operation. - */ - virtual void MakeStatic() = 0; - + /** - tests if the string pointed to by DocId is empty - */ - + * Tests if the string pointed to by DocId is empty + */ virtual bool EmptyText(DocId) const = 0; /** @@ -131,6 +111,15 @@ namespace SXSI // Is there a text that is lexicographically less than given string? virtual bool IsLessThan(uchar const *) const = 0; + /** + * Existential queries for given DocId interval. + */ + virtual bool IsPrefix(uchar const *, DocId, DocId) const = 0; + virtual bool IsSuffix(uchar const *, DocId, DocId) const = 0; + virtual bool IsEqual(uchar const *, DocId, DocId) const = 0; + virtual bool IsContains(uchar const *, DocId, DocId) const = 0; + virtual bool IsLessThan(uchar const *, DocId, DocId) const = 0; + /** * Counting queries * Result is the number of occurrences. @@ -146,6 +135,15 @@ namespace SXSI virtual unsigned CountContains(uchar const *) const = 0; virtual unsigned CountLessThan(uchar const *) const = 0; + /** + * Counting queries for given DocId interval + */ + virtual unsigned CountPrefix(uchar const *, DocId, DocId) const = 0; + virtual unsigned CountSuffix(uchar const *, DocId, DocId) const = 0; + virtual unsigned CountEqual(uchar const *, DocId, DocId) const = 0; + virtual unsigned CountContains(uchar const *, DocId, DocId) const = 0; + virtual unsigned CountLessThan(uchar const *, DocId, DocId) const = 0; + /** * Document reporting queries * @@ -159,6 +157,15 @@ namespace SXSI virtual document_result Contains(uchar const *) const = 0; virtual document_result LessThan(uchar const *) const = 0; + /** + * Document reporting queries for given DocId interval. + */ + virtual document_result Prefix(uchar const *, DocId, DocId) const = 0; + virtual document_result Suffix(uchar const *, DocId, DocId) const = 0; + virtual document_result Equal(uchar const *, DocId, DocId) const = 0; + virtual document_result Contains(uchar const *, DocId, DocId) const = 0; + virtual document_result LessThan(uchar const *, DocId, DocId) const = 0; + /** * Full reporting queries * @@ -167,9 +174,11 @@ namespace SXSI // Data type for results typedef std::vector > full_result; virtual full_result FullContains(uchar const *) const = 0; + // Full reporting query for given DocId interval + virtual full_result FullContains(uchar const *, DocId, DocId) const = 0; protected: - // Protected constructor; call the static function InitTextCollection(). + // Protected constructor; use TextCollectionBuilder TextCollection() { }; // No copy constructor or assignment