namespace SXSI
{
+
/**
* General interface for a text collection
*
// Type for text position (FIXME ulong or long?)
typedef ulong TextPosition;
- /**
- * Init an instance of a text collection object
- *
- * Returns a pointer to an object implementing this interface.
- */
- static TextCollection * InitTextCollection(unsigned samplerate = TEXTCOLLECTION_DEFAULT_SAMPLERATE);
/**
* Load from a file
*
+ * Static factory: the loaded collection is returned as a TextCollection pointer.
+ * NOTE(review): ownership of the returned pointer and the meaning of
+ * samplerate == 0 are not stated here -- confirm against the implementation.
+ *
* Throws an exception if std::fread() fails.
*
*/
- virtual void Load(FILE *, unsigned samplerate = 0) = 0;
+ static TextCollection* Load(FILE *, unsigned samplerate = 0);
+
/**
* Save data structure into a file
*
* Throws an exception if std::fwrite() fails.
*/
virtual void Save(FILE *) const = 0;
+
/**
* Virtual destructor
*/
virtual ~TextCollection() { };
- /**
- * Insert text
- *
- * Must be a zero-terminated string from alphabet [1,255].
- * Can not be called after makeStatic().
- * The i'th text insertion gets an identifier value i-1.
- * In other words, document identifiers start from 0.
- */
- virtual void InsertText(uchar const *) = 0;
- /**
- * Make static
- *
- * Convert to a static collection; reduces space and time complexities.
- * New texts can not be inserted after this operation.
+
+ /**
+ * Tests whether the text identified by the given DocId is empty.
*/
- virtual void MakeStatic() = 0;
-
+ virtual bool EmptyText(DocId) const = 0;
+
/**
* Displaying content
*
*
* Note: Parameters i and j are text positions inside the k'th text.
*/
- virtual uchar* GetText(DocId, TextPosition, TextPosition) const = 0;
+// virtual uchar* GetText(DocId, TextPosition, TextPosition) const = 0;
/**
* Returns backwards (reverse) iterator to the end of i'th text
*
* Note: Do we need this?
* Forward iterator would be really in-efficient compared to
- * getText(k) and getText(k, i, j).
+ * getText(k).
*
* TODO Define and implement const_reverse_iterator.
*/
virtual bool IsLessThan(uchar const *) const = 0;
/**
- * Counting queries
- *
+ * Existential queries for a given DocId interval.
+ *
+ * Each query takes a pattern and two DocId values and returns a bool.
+ * Presumably the two DocId arguments are the begin and end of the
+ * interval of documents to consider.
+ * NOTE(review): whether the bounds are inclusive is not stated here --
+ * confirm against the implementation.
+ */
+ virtual bool IsPrefix(uchar const *, DocId, DocId) const = 0;
+ virtual bool IsSuffix(uchar const *, DocId, DocId) const = 0;
+ virtual bool IsEqual(uchar const *, DocId, DocId) const = 0;
+ virtual bool IsContains(uchar const *, DocId, DocId) const = 0;
+ virtual bool IsLessThan(uchar const *, DocId, DocId) const = 0;
+
+ /**
+ * Counting query
+ * Result is the number of occurrences (not the number of documents).
+ */
+ virtual ulong Count(uchar const *) const = 0;
+ /**
+ * Document counting queries
* Result is the number of documents.
*/
virtual unsigned CountPrefix(uchar const *) const = 0;
virtual unsigned CountContains(uchar const *) const = 0;
virtual unsigned CountLessThan(uchar const *) const = 0;
+ /**
+ * Counting queries for a given DocId interval
+ *
+ * Presumably the result is the number of matching documents inside the
+ * interval delimited by the two DocId arguments, as for the variants
+ * without an interval -- TODO confirm, including whether the bounds
+ * are inclusive.
+ */
+ virtual unsigned CountPrefix(uchar const *, DocId, DocId) const = 0;
+ virtual unsigned CountSuffix(uchar const *, DocId, DocId) const = 0;
+ virtual unsigned CountEqual(uchar const *, DocId, DocId) const = 0;
+ virtual unsigned CountContains(uchar const *, DocId, DocId) const = 0;
+ virtual unsigned CountLessThan(uchar const *, DocId, DocId) const = 0;
+
/**
* Document reporting queries
*
virtual document_result Contains(uchar const *) const = 0;
virtual document_result LessThan(uchar const *) const = 0;
+ /**
+ * Document reporting queries for a given DocId interval.
+ *
+ * Each query returns a document_result.
+ * Presumably only documents inside the interval delimited by the two
+ * DocId arguments are reported -- TODO confirm, including whether the
+ * bounds are inclusive.
+ */
+ virtual document_result Prefix(uchar const *, DocId, DocId) const = 0;
+ virtual document_result Suffix(uchar const *, DocId, DocId) const = 0;
+ virtual document_result Equal(uchar const *, DocId, DocId) const = 0;
+ virtual document_result Contains(uchar const *, DocId, DocId) const = 0;
+ virtual document_result LessThan(uchar const *, DocId, DocId) const = 0;
+
/**
* Full reporting queries
*
// Data type for results
typedef std::vector<std::pair<DocId, TextPosition> > full_result;
virtual full_result FullContains(uchar const *) const = 0;
+ // Full reporting query restricted to a given DocId interval; returns (DocId, TextPosition) pairs
+ virtual full_result FullContains(uchar const *, DocId, DocId) const = 0;
protected:
- // Protected constructor; call the static function InitTextCollection().
+ // Protected constructor; obtain instances via TextCollectionBuilder or the static Load()
TextCollection() { };
// No copy constructor or assignment