namespace SXSI
{
-// Un-comment to compare BWT against a BWT generated from class dynFMI:
-//#define CSA_TEST_BWT
/**
* Implementation of the TextCollection interface
// Total number of texts in the collection
unsigned numberOfTexts;
- // Total number of texts including empty texts
- unsigned numberOfAllTexts;
// Length of the longest text
ulong maxTextLength;
// Array of document id's in the order of end-markers in BWT
// Access by endmarkerDocId[rank_$(L, p) - 1].
- BlockArray *endmarkerDocId;
-
- // FIXME Replace with a more succinct data structure
- // Note: Empty texts are already handled inside XMLTree class.
- BSGAP *emptyTextRank; // FIXME Remove
+ // Document identifiers indexed by end-marker rank; takes the place of the
+ // endmarkerDocId array that the comment above still names. Queried through
+ // count(), access() and accessAll() by CountEndmarkers/EnumerateEndmarkers.
+ static_sequence *Doc;
// Following are not part of the public API
uchar * BWT(uchar *);
return alphabetrank->rank(0, ep) - ranksp;
}
- unsigned CountEndmarkers(TextPosition, TextPosition, DocId, DocId) const;
+
+ /**
+  * Count the end-markers of the given interval whose document
+  * identifier lies within [min,max].
+  *
+  * sp and ep are converted to ranks of symbol 0 via alphabetrank and the
+  * resulting rank range is passed on to Doc->count().
+  */
+ inline unsigned CountEndmarkers(TextPosition sp, TextPosition ep, DocId min, DocId max) const
+ {
+     // Rank of symbol 0 just before sp; remains 0 when sp is 0.
+     TextPosition firstRank = 0;
+     if (sp != 0)
+         firstRank = alphabetrank->rank(0, sp - 1);
+
+     // Rank of symbol 0 at ep. A zero rank yields a zero count, and
+     // returning early keeps endRank - 1 from being evaluated at zero.
+     const TextPosition endRank = alphabetrank->rank(0, ep);
+     if (endRank == 0)
+         return 0;
+
+     return Doc->count(firstRank, endRank - 1, min, max);
+ }
+
+ /**
+  * List every end-marker of the given interval.
+  *
+  * sp and ep are converted to ranks of symbol 0 via alphabetrank and the
+  * resulting rank range is passed on to Doc->accessAll().
+  */
+ inline document_result EnumerateEndmarkers(TextPosition sp, TextPosition ep) const
+ {
+     // Rank of symbol 0 just before sp; remains 0 when sp is 0.
+     TextPosition firstRank = 0;
+     if (sp != 0)
+         firstRank = alphabetrank->rank(0, sp - 1);
+
+     // Rank of symbol 0 at ep. A zero rank yields an empty result, and
+     // returning early keeps endRank - 1 from being evaluated at zero.
+     const TextPosition endRank = alphabetrank->rank(0, ep);
+     if (endRank == 0)
+         return document_result();
+
+     return Doc->accessAll(firstRank, endRank - 1);
+ }
+
+ /**
+  * List the end-markers of the given interval whose document
+  * identifier lies within [min,max].
+  *
+  * sp and ep are converted to ranks of symbol 0 via alphabetrank and the
+  * resulting rank range is passed on to Doc->access().
+  */
+ inline document_result EnumerateEndmarkers(TextPosition sp, TextPosition ep, DocId min, DocId max) const
+ {
+     // Rank of symbol 0 just before sp; remains 0 when sp is 0.
+     TextPosition firstRank = 0;
+     if (sp != 0)
+         firstRank = alphabetrank->rank(0, sp - 1);
+
+     // Rank of symbol 0 at ep. A zero rank yields an empty result, and
+     // returning early keeps endRank - 1 from being evaluated at zero.
+     const TextPosition endRank = alphabetrank->rank(0, ep);
+     if (endRank == 0)
+         return document_result();
+
+     return Doc->access(firstRank, endRank - 1, min, max);
+ }
}; // class TCImplementation
} // namespace SXSI