X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=TCImplementation.h;h=5430f21427093b5a74b51dd3a011be0ee32bdf5a;hb=2ff99d618e7f031bcfd95895c982e0958ae58d8c;hp=5e488db94da8a623ace9f6dad8df4ead78643732;hpb=40ddf9aca842bdc081b6350a4ebfe36b066c94c9;p=SXSI%2FTextCollection.git diff --git a/TCImplementation.h b/TCImplementation.h index 5e488db..5430f21 100644 --- a/TCImplementation.h +++ b/TCImplementation.h @@ -23,13 +23,13 @@ #include "BitRank.h" #include "TextCollection.h" #include "BlockArray.h" -#include "BSGAP.h" // Include from XMLTree/libcds #include // Defines W == 32 #include #include #include +#include // Re-define word size to ulong: #undef W @@ -43,8 +43,6 @@ namespace SXSI { -// Un-comment to compare BWT against a BWT generated from class dynFMI: -//#define CSA_TEST_BWT /** * Implementation of the TextCollection interface @@ -119,24 +117,17 @@ private: static_sequence * alphabetrank; // Sample structures for texts longer than samplerate - BSGAP *sampled; // FIXME Replace with RRR02 + static_bitsequence * sampled; // FIXME Replace with RRR02 BlockArray *suffixes; BlockArray *suffixDocId; // Total number of texts in the collection unsigned numberOfTexts; - // Total number of texts including empty texts - unsigned numberOfAllTexts; // Length of the longest text ulong maxTextLength; // Array of document id's in the order of end-markers in BWT - // Access by endmarkerDocId[rank_$(L, p) - 1]. - BlockArray *endmarkerDocId; - - // FIXME Replace with a more succinct data structure - // Note: Empty texts are already handled inside XMLTree class. - BSGAP *emptyTextRank; // FIXME Remove + static_sequence *Doc; // Following are not part of the public API uchar * BWT(uchar *); @@ -161,7 +152,50 @@ private: return alphabetrank->rank(0, ep) - ranksp; } - unsigned CountEndmarkers(TextPosition, TextPosition, DocId, DocId) const; + + /** + * Count end-markers in given interval and + * within docIds [min,max] + */ + inline unsigned CountEndmarkers(TextPosition sp, TextPosition ep, DocId min, DocId max) const + { + if (sp != 0) + sp = alphabetrank->rank(0, sp - 1); + ep = alphabetrank->rank(0, ep); + if (ep == 0) + return 0; + + return Doc->count(sp, ep-1, min, max); + } + + /** + * Enumerate all end-markers in given interval + */ + inline document_result EnumerateEndmarkers(TextPosition sp, TextPosition ep) const + { + if (sp != 0) + sp = alphabetrank->rank(0, sp - 1); + ep = alphabetrank->rank(0, ep); + if (ep == 0) + return document_result(); + + return Doc->accessAll(sp, ep-1); + } + + /** + * Enumerate end-markers in given interval and + * within docIds [min,max] + */ + inline document_result EnumerateEndmarkers(TextPosition sp, TextPosition ep, DocId min, DocId max) const + { + if (sp != 0) + sp = alphabetrank->rank(0, sp - 1); + ep = alphabetrank->rank(0, ep); + if (ep == 0) + return document_result(); + + return Doc->access(sp, ep-1, min, max); + } }; // class TCImplementation } // namespace SXSI