X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=TCImplementation.h;h=0f0cab49391db87a9082c3e3128151964bc13eec;hb=65e613f046f3316769cc66eb27bc891d90ec3dc6;hp=5e488db94da8a623ace9f6dad8df4ead78643732;hpb=40ddf9aca842bdc081b6350a4ebfe36b066c94c9;p=SXSI%2FTextCollection.git diff --git a/TCImplementation.h b/TCImplementation.h index 5e488db..0f0cab4 100644 --- a/TCImplementation.h +++ b/TCImplementation.h @@ -30,6 +30,7 @@ #include #include #include +#include // Re-define word size to ulong: #undef W @@ -43,8 +44,6 @@ namespace SXSI { -// Un-comment to compare BWT against a BWT generated from class dynFMI: -//#define CSA_TEST_BWT /** * Implementation of the TextCollection interface @@ -125,18 +124,11 @@ private: // Total number of texts in the collection unsigned numberOfTexts; - // Total number of texts including empty texts - unsigned numberOfAllTexts; // Length of the longest text ulong maxTextLength; // Array of document id's in the order of end-markers in BWT - // Access by endmarkerDocId[rank_$(L, p) - 1]. - BlockArray *endmarkerDocId; - - // FIXME Replace with a more succinct data structure - // Note: Empty texts are already handled inside XMLTree class. - BSGAP *emptyTextRank; // FIXME Remove + static_sequence *Doc; // Following are not part of the public API uchar * BWT(uchar *); @@ -161,7 +153,50 @@ private: return alphabetrank->rank(0, ep) - ranksp; } - unsigned CountEndmarkers(TextPosition, TextPosition, DocId, DocId) const; + + /** + * Count end-markers in given interval and + * within docIds [min,max] + */ + inline unsigned CountEndmarkers(TextPosition sp, TextPosition ep, DocId min, DocId max) const + { + if (sp != 0) + sp = alphabetrank->rank(0, sp - 1); + ep = alphabetrank->rank(0, ep); + if (ep == 0) + return 0; + + return Doc->count(sp, ep-1, min, max); + } + + /** + * Enumerate all end-markers in given interval + */ + inline document_result EnumerateEndmarkers(TextPosition sp, TextPosition ep) const + { + if (sp != 0) + sp = alphabetrank->rank(0, sp - 1); + ep = alphabetrank->rank(0, ep); + if (ep == 0) + return document_result(); + + return Doc->accessAll(sp, ep-1); + } + + /** + * Enumerate end-markers in given interval and + * within docIds [min,max] + */ + inline document_result EnumerateEndmarkers(TextPosition sp, TextPosition ep, DocId min, DocId max) const + { + if (sp != 0) + sp = alphabetrank->rank(0, sp - 1); + ep = alphabetrank->rank(0, ep); + if (ep == 0) + return document_result(); + + return Doc->access(sp, ep-1, min, max); + } }; // class TCImplementation } // namespace SXSI