#include "BitRank.h"
#include "TextCollection.h"
#include "BlockArray.h"
-#include "BSGAP.h"
// Include from XMLTree/libcds
#include <basics.h> // Defines W == 32
#include <static_bitsequence.h>
#include <alphabet_mapper.h>
#include <static_sequence.h>
+#include <static_sequence_wvtree_noptrs.h>
// Re-define word size to ulong:
#undef W
namespace SXSI
{
-// Un-comment to compare BWT against a BWT generated from class dynFMI:
-//#define CSA_TEST_BWT
/**
* Implementation of the TextCollection interface
static_sequence * alphabetrank;
// Sample structures for texts longer than samplerate
- BSGAP *sampled; // FIXME Replace with RRR02
+ static_bitsequence * sampled; // FIXME Replace with RRR02
BlockArray *suffixes;
BlockArray *suffixDocId;
// Total number of texts in the collection
unsigned numberOfTexts;
- // Total number of texts including empty texts
- unsigned numberOfAllTexts;
// Length of the longest text
ulong maxTextLength;
// Array of document id's in the order of end-markers in BWT
- // Access by endmarkerDocId[rank_$(L, p) - 1].
- BlockArray *endmarkerDocId;
-
- // FIXME Replace with a more succinct data structure
- // Note: Empty texts are already handled inside XMLTree class.
- BSGAP *emptyTextRank; // FIXME Remove
+ static_sequence *Doc;
// Following are not part of the public API
uchar * BWT(uchar *);
return alphabetrank->rank(0, ep) - ranksp;
}
- unsigned CountEndmarkers(TextPosition, TextPosition, DocId, DocId) const;
+
+ /**
+ * Count end-markers in given interval and
+ * within docIds [min,max]
+ */
+ inline unsigned CountEndmarkers(TextPosition sp, TextPosition ep, DocId min, DocId max) const
+ {
+ if (sp != 0)
+ sp = alphabetrank->rank(0, sp - 1);
+ ep = alphabetrank->rank(0, ep);
+ if (ep == 0)
+ return 0;
+
+ return Doc->count(sp, ep-1, min, max);
+ }
+
+ /**
+ * Enumerate all end-markers in given interval
+ */
+ inline document_result EnumerateEndmarkers(TextPosition sp, TextPosition ep) const
+ {
+ if (sp != 0)
+ sp = alphabetrank->rank(0, sp - 1);
+ ep = alphabetrank->rank(0, ep);
+ if (ep == 0)
+ return document_result();
+
+ return Doc->accessAll(sp, ep-1);
+ }
+
+ /**
+ * Enumerate end-markers in given interval and
+ * within docIds [min,max]
+ */
+ inline document_result EnumerateEndmarkers(TextPosition sp, TextPosition ep, DocId min, DocId max) const
+ {
+ if (sp != 0)
+ sp = alphabetrank->rank(0, sp - 1);
+ ep = alphabetrank->rank(0, ep);
+ if (ep == 0)
+ return document_result();
+
+ return Doc->access(sp, ep-1, min, max);
+ }
}; // class TCImplementation
} // namespace SXSI