#include "BitRank.h"
#include "dynFMI.h"
#include "TextCollection.h"
+#include "BlockArray.h"
/**
* Implementation of the TextCollection interface
*/
void InsertText(uchar const *);
void MakeStatic();
+ bool EmptyText(DocId) const;
uchar* GetText(DocId) const;
uchar* GetText(DocId, TextPosition, TextPosition) const;
// full_result is inherited from SXSI::TextCollection.
full_result FullContains(uchar const *) const;
- // TODO implement:
+ // Index from/to disk
void Load(FILE *, unsigned);
- void Save(FILE *);
+ void Save(FILE *) const;
+
+ // debug
+ TextPosition Lookup(TextPosition) const;
private:
class TCodeEntry {
~THuffAlphabetRank();
bool Test(uchar *, TextPosition);
- inline ulong rank(int c, TextPosition i) const { // returns the number of characters c before and including position i
+ inline TextPosition rank(int c, TextPosition i) const { // returns the number of characters c before and including position i
THuffAlphabetRank const * temp=this;
if (codetable[c].count == 0) return 0;
unsigned level = 0;
static const unsigned char report = 1;
TextPosition n;
unsigned samplerate;
- ulong C[256];
+ unsigned C[256];
TextPosition bwtEndPos;
THuffAlphabetRank *alphabetrank;
BitRank *sampled;
- ulong *suffixes;
- ulong *positions;
+ BlockArray *suffixes;
+ BlockArray *positions;
TCodeEntry *codetable;
DynFMI *dynFMI;
- // Map from end-marker in BWT to pair (textId, sampled text position)
- std::map<TextPosition, std::pair<DocId, TextPosition> > endmarkers;
- // Vector of pairs of <text length, text start position>
- std::vector<std::pair<TextPosition, TextPosition> > textLength;
+
+ // Total number of texts in the collection
+ unsigned numberOfTexts;
+ // Length of the longest text
+ ulong maxTextLength;
+
+ // Array of document id's in the order of end-markers in BWT
+ // Access by endmarkerDocId[rank_$(L, p) - 1].
+ BlockArray *endmarkerDocId;
+ // Array of text lengths (in the inserted order)
+ BlockArray *textLength;
+ // Array of text starting positions (in the inserted order)
+ BlockArray *textStartPos;
+
// Private methods
uchar * BWT(uchar *);
uchar * LoadFromFile(const char *);
void SaveToFile(const char *, uchar *);
+ void makewavelet(uchar *);
void maketables();
// Following are not part of the public API
DocId DocIdAtTextPos(TextPosition) const;
+ unsigned CountEndmarkers(TextPosition, TextPosition) const;
ulong Search(uchar const *, TextPosition, TextPosition *, TextPosition *) const;
- TextPosition Lookup(TextPosition) const;
TextPosition Inverse(TextPosition) const;
TextPosition LF(uchar c, TextPosition &sp, TextPosition &ep) const;
TextPosition Psi(TextPosition) const;