X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=TextStorage.h;h=1f809b8e4878d186aee720a0cdf802dc05685617;hb=a56dda3a95d2df268969bd3c2c994b5d2f020410;hp=fcd66e58f3e0dcf4915c2c870a55392a8e711e3f;hpb=54d7f377217134290545df4c3f7c254478dc1f88;p=SXSI%2FTextCollection.git diff --git a/TextStorage.h b/TextStorage.h index fcd66e5..1f809b8 100644 --- a/TextStorage.h +++ b/TextStorage.h @@ -23,7 +23,16 @@ #include "TextCollection.h" #include "Tools.h" + #include "incbwt/bits/deltavector.h" +// Re-define word size to ulong: +#undef W +#if __WORDSIZE == 64 +# define W 64 +#else +# define W 32 +#endif + #include #include @@ -59,20 +68,22 @@ public: virtual ~TextStorage() { + delete offit_; + offit_ = 0; delete offsets_; - offsets_ = 0; + offsets_ = 0; } TextCollection::DocId DocIdAtTextPos(TextCollection::TextPosition i) const { assert(i < n_); - return offsets_->rank(i)-1; + return offit_->rank(i)-1; } TextCollection::TextPosition TextStartPos(TextCollection::DocId i) const { assert(i < (TextCollection::DocId)numberOfTexts_); - return offsets_->select(i); + return offit_->select(i); } bool IsEndmarker(TextCollection::TextPosition i) const @@ -80,7 +91,7 @@ public: assert(i < n_); if (i >= n_ - 1) return true; - return offsets_->isSet(i+1); + return offit_->isSet(i+1); } @@ -91,7 +102,7 @@ protected: const static CSA::usint DV_BLOCK_SIZE = 32; TextStorage(uchar const * text, TextPosition n) - : n_(n), offsets_(0), numberOfTexts_(0) + : n_(n), offsets_(0), offit_(0), numberOfTexts_(0) { // Delta encoded bitvector of text offsets. CSA::DeltaEncoder encoder(DV_BLOCK_SIZE); @@ -104,12 +115,8 @@ protected: offsets_ = new CSA::DeltaVector(encoder, n_); - -/* for (ulong i = 0; i < n_-1; ++i) - if ((text[i] == '\0') != IsEndmarker(i)) - std::cout << "misplaced endmarker at i = " << i << std::endl; -*/ - numberOfTexts_ = offsets_->rank(n_ - 1); + offit_ = new CSA::DeltaVector::Iterator(*(offsets_)); + numberOfTexts_ = offit_->rank(n_ - 1); } TextStorage(std::FILE *); @@ -117,6 +124,7 @@ protected: TextPosition n_; CSA::DeltaVector *offsets_; + CSA::DeltaVector::Iterator *offit_; TextPosition numberOfTexts_; }; @@ -157,7 +165,7 @@ public: { assert(docId < (TextCollection::DocId)numberOfTexts_); - TextPosition offset = offsets_->select(docId); + TextPosition offset = offit_->select(docId); return &text_[offset]; } @@ -166,7 +174,7 @@ public: assert(i < (TextCollection::DocId)numberOfTexts_); assert(j < (TextCollection::DocId)numberOfTexts_); - TextPosition offset = offsets_->select(i); + TextPosition offset = offit_->select(i); return &text_[offset]; } @@ -182,7 +190,7 @@ private: /****************************************************************** * LZ-index text collection. */ -struct LzTriePimpl; // Pimpl, declared in .cpp +struct LzTriePimpl; // Using Pimpl idiom to hide LzTrie implementation. class TextStorageLzIndex : public TextStorage { @@ -252,7 +260,7 @@ public: private: TextPosition n_; - uchar *text_; // FIXME Replace with a succinct representation. + uchar *text_; bool freeText; }; // class TextStorageBuilder