X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=TCImplementation.cpp;h=5959e6af991c9a7d3e5d803e77c20e9ee8f7bd85;hb=b14313eff81b64586da652b222540f4705796616;hp=a8303d8be74b7251ba2171dda41cda9f27c4499a;hpb=b472b8a9abdb1695994b4b23963de98c361dc66b;p=SXSI%2FTextCollection.git diff --git a/TCImplementation.cpp b/TCImplementation.cpp index a8303d8..5959e6a 100644 --- a/TCImplementation.cpp +++ b/TCImplementation.cpp @@ -19,6 +19,7 @@ *****************************************************************************/ #include "TCImplementation.h" +//#define DEBUG_MEMUSAGE #ifdef DEBUG_MEMUSAGE #include "HeapProfiler.h" // FIXME remove #endif @@ -40,7 +41,7 @@ namespace SXSI { // Save file version info -const uchar TCImplementation::versionFlag = 4; +const uchar TCImplementation::versionFlag = 6; /** * Constructor inits an empty dynamic FM-index. @@ -63,10 +64,12 @@ bool TCImplementation::EmptyText(DocId k) const return false; // Empty texts are not indexed } -uchar* TCImplementation::GetText(DocId k) const +uchar * TCImplementation::GetText(DocId k) const { assert(k < (DocId)numberOfTexts); - TextPosition i = k; + + return textStorage->GetText(k); +/* TextPosition i = k; string result; // Reserve average string length to avoid reallocs @@ -87,7 +90,7 @@ uchar* TCImplementation::GetText(DocId k) const res[i] = '\0'; for (ulong j = 0; j < i; ++j) res[i-j-1] = result[j]; - return res; + return res;*/ } /* * Not supported @@ -769,6 +772,7 @@ void TCImplementation::Save(FILE *file) const throw std::runtime_error("TCImplementation::Save(): file write error (maxTextLength)."); Doc->save(file); + textStorage->Save(file); fflush(file); } @@ -815,6 +819,7 @@ TCImplementation::TCImplementation(FILE *file, unsigned samplerate_) throw std::runtime_error("TCImplementation::Load(): file read error (maxTextLength)."); Doc = static_sequence::load(file); + textStorage = new TextStorage(file); // FIXME Construct data structures with new samplerate //maketables(); @@ -906,6 +911,7 @@ TCImplementation::~TCImplementation() { delete suffixes; delete suffixDocId; delete Doc; + delete textStorage; } void TCImplementation::makewavelet(uchar *bwt) @@ -1036,6 +1042,8 @@ void TCImplementation::maketables() p=bwtEndPos; textId = numberOfTexts; + TextStorageBuilder tsbuilder(n); + /** * Second pass: populate tables suffixes and suffixDocId. */ @@ -1052,6 +1060,8 @@ void TCImplementation::maketables() } uchar c = alphabetrank->access(p, alphabetrank_i_tmp); + tsbuilder[i] = c; + if (c == '\0') { --textId; @@ -1062,9 +1072,10 @@ void TCImplementation::maketables() p = C[c]+alphabetrank_i_tmp-1; } assert(textId == 0); - delete textStartPos; + textStorage = tsbuilder.InitTextStorage(); + #ifdef DEBUG_MEMUSAGE std::cerr << "max heap usage before Doc: " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes" << std::endl; HeapProfiler::ResetMaxHeapConsumption();