X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=TextCollectionBuilder.cpp;h=1d49f397ec4e52b74d6c32b9ae304daddd711cc2;hb=6e35318fa5b3d5630aa8e5c8ac019d62a47b8948;hp=67657bf4927e957c07d78f0709f0fb11f0b231f4;hpb=40ddf9aca842bdc081b6350a4ebfe36b066c94c9;p=SXSI%2FTextCollection.git diff --git a/TextCollectionBuilder.cpp b/TextCollectionBuilder.cpp index 67657bf..1d49f39 100644 --- a/TextCollectionBuilder.cpp +++ b/TextCollectionBuilder.cpp @@ -23,6 +23,7 @@ struct TCBuilderRep unsigned numberOfTexts; // Length of the longest text ulong maxTextLength; + ulong numberOfSamples; #ifdef TCB_TEST_BWT DynFMI *dynFMI; @@ -32,17 +33,17 @@ struct TCBuilderRep /** * Init text collection * - * See CSA.h for more details. */ -TextCollectionBuilder::TextCollectionBuilder(unsigned samplerate) +TextCollectionBuilder::TextCollectionBuilder(unsigned samplerate, ulong estimatedInputLength) : p_(new struct TCBuilderRep()) { p_->n = 0; p_->samplerate = samplerate; p_->numberOfTexts = 0; + p_->numberOfSamples = 0; - // Current params: 8 bytes, 15 MB, no samples - p_->sa = new CSA::RLCSABuilder(8, 0, 15 * 1024 * 1024); + // Current params: 8 bytes, no samples, buffer size n/10 bytes. + p_->sa = new CSA::RLCSABuilder(8, 0, estimatedInputLength/10); assert(p_->sa->isOk()); #ifdef TCB_TEST_BWT @@ -74,11 +75,11 @@ void TextCollectionBuilder::InsertText(uchar const * text) { p_->n += m; p_->numberOfTexts ++; + p_->numberOfSamples += (m-1)/p_->samplerate; #ifdef TCB_TEST_BWT p_->dynFMI->addText(text, m); #endif - p_->sa->insertSequence((char*)text, m-1, 0); assert(p_->sa->isOk()); } @@ -137,7 +138,8 @@ TextCollection * TextCollectionBuilder::InitTextCollection() for (ulong i = 0; i < p_->n; ++i) if (bwt[i] != bwtTest[i]) { - std::cout << "i = " << i << ", bwt = " << (unsigned)bwt[i] << ", " << (unsigned)bwtTest[i] << std::endl; + std::cout << "i = " << i << ", bwt = " << (unsigned)bwt[i] << ", " + << (unsigned)bwtTest[i] << std::endl; assert(0); } delete [] bwtTest; @@ -145,7 +147,8 @@ TextCollection * TextCollectionBuilder::InitTextCollection() #endif // TCB_TEST_BWT } - TextCollection *result = new TCImplementation(bwt, (ulong)length, p_->samplerate, p_->numberOfTexts, p_->maxTextLength); + TextCollection *result = new TCImplementation(bwt, (ulong)length, + p_->samplerate, p_->numberOfTexts, p_->maxTextLength, p_->numberOfSamples); return result; }