From 65e613f046f3316769cc66eb27bc891d90ec3dc6 Mon Sep 17 00:00:00 2001 From: nvalimak Date: Thu, 16 Apr 2009 11:45:02 +0000 Subject: [PATCH] Fix for mem usage of Doc git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/TextCollection@320 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- TCImplementation.cpp | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/TCImplementation.cpp b/TCImplementation.cpp index 5a754c7..8112b45 100644 --- a/TCImplementation.cpp +++ b/TCImplementation.cpp @@ -891,7 +891,7 @@ ulong TCImplementation::SearchLessThan(uchar const * pattern, TextPosition m, Te { // printf("i = %lu, c = %c, sp = %lu, ep = %lu\n", i, pattern[i], sp, ep); c = (int)pattern[--i]; - uint result = alphabetrank->rank(c,ep); + uint result = alphabetrank->rankLessThan(c,ep); if (result == ~0u) ep = 0; else @@ -939,19 +939,18 @@ void TCImplementation::makewavelet(uchar *bwt) // delete [] bwt; //alphabetrank = new RLWaveletTree(bwt, n); // Deletes bwt! // std::cerr << "heap usage: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " Mbytes" << std::endl; -// std::cerr << "max heap usage before WT: " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes" << std::endl; +// std::cerr << "max heap usage before WT: " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes" << std::endl; // HeapProfiler::ResetMaxHeapConsumption(); // FIXME remove alphabet_mapper * am = new alphabet_mapper_none(); static_bitsequence_builder * bmb = new static_bitsequence_builder_rrr02(8); // FIXME samplerate? -// static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(16); // FIXME samplerate? wt_coder * wtc = new wt_coder_binary(bwt,n,am); alphabetrank = new static_sequence_wvtree(bwt,n,wtc,bmb,am); delete bmb; bwt = 0; // already deleted -// std::cerr << "heap usage: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " Mbytes" << std::endl; +// std::cerr << "heap usage after WT: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " Mbytes" << std::endl; // std::cerr << "max heap usage after WT: " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes" << std::endl; } @@ -973,9 +972,7 @@ void TCImplementation::maketables() // Build up arrays for text length and starting positions // FIXME Temp, remove - //BlockArray* textLength = new BlockArray(numberOfTexts, Tools::CeilLog2(maxTextLength)); BlockArray* textStartPos = new BlockArray(numberOfTexts, Tools::CeilLog2(this->n)); - //(*textLength)[0] = l; (*textStartPos)[0] = 0; // Construct samples @@ -1000,9 +997,8 @@ void TCImplementation::maketables() ulongmax--; uint alphabetrank_i_tmp =0; - //positions: - for (ulong i=n-1;iGetPos(i) + for (ulong i=n-1;iGetPos(i) x=(i==n-1)?0:i+1; if (x % samplerate == 0 && posOfSuccEndmarker - x > samplerate) { @@ -1024,7 +1020,6 @@ void TCImplementation::maketables() // Store text length and text start position: if (textId < (DocId)numberOfTexts - 1) { - //(*textLength)[textId + 1] = posOfSuccEndmarker - x; (*textStartPos)[textId + 1] = x; // x is the position of end-marker. posOfSuccEndmarker = x; } @@ -1032,7 +1027,7 @@ void TCImplementation::maketables() // LF-mapping from '\0' does not work with this (pseudo) BWT (see details from Wolfgang's thesis). p = textId; // Correct LF-mapping to the last char of the previous text. } - else + else // Now c != '\0', do LF-mapping: p = C[c]+alphabetrank_i_tmp-1; } assert(textId == 0); @@ -1060,31 +1055,34 @@ void TCImplementation::maketables() // FIXME Temp, remove delete tmpSuffix; delete positions; -// delete textLength; delete textStartPos; + // std::cerr << "heap usage before Doc: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " Mbytes" << std::endl; + uint *tmp = new uint[numberOfTexts]; // FIXME Silly... -// cout << "Doc: "; for (unsigned i = 0; i < numberOfTexts; ++i) { tmp[i] = ((*endmarkerDocId)[i] + 1) % numberOfTexts; - // cout << tmp[i] << ", "; +// cout << tmp[i] << ","; } -// cout << endl; + cout << endl; delete endmarkerDocId; + alphabet_mapper * am = new alphabet_mapper_none(); static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(16); // FIXME samplerate? - wt_coder * wtc = new wt_coder_binary(tmp, numberOfTexts, am); - Doc = new static_sequence_wvtree(tmp, numberOfTexts, wtc, bmb, am); + Doc = new static_sequence_wvtree_noptrs(tmp, numberOfTexts, bmb, am); delete bmb; delete [] tmp; - /* document_result res = Doc->access(1, 2, 0, 1); - cout << "result: "; +// std::cerr << "heap usage after Doc: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " Mbytes" << std::endl; + + /*document_result res = Doc->access(3, 3, 0, 3); + cout << "Access result: "; for (document_result::iterator it = res.begin(); it != res.end(); ++it) cout << *it << ", "; - cout << endl;*/ + cout << endl; + exit(0);*/ } -- 2.17.1