Fix memory usage of Doc (construct it with static_sequence_wvtree_noptrs)

author nvalimak <nvalimak@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>

Thu, 16 Apr 2009 11:45:02 +0000 (11:45 +0000)

committer nvalimak <nvalimak@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>

Thu, 16 Apr 2009 11:45:02 +0000 (11:45 +0000)
author nvalimak <nvalimak@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Thu, 16 Apr 2009 11:45:02 +0000 (11:45 +0000)
committer nvalimak <nvalimak@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Thu, 16 Apr 2009 11:45:02 +0000 (11:45 +0000)
diff --git a/TCImplementation.cpp b/TCImplementation.cpp

index 5a754c7..8112b45 100644 (file)
--- a/TCImplementation.cpp
+++ b/TCImplementation.cpp
@@ -891,7 +891,7 @@ ulong TCImplementation::SearchLessThan(uchar const * pattern, TextPosition m, Te
      {
  //         printf("i = %lu, c = %c, sp = %lu, ep = %lu\n", i, pattern[i], sp, ep);
          c = (int)pattern[--i];
-        uint result = alphabetrank->rank(c,ep);
+        uint result = alphabetrank->rankLessThan(c,ep);
          if (result == ~0u)
              ep = 0;
          else
@@ -939,19 +939,18 @@ void TCImplementation::makewavelet(uchar *bwt)
  //    delete [] bwt;
      //alphabetrank = new RLWaveletTree(bwt, n); // Deletes bwt!
  //  std::cerr << "heap usage: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
-//    std::cerr << "max heap usage before WT: " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
  
+//    std::cerr << "max heap usage before WT: " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
  //    HeapProfiler::ResetMaxHeapConsumption(); // FIXME remove
  
      alphabet_mapper * am = new alphabet_mapper_none();
      static_bitsequence_builder * bmb = new static_bitsequence_builder_rrr02(8); // FIXME samplerate?
-//    static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(16); // FIXME samplerate?
      wt_coder * wtc = new wt_coder_binary(bwt,n,am);
      alphabetrank = new static_sequence_wvtree(bwt,n,wtc,bmb,am);
      delete bmb;
      bwt = 0; // already deleted
     
-//    std::cerr << "heap usage: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
+//    std::cerr << "heap usage after WT: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
  //    std::cerr << "max heap usage after WT: " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
  }
  
@@ -973,9 +972,7 @@ void TCImplementation::maketables()
  
      // Build up arrays for text length and starting positions
      // FIXME Temp, remove
-    //BlockArray* textLength = new BlockArray(numberOfTexts, Tools::CeilLog2(maxTextLength));
      BlockArray* textStartPos = new BlockArray(numberOfTexts, Tools::CeilLog2(this->n));
-    //(*textLength)[0] = l;
      (*textStartPos)[0] = 0; 
  
      // Construct samples
@@ -1000,9 +997,8 @@ void TCImplementation::maketables()
      ulongmax--;
      uint alphabetrank_i_tmp =0;
  
-    //positions:
-    for (ulong i=n-1;i<ulongmax;i--) { // TODO bad solution with ulongmax?
-      // i substitutes SA->GetPos(i)
+    for (ulong i=n-1;i<ulongmax;i--) {
+        // i substitutes SA->GetPos(i)
          x=(i==n-1)?0:i+1;
  
          if (x % samplerate == 0 && posOfSuccEndmarker - x > samplerate) {
@@ -1024,7 +1020,6 @@ void TCImplementation::maketables()
              // Store text length and text start position:
              if (textId < (DocId)numberOfTexts - 1)
              {
-                //(*textLength)[textId + 1] = posOfSuccEndmarker - x;
                  (*textStartPos)[textId + 1] = x;  // x is the position of end-marker.
                  posOfSuccEndmarker = x;
              }
@@ -1032,7 +1027,7 @@ void TCImplementation::maketables()
              // LF-mapping from '\0' does not work with this (pseudo) BWT (see details from Wolfgang's thesis).
              p = textId; // Correct LF-mapping to the last char of the previous text.
          }
-        else
+        else // Now c != '\0', do LF-mapping:
              p = C[c]+alphabetrank_i_tmp-1;
      }
      assert(textId == 0);
@@ -1060,31 +1055,34 @@ void TCImplementation::maketables()
      // FIXME Temp, remove
      delete tmpSuffix;
      delete positions;
-//    delete textLength;
      delete textStartPos;
  
+    //  std::cerr << "heap usage before Doc: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
+
  
      uint *tmp = new uint[numberOfTexts]; // FIXME Silly...
-//    cout << "Doc: ";
      for (unsigned i = 0; i < numberOfTexts; ++i)
      {
          tmp[i] = ((*endmarkerDocId)[i] + 1) % numberOfTexts;
-        //      cout << tmp[i] << ", ";
+//        cout << tmp[i] << ",";
      }
-//    cout << endl;
+    cout << endl;
      delete endmarkerDocId;
+
      alphabet_mapper * am = new alphabet_mapper_none();
      static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(16); // FIXME samplerate?
-    wt_coder * wtc = new wt_coder_binary(tmp, numberOfTexts, am);
-    Doc = new static_sequence_wvtree(tmp, numberOfTexts, wtc, bmb, am);
+    Doc = new static_sequence_wvtree_noptrs(tmp, numberOfTexts, bmb, am);
      delete bmb;
      delete [] tmp;
  
-    /*    document_result res = Doc->access(1, 2, 0, 1);
-    cout << "result: ";
+//    std::cerr << "heap usage after Doc: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
+
+    /*document_result res = Doc->access(3, 3, 0, 3);
+    cout << "Access result: ";
      for (document_result::iterator it = res.begin(); it != res.end(); ++it)
          cout << *it << ", ";
-        cout << endl;*/
+        cout << endl;
+        exit(0);*/
  }
author	nvalimak <nvalimak@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
	Thu, 16 Apr 2009 11:45:02 +0000 (11:45 +0000)
committer	nvalimak <nvalimak@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
	Thu, 16 Apr 2009 11:45:02 +0000 (11:45 +0000)