Added RLCSA index option
[SXSI/TextCollection.git] / incbwt / bits / bitvector.cpp
index b2ed324..ea2aba6 100644 (file)
@@ -15,12 +15,12 @@ BitVector::BitVector(std::ifstream& file) :
   file.read((char*)&(this->number_of_blocks), sizeof(this->number_of_blocks));
   file.read((char*)&(this->block_size), sizeof(this->block_size));
 
-  this->array = new usint[this->block_size * this->number_of_blocks];
-  file.read((char*)(this->array), this->block_size * this->number_of_blocks * sizeof(usint));
-  this->buffer = new FastBitBuffer(this->array, this->block_size);
+  usint* array_buffer = new usint[this->block_size * this->number_of_blocks];
+  file.read((char*)(array_buffer), this->block_size * this->number_of_blocks * sizeof(usint));
+  this->array = array_buffer;
 
   this->integer_bits = length(this->size);
-  this->samples = new FastBitBuffer(file, 2 * (this->number_of_blocks + 1), this->integer_bits);
+  this->samples = new ReadBuffer(file, 2 * (this->number_of_blocks + 1), this->integer_bits);
 
   this->indexForRank();
   this->indexForSelect();
@@ -34,37 +34,38 @@ BitVector::BitVector(std::FILE * file) :
     std::fread(&(this->number_of_blocks), sizeof(this->number_of_blocks), 1, file);
     std::fread(&(this->block_size), sizeof(this->block_size), 1, file);
 
-  this->array = new usint[this->block_size * this->number_of_blocks];
-  std::fread(this->array, sizeof(usint), this->block_size * this->number_of_blocks, file);
-  this->buffer = new FastBitBuffer(this->array, this->block_size);
+  usint* array_buffer = new usint[this->block_size * this->number_of_blocks];
+  std::fread(array_buffer, sizeof(usint), this->block_size * this->number_of_blocks, file);
+  this->array = array_buffer;
 
   this->integer_bits = length(this->size);
-  this->samples = new FastBitBuffer(file, 2 * (this->number_of_blocks + 1), this->integer_bits);
+  this->samples = new ReadBuffer(file, 2 * (this->number_of_blocks + 1), this->integer_bits);
 
   this->indexForRank();
   this->indexForSelect();
 }
 
+
 BitVector::BitVector(VectorEncoder& encoder, usint universe_size) :
   size(universe_size), items(encoder.items),
   block_size(encoder.block_size),
   number_of_blocks(encoder.blocks),
   rank_index(0), select_index(0)
 {
-  this->array = new usint[this->block_size * this->number_of_blocks];
-  this->buffer = new FastBitBuffer(this->array, this->block_size);
+  usint* array_buffer = new usint[this->block_size * this->number_of_blocks];
 
   this->integer_bits = length(this->size);
-  this->samples = new FastBitBuffer(2 * (this->number_of_blocks + 1), this->integer_bits);
+  WriteBuffer sample_buffer(2 * (this->number_of_blocks + 1), this->integer_bits);
 
   // Copy & linearize the array.
   usint pos = 0;
   for(std::list<usint*>::iterator iter = encoder.array_blocks.begin(); iter != encoder.array_blocks.end(); iter++)
   {
-    memcpy(this->array + pos, *iter, encoder.superblock_bytes);
+    memcpy(array_buffer + pos, *iter, encoder.superblock_bytes);
     pos += encoder.block_size * encoder.blocks_in_superblock;
   }
-  memcpy(this->array + pos, encoder.array, encoder.current_blocks * encoder.block_size * sizeof(usint));
+  memcpy(array_buffer + pos, encoder.array, encoder.current_blocks * encoder.block_size * sizeof(usint));
+  this->array = array_buffer;
 
   // Compress the samples.
   for(std::list<usint*>::iterator iter = encoder.sample_blocks.begin(); iter != encoder.sample_blocks.end(); iter++)
@@ -72,15 +73,17 @@ BitVector::BitVector(VectorEncoder& encoder, usint universe_size) :
     usint* buf = *iter;
     for(usint i = 0; i < 2 * encoder.samples_in_superblock; i++)
     {
-      this->samples->writeItem(buf[i]);
+      sample_buffer.writeItem(buf[i]);
     }
   }
   for(usint i = 0; i < 2 * encoder.current_samples; i++)
   {
-    this->samples->writeItem(encoder.samples[i]);
+    sample_buffer.writeItem(encoder.samples[i]);
   }
-  this->samples->writeItem(this->items);
-  this->samples->writeItem(this->size);
+  sample_buffer.writeItem(this->items);
+  sample_buffer.writeItem(this->size);
+
+  this->samples = sample_buffer.getReadBuffer();
 
   this->indexForRank();
   this->indexForSelect();
@@ -89,80 +92,49 @@ BitVector::BitVector(VectorEncoder& encoder, usint universe_size) :
 BitVector::~BitVector()
 {
   delete[] this->array;
-  delete   this->buffer;
-  delete   this->samples;
-  delete   this->rank_index;
-  delete   this->select_index;
+  delete this->samples;
+  delete this->rank_index;
+  delete this->select_index;
 }
 
+//--------------------------------------------------------------------------
+
 void
-BitVector::writeTo(std::ofstream& file)
+BitVector::writeTo(std::ofstream& file) const
 {
   file.write((char*)&(this->size), sizeof(this->size));
   file.write((char*)&(this->items), sizeof(this->items));
   file.write((char*)&(this->number_of_blocks), sizeof(this->number_of_blocks));
   file.write((char*)&(this->block_size), sizeof(this->block_size));
   file.write((char*)(this->array), this->block_size * this->number_of_blocks * sizeof(usint));
-  this->samples->writeBuffer(file, false);
+  this->samples->writeBuffer(file);
 }
-
 void
-BitVector::writeTo(FILE* file)
+BitVector::writeTo(FILE* file) const
 {
     std::fwrite(&(this->size), sizeof(this->size), 1, file);
     std::fwrite(&(this->items), sizeof(this->items), 1, file);
     std::fwrite(&(this->number_of_blocks), sizeof(this->number_of_blocks), 1, file);
     std::fwrite(&(this->block_size), sizeof(this->block_size), 1, file);
     std::fwrite(this->array, sizeof(usint), this->block_size * this->number_of_blocks, file);
-    this->samples->writeBuffer(file, false);
+    this->samples->writeBuffer(file);
 }
 
-
-//--------------------------------------------------------------------------
-
 usint
-BitVector::reportSize()
+BitVector::reportSize() const
 {
-  usint bytes = this->buffer->reportSize();
-  bytes += this->block_size * this->number_of_blocks * sizeof(usint);
+  // We assume the reportSize() of derived classes includes any class variables of BitVector.
+  usint bytes = this->block_size * this->number_of_blocks * sizeof(usint);
   bytes += this->samples->reportSize();
   bytes += this->rank_index->reportSize();
   bytes += this->select_index->reportSize();
   return bytes;
 }
 
-//--------------------------------------------------------------------------
-
-usint
-BitVector::sampleForIndex(usint index)
-{
-  usint low = this->select_index->readItem(index / this->select_rate);
-  usint high = this->select_index->readItem(index / this->select_rate + 1);
-
-  this->samples->goToItem(2 * low + 2);
-  for(; low < high; low++)
-  {
-    if(this->samples->readItem() > index) { return low; }
-    this->samples->skipItem();
-  }
-
-  return low;
-}
-
 usint
-BitVector::sampleForValue(usint value)
+BitVector::getCompressedSize() const
 {
-  usint low = this->rank_index->readItem(value / this->rank_rate);
-  usint high = this->rank_index->readItem(value / this->rank_rate + 1);
-
-  this->samples->goToItem(2 * low + 3);
-  for(; low < high; low++)
-  {
-    if(this->samples->readItem() > value) { return low; }
-    this->samples->skipItem();
-  }
-
-  return low;
+  return this->block_size * this->number_of_blocks * sizeof(usint);
 }
 
 //--------------------------------------------------------------------------
@@ -175,7 +147,7 @@ BitVector::indexForRank()
   usint value_samples = (this->number_of_blocks + BitVector::INDEX_RATE - 1) / BitVector::INDEX_RATE;
   this->rank_rate = (this->size + value_samples - 1) / value_samples;
   value_samples = (this->size + this->rank_rate - 1) / this->rank_rate + 1;
-  this->rank_index = new FastBitBuffer(value_samples, length(this->number_of_blocks - 1));
+  WriteBuffer index_buffer(value_samples, length(this->number_of_blocks - 1));
 
   usint current = 0, pointer = 0;
   this->samples->goToItem(2);
@@ -185,12 +157,14 @@ BitVector::indexForRank()
     usint limit = this->samples->readItem();
     while(current < limit)
     {
-      this->rank_index->writeItem(pointer);
+      index_buffer.writeItem(pointer);
       current += this->rank_rate;
     }
     pointer++;
   }
-  this->rank_index->writeItem(this->number_of_blocks - 1);
+  index_buffer.writeItem(this->number_of_blocks - 1);
+
+  this->rank_index = index_buffer.getReadBuffer();
 }
 
 void
@@ -201,7 +175,7 @@ BitVector::indexForSelect()
   usint index_samples = (this->number_of_blocks + BitVector::INDEX_RATE - 1) / BitVector::INDEX_RATE;
   this->select_rate = (this->items + index_samples - 1) / index_samples;
   index_samples = (this->items + this->select_rate - 1) / this->select_rate + 1;
-  this->select_index = new FastBitBuffer(index_samples, length(this->number_of_blocks - 1));
+  WriteBuffer index_buffer(index_samples, length(this->number_of_blocks - 1));
 
   usint current = 0, pointer = 0;
   this->samples->goToItem(2);
@@ -211,12 +185,59 @@ BitVector::indexForSelect()
     this->samples->skipItem();
     while(current < limit)
     {
-      this->select_index->writeItem(pointer);
+      index_buffer.writeItem(pointer);
       current += this->select_rate;
     }
     pointer++;
   }
-  this->select_index->writeItem(this->number_of_blocks - 1);
+  index_buffer.writeItem(this->number_of_blocks - 1);
+
+  this->select_index = index_buffer.getReadBuffer();
+}
+
+//--------------------------------------------------------------------------
+
+BitVector::Iterator::Iterator(const BitVector& par) :
+  parent(par),
+  buffer(par.array, par.block_size),
+  samples(*(par.samples))
+{
+}
+
+BitVector::Iterator::~Iterator()
+{
+}
+
+usint
+BitVector::Iterator::sampleForIndex(usint index)
+{
+  usint low = this->parent.select_index->readItemConst(index / this->parent.select_rate);
+  usint high = this->parent.number_of_blocks - 1;
+
+  this->samples.goToItem(2 * low + 2);
+  for(; low < high; low++)
+  {
+    if(this->samples.readItem() > index) { return low; }
+    this->samples.skipItem();
+  }
+
+  return low;
+}
+
+usint
+BitVector::Iterator::sampleForValue(usint value)
+{
+  usint low = this->parent.rank_index->readItemConst(value / this->parent.rank_rate);
+  usint high = this->parent.number_of_blocks - 1;
+
+  this->samples.goToItem(2 * low + 3);
+  for(; low < high; low++)
+  {
+    if(this->samples.readItem() > value) { return low; }
+    this->samples.skipItem();
+  }
+
+  return low;
 }
 
 //--------------------------------------------------------------------------
@@ -235,7 +256,7 @@ VectorEncoder::VectorEncoder(usint block_bytes, usint superblock_size) :
   this->samples_in_superblock = this->superblock_bytes / (2 * sizeof(usint));
   this->current_samples = 0;
 
-  this->buffer = new FastBitBuffer(this->array, this->block_size);
+  this->buffer = new WriteBuffer(this->array, this->block_size);
 }
 
 VectorEncoder::~VectorEncoder()