X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=incbwt%2Fbits%2Fbitvector.cpp;h=ea2aba6163ee03baca5058c057ba9dbd7ff841a8;hb=13e254b7c0ee22dffbc7c3125cee0408f9b375da;hp=b2ed324aee2f65142be6c5fa991272a76bbd670c;hpb=e4b6bdc7cc2a1372e4d4dae50acac55cebcc7e9b;p=SXSI%2FTextCollection.git diff --git a/incbwt/bits/bitvector.cpp b/incbwt/bits/bitvector.cpp index b2ed324..ea2aba6 100644 --- a/incbwt/bits/bitvector.cpp +++ b/incbwt/bits/bitvector.cpp @@ -15,12 +15,12 @@ BitVector::BitVector(std::ifstream& file) : file.read((char*)&(this->number_of_blocks), sizeof(this->number_of_blocks)); file.read((char*)&(this->block_size), sizeof(this->block_size)); - this->array = new usint[this->block_size * this->number_of_blocks]; - file.read((char*)(this->array), this->block_size * this->number_of_blocks * sizeof(usint)); - this->buffer = new FastBitBuffer(this->array, this->block_size); + usint* array_buffer = new usint[this->block_size * this->number_of_blocks]; + file.read((char*)(array_buffer), this->block_size * this->number_of_blocks * sizeof(usint)); + this->array = array_buffer; this->integer_bits = length(this->size); - this->samples = new FastBitBuffer(file, 2 * (this->number_of_blocks + 1), this->integer_bits); + this->samples = new ReadBuffer(file, 2 * (this->number_of_blocks + 1), this->integer_bits); this->indexForRank(); this->indexForSelect(); @@ -34,37 +34,38 @@ BitVector::BitVector(std::FILE * file) : std::fread(&(this->number_of_blocks), sizeof(this->number_of_blocks), 1, file); std::fread(&(this->block_size), sizeof(this->block_size), 1, file); - this->array = new usint[this->block_size * this->number_of_blocks]; - std::fread(this->array, sizeof(usint), this->block_size * this->number_of_blocks, file); - this->buffer = new FastBitBuffer(this->array, this->block_size); + usint* array_buffer = new usint[this->block_size * this->number_of_blocks]; + std::fread(array_buffer, sizeof(usint), this->block_size * this->number_of_blocks, file); + this->array = array_buffer; this->integer_bits = length(this->size); - this->samples = new FastBitBuffer(file, 2 * (this->number_of_blocks + 1), this->integer_bits); + this->samples = new ReadBuffer(file, 2 * (this->number_of_blocks + 1), this->integer_bits); this->indexForRank(); this->indexForSelect(); } + BitVector::BitVector(VectorEncoder& encoder, usint universe_size) : size(universe_size), items(encoder.items), block_size(encoder.block_size), number_of_blocks(encoder.blocks), rank_index(0), select_index(0) { - this->array = new usint[this->block_size * this->number_of_blocks]; - this->buffer = new FastBitBuffer(this->array, this->block_size); + usint* array_buffer = new usint[this->block_size * this->number_of_blocks]; this->integer_bits = length(this->size); - this->samples = new FastBitBuffer(2 * (this->number_of_blocks + 1), this->integer_bits); + WriteBuffer sample_buffer(2 * (this->number_of_blocks + 1), this->integer_bits); // Copy & linearize the array. usint pos = 0; for(std::list::iterator iter = encoder.array_blocks.begin(); iter != encoder.array_blocks.end(); iter++) { - memcpy(this->array + pos, *iter, encoder.superblock_bytes); + memcpy(array_buffer + pos, *iter, encoder.superblock_bytes); pos += encoder.block_size * encoder.blocks_in_superblock; } - memcpy(this->array + pos, encoder.array, encoder.current_blocks * encoder.block_size * sizeof(usint)); + memcpy(array_buffer + pos, encoder.array, encoder.current_blocks * encoder.block_size * sizeof(usint)); + this->array = array_buffer; // Compress the samples. for(std::list::iterator iter = encoder.sample_blocks.begin(); iter != encoder.sample_blocks.end(); iter++) @@ -72,15 +73,17 @@ BitVector::BitVector(VectorEncoder& encoder, usint universe_size) : usint* buf = *iter; for(usint i = 0; i < 2 * encoder.samples_in_superblock; i++) { - this->samples->writeItem(buf[i]); + sample_buffer.writeItem(buf[i]); } } for(usint i = 0; i < 2 * encoder.current_samples; i++) { - this->samples->writeItem(encoder.samples[i]); + sample_buffer.writeItem(encoder.samples[i]); } - this->samples->writeItem(this->items); - this->samples->writeItem(this->size); + sample_buffer.writeItem(this->items); + sample_buffer.writeItem(this->size); + + this->samples = sample_buffer.getReadBuffer(); this->indexForRank(); this->indexForSelect(); @@ -89,80 +92,49 @@ BitVector::BitVector(VectorEncoder& encoder, usint universe_size) : BitVector::~BitVector() { delete[] this->array; - delete this->buffer; - delete this->samples; - delete this->rank_index; - delete this->select_index; + delete this->samples; + delete this->rank_index; + delete this->select_index; } +//-------------------------------------------------------------------------- + void -BitVector::writeTo(std::ofstream& file) +BitVector::writeTo(std::ofstream& file) const { file.write((char*)&(this->size), sizeof(this->size)); file.write((char*)&(this->items), sizeof(this->items)); file.write((char*)&(this->number_of_blocks), sizeof(this->number_of_blocks)); file.write((char*)&(this->block_size), sizeof(this->block_size)); file.write((char*)(this->array), this->block_size * this->number_of_blocks * sizeof(usint)); - this->samples->writeBuffer(file, false); + this->samples->writeBuffer(file); } - void -BitVector::writeTo(FILE* file) +BitVector::writeTo(FILE* file) const { std::fwrite(&(this->size), sizeof(this->size), 1, file); std::fwrite(&(this->items), sizeof(this->items), 1, file); std::fwrite(&(this->number_of_blocks), sizeof(this->number_of_blocks), 1, file); std::fwrite(&(this->block_size), sizeof(this->block_size), 1, file); std::fwrite(this->array, sizeof(usint), this->block_size * this->number_of_blocks, file); - this->samples->writeBuffer(file, false); + this->samples->writeBuffer(file); } - -//-------------------------------------------------------------------------- - usint -BitVector::reportSize() +BitVector::reportSize() const { - usint bytes = this->buffer->reportSize(); - bytes += this->block_size * this->number_of_blocks * sizeof(usint); + // We assume the reportSize() of derived classes includes any class variables of BitVector. + usint bytes = this->block_size * this->number_of_blocks * sizeof(usint); bytes += this->samples->reportSize(); bytes += this->rank_index->reportSize(); bytes += this->select_index->reportSize(); return bytes; } -//-------------------------------------------------------------------------- - -usint -BitVector::sampleForIndex(usint index) -{ - usint low = this->select_index->readItem(index / this->select_rate); - usint high = this->select_index->readItem(index / this->select_rate + 1); - - this->samples->goToItem(2 * low + 2); - for(; low < high; low++) - { - if(this->samples->readItem() > index) { return low; } - this->samples->skipItem(); - } - - return low; -} - usint -BitVector::sampleForValue(usint value) +BitVector::getCompressedSize() const { - usint low = this->rank_index->readItem(value / this->rank_rate); - usint high = this->rank_index->readItem(value / this->rank_rate + 1); - - this->samples->goToItem(2 * low + 3); - for(; low < high; low++) - { - if(this->samples->readItem() > value) { return low; } - this->samples->skipItem(); - } - - return low; + return this->block_size * this->number_of_blocks * sizeof(usint); } //-------------------------------------------------------------------------- @@ -175,7 +147,7 @@ BitVector::indexForRank() usint value_samples = (this->number_of_blocks + BitVector::INDEX_RATE - 1) / BitVector::INDEX_RATE; this->rank_rate = (this->size + value_samples - 1) / value_samples; value_samples = (this->size + this->rank_rate - 1) / this->rank_rate + 1; - this->rank_index = new FastBitBuffer(value_samples, length(this->number_of_blocks - 1)); + WriteBuffer index_buffer(value_samples, length(this->number_of_blocks - 1)); usint current = 0, pointer = 0; this->samples->goToItem(2); @@ -185,12 +157,14 @@ BitVector::indexForRank() usint limit = this->samples->readItem(); while(current < limit) { - this->rank_index->writeItem(pointer); + index_buffer.writeItem(pointer); current += this->rank_rate; } pointer++; } - this->rank_index->writeItem(this->number_of_blocks - 1); + index_buffer.writeItem(this->number_of_blocks - 1); + + this->rank_index = index_buffer.getReadBuffer(); } void @@ -201,7 +175,7 @@ BitVector::indexForSelect() usint index_samples = (this->number_of_blocks + BitVector::INDEX_RATE - 1) / BitVector::INDEX_RATE; this->select_rate = (this->items + index_samples - 1) / index_samples; index_samples = (this->items + this->select_rate - 1) / this->select_rate + 1; - this->select_index = new FastBitBuffer(index_samples, length(this->number_of_blocks - 1)); + WriteBuffer index_buffer(index_samples, length(this->number_of_blocks - 1)); usint current = 0, pointer = 0; this->samples->goToItem(2); @@ -211,12 +185,59 @@ BitVector::indexForSelect() this->samples->skipItem(); while(current < limit) { - this->select_index->writeItem(pointer); + index_buffer.writeItem(pointer); current += this->select_rate; } pointer++; } - this->select_index->writeItem(this->number_of_blocks - 1); + index_buffer.writeItem(this->number_of_blocks - 1); + + this->select_index = index_buffer.getReadBuffer(); +} + +//-------------------------------------------------------------------------- + +BitVector::Iterator::Iterator(const BitVector& par) : + parent(par), + buffer(par.array, par.block_size), + samples(*(par.samples)) +{ +} + +BitVector::Iterator::~Iterator() +{ +} + +usint +BitVector::Iterator::sampleForIndex(usint index) +{ + usint low = this->parent.select_index->readItemConst(index / this->parent.select_rate); + usint high = this->parent.number_of_blocks - 1; + + this->samples.goToItem(2 * low + 2); + for(; low < high; low++) + { + if(this->samples.readItem() > index) { return low; } + this->samples.skipItem(); + } + + return low; +} + +usint +BitVector::Iterator::sampleForValue(usint value) +{ + usint low = this->parent.rank_index->readItemConst(value / this->parent.rank_rate); + usint high = this->parent.number_of_blocks - 1; + + this->samples.goToItem(2 * low + 3); + for(; low < high; low++) + { + if(this->samples.readItem() > value) { return low; } + this->samples.skipItem(); + } + + return low; } //-------------------------------------------------------------------------- @@ -235,7 +256,7 @@ VectorEncoder::VectorEncoder(usint block_bytes, usint superblock_size) : this->samples_in_superblock = this->superblock_bytes / (2 * sizeof(usint)); this->current_samples = 0; - this->buffer = new FastBitBuffer(this->array, this->block_size); + this->buffer = new WriteBuffer(this->array, this->block_size); } VectorEncoder::~VectorEncoder()