X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=incbwt%2Fbits%2Fbitvector.cpp;fp=incbwt%2Fbits%2Fbitvector.cpp;h=b76fa9d1b82cd0dcf00dd4d65e0d4dc7faa9587a;hb=40ddf9aca842bdc081b6350a4ebfe36b066c94c9;hp=0000000000000000000000000000000000000000;hpb=af8938dbee21244687184dd0502a84ce1af70c50;p=SXSI%2FTextCollection.git diff --git a/incbwt/bits/bitvector.cpp b/incbwt/bits/bitvector.cpp new file mode 100644 index 0000000..b76fa9d --- /dev/null +++ b/incbwt/bits/bitvector.cpp @@ -0,0 +1,270 @@ +#include + +#include "bitvector.h" + + +namespace CSA +{ + + +BitVector::BitVector(std::ifstream& file) : + rank_index(0), select_index(0) +{ + file.read((char*)&(this->size), sizeof(this->size)); + file.read((char*)&(this->items), sizeof(this->items)); + file.read((char*)&(this->number_of_blocks), sizeof(this->number_of_blocks)); + file.read((char*)&(this->block_size), sizeof(this->block_size)); + + this->array = new usint[this->block_size * this->number_of_blocks]; + file.read((char*)(this->array), this->block_size * this->number_of_blocks * sizeof(usint)); + this->buffer = new FastBitBuffer(this->array, this->block_size); + + this->integer_bits = length(this->size); + this->samples = new FastBitBuffer(file, 2 * (this->number_of_blocks + 1), this->integer_bits); + + this->indexForRank(); + this->indexForSelect(); +} + +BitVector::BitVector(VectorEncoder& encoder, usint universe_size) : + size(universe_size), items(encoder.items), + block_size(encoder.block_size), + number_of_blocks(encoder.blocks), + rank_index(0), select_index(0) +{ + this->array = new usint[this->block_size * this->number_of_blocks]; + this->buffer = new FastBitBuffer(this->array, this->block_size); + + this->integer_bits = length(this->size); + this->samples = new FastBitBuffer(2 * (this->number_of_blocks + 1), this->integer_bits); + + // Copy & linearize the array. + usint pos = 0; + for(std::list::iterator iter = encoder.array_blocks.begin(); iter != encoder.array_blocks.end(); iter++) + { + memcpy(this->array + pos, *iter, encoder.superblock_bytes); + pos += encoder.block_size * encoder.blocks_in_superblock; + } + memcpy(this->array + pos, encoder.array, encoder.current_blocks * encoder.block_size * sizeof(usint)); + + // Compress the samples. + for(std::list::iterator iter = encoder.sample_blocks.begin(); iter != encoder.sample_blocks.end(); iter++) + { + usint* buf = *iter; + for(usint i = 0; i < 2 * encoder.samples_in_superblock; i++) + { + this->samples->writeItem(buf[i]); + } + } + for(usint i = 0; i < 2 * encoder.current_samples; i++) + { + this->samples->writeItem(encoder.samples[i]); + } + this->samples->writeItem(this->items); + this->samples->writeItem(this->size); + + this->indexForRank(); + this->indexForSelect(); +} + +BitVector::~BitVector() +{ + delete[] this->array; + delete this->buffer; + delete this->samples; + delete this->rank_index; + delete this->select_index; +} + +void +BitVector::writeTo(std::ofstream& file) +{ + file.write((char*)&(this->size), sizeof(this->size)); + file.write((char*)&(this->items), sizeof(this->items)); + file.write((char*)&(this->number_of_blocks), sizeof(this->number_of_blocks)); + file.write((char*)&(this->block_size), sizeof(this->block_size)); + file.write((char*)(this->array), this->block_size * this->number_of_blocks * sizeof(usint)); + this->samples->writeBuffer(file, false); +} + +//-------------------------------------------------------------------------- + +usint +BitVector::reportSize() +{ + usint bytes = this->buffer->reportSize(); + bytes += this->block_size * this->number_of_blocks * sizeof(usint); + bytes += this->samples->reportSize(); + bytes += this->rank_index->reportSize(); + bytes += this->select_index->reportSize(); + return bytes; +} + +//-------------------------------------------------------------------------- + +usint +BitVector::sampleForIndex(usint index) +{ + usint low = this->select_index->readItem(index / this->select_rate); + usint high = this->select_index->readItem(index / this->select_rate + 1); + + this->samples->goToItem(2 * low + 2); + for(; low < high; low++) + { + if(this->samples->readItem() > index) { return low; } + this->samples->skipItem(); + } + + return low; +} + +usint +BitVector::sampleForValue(usint value) +{ + usint low = this->rank_index->readItem(value / this->rank_rate); + usint high = this->rank_index->readItem(value / this->rank_rate + 1); + + this->samples->goToItem(2 * low + 3); + for(; low < high; low++) + { + if(this->samples->readItem() > value) { return low; } + this->samples->skipItem(); + } + + return low; +} + +//-------------------------------------------------------------------------- + +void +BitVector::indexForRank() +{ + delete this->rank_index; + + usint value_samples = (this->number_of_blocks + BitVector::INDEX_RATE - 1) / BitVector::INDEX_RATE; + this->rank_rate = (this->size + value_samples - 1) / value_samples; + value_samples = (this->size + this->rank_rate - 1) / this->rank_rate + 1; + this->rank_index = new FastBitBuffer(value_samples, length(this->number_of_blocks - 1)); + + usint current = 0, pointer = 0; + this->samples->goToItem(2); + while(this->samples->hasNextItem()) + { + this->samples->skipItem(); + usint limit = this->samples->readItem(); + while(current < limit) + { + this->rank_index->writeItem(pointer); + current += this->rank_rate; + } + pointer++; + } + this->rank_index->writeItem(this->number_of_blocks - 1); +} + +void +BitVector::indexForSelect() +{ + delete this->select_index; + + usint index_samples = (this->number_of_blocks + BitVector::INDEX_RATE - 1) / BitVector::INDEX_RATE; + this->select_rate = (this->items + index_samples - 1) / index_samples; + index_samples = (this->items + this->select_rate - 1) / this->select_rate + 1; + this->select_index = new FastBitBuffer(index_samples, length(this->number_of_blocks - 1)); + + usint current = 0, pointer = 0; + this->samples->goToItem(2); + while(this->samples->hasNextItem()) + { + usint limit = this->samples->readItem(); + this->samples->skipItem(); + while(current < limit) + { + this->select_index->writeItem(pointer); + current += this->select_rate; + } + pointer++; + } + this->select_index->writeItem(this->number_of_blocks - 1); +} + +//-------------------------------------------------------------------------- + +VectorEncoder::VectorEncoder(usint block_bytes, usint superblock_size) : + size(0), items(0), blocks(0), + block_size(BYTES_TO_WORDS(block_bytes)), + superblock_bytes(superblock_size) +{ + this->array = new usint[this->superblock_bytes / sizeof(usint)]; + memset(this->array, 0, this->superblock_bytes); + this->blocks_in_superblock = this->superblock_bytes / (sizeof(usint) * this->block_size); + this->current_blocks = 0; + + this->samples = new usint[this->superblock_bytes / sizeof(usint)]; + this->samples_in_superblock = this->superblock_bytes / (2 * sizeof(usint)); + this->current_samples = 0; + + this->buffer = new FastBitBuffer(this->array, this->block_size); +} + +VectorEncoder::~VectorEncoder() +{ + delete[] this->array; + + delete this->buffer; + for(std::list::iterator iter = this->array_blocks.begin(); iter != this->array_blocks.end(); iter++) + { + delete[] *iter; + } + + delete[] this->samples; + for(std::list::iterator iter = this->sample_blocks.begin(); iter != this->sample_blocks.end(); iter++) + { + delete[] *iter; + } +} + +void +VectorEncoder::addNewBlock() +{ + this->blocks++; + this->current_blocks++; + this->current_samples++; + + // Do we need a new superblock for the block? + if(this->current_blocks > this->blocks_in_superblock) + { + this->array_blocks.push_back(this->array); + this->array = new usint[this->superblock_bytes / sizeof(usint)]; + memset(this->array, 0, this->superblock_bytes); + this->current_blocks = 1; + } + this->buffer->moveBuffer(this->array + (this->block_size * (this->current_blocks - 1))); + + // Do we need a new superblock for the sample? + if(this->current_samples > this->samples_in_superblock) + { + this->sample_blocks.push_back(this->samples); + this->samples = new usint[this->superblock_bytes / sizeof(usint)]; + this->current_samples = 1; + } + this->samples[2 * this->current_samples - 2] = this->items - 1; + this->samples[2 * this->current_samples - 1] = this->size - 1; +} + +void +VectorEncoder::setFirstBit(usint value) +{ + this->samples[0] = 0; + this->samples[1] = value; + + this->size = value + 1; + this->items = 1; + this->blocks = 1; + + this->current_blocks = 1; + this->current_samples = 1; +} + + +} // namespace CSA