X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=incbwt%2Fbits%2Frlevector.cpp;h=a9685c6c62c4104a82e999f12902d45de408eb4f;hb=13e254b7c0ee22dffbc7c3125cee0408f9b375da;hp=e07c2b41549245692881d2713afeb54314a3c8e5;hpb=e4b6bdc7cc2a1372e4d4dae50acac55cebcc7e9b;p=SXSI%2FTextCollection.git diff --git a/incbwt/bits/rlevector.cpp b/incbwt/bits/rlevector.cpp index e07c2b4..a9685c6 100644 --- a/incbwt/bits/rlevector.cpp +++ b/incbwt/bits/rlevector.cpp @@ -13,7 +13,7 @@ RLEVector::RLEVector(std::ifstream& file) : { } -RLEVector::RLEVector(RLEEncoder& encoder, usint universe_size) : +RLEVector::RLEVector(Encoder& encoder, usint universe_size) : BitVector(encoder, universe_size) { } @@ -25,9 +25,30 @@ RLEVector::~RLEVector() //-------------------------------------------------------------------------- usint -RLEVector::rank(usint value, bool at_least) +RLEVector::reportSize() const { - if(value >= this->size) { return this->items; } + usint bytes = sizeof(*this); + bytes += BitVector::reportSize(); + return bytes; +} + +//-------------------------------------------------------------------------- + +RLEVector::Iterator::Iterator(const RLEVector& par) : + BitVector::Iterator(par) +{ +} + +RLEVector::Iterator::~Iterator() +{ +} + +usint +RLEVector::Iterator::rank(usint value, bool at_least) +{ + const RLEVector& par = (const RLEVector&)(this->parent); + + if(value >= par.size) { return par.items; } this->valueLoop(value); @@ -46,17 +67,19 @@ RLEVector::rank(usint value, bool at_least) } usint -RLEVector::select(usint index) +RLEVector::Iterator::select(usint index) { - if(index >= this->items) { return this->size; } + const RLEVector& par = (const RLEVector&)(this->parent); + + if(index >= par.items) { return par.size; } this->getSample(this->sampleForIndex(index)); this->run = 0; usint lim = index - this->sample.first; while(this->cur < lim) { - this->val += this->buffer->readDeltaCode(); - usint temp = this->buffer->readDeltaCode(); + this->val += this->buffer.readDeltaCode(); + usint temp = this->buffer.readDeltaCode(); this->val += temp - 1; this->cur += temp; } @@ -71,7 +94,7 @@ RLEVector::select(usint index) } usint -RLEVector::selectNext() +RLEVector::Iterator::selectNext() { if(this->cur >= this->block_items) { @@ -88,17 +111,19 @@ RLEVector::selectNext() } else { - this->val += this->buffer->readDeltaCode(); - this->run = this->buffer->readDeltaCode() - 1; + this->val += this->buffer.readDeltaCode(); + this->run = this->buffer.readDeltaCode() - 1; } return this->val; } pair_type -RLEVector::valueAfter(usint value) +RLEVector::Iterator::valueAfter(usint value) { - if(value >= this->size) { return pair_type(this->size, this->items); } + const RLEVector& par = (const RLEVector&)(this->parent); + + if(value >= par.size) { return pair_type(par.size, par.items); } this->valueLoop(value); @@ -112,7 +137,7 @@ RLEVector::valueAfter(usint value) } pair_type -RLEVector::nextValue() +RLEVector::Iterator::nextValue() { if(this->cur >= this->block_items) { @@ -129,15 +154,15 @@ RLEVector::nextValue() } else { - this->val += this->buffer->readDeltaCode(); - this->run = this->buffer->readDeltaCode() - 1; + this->val += this->buffer.readDeltaCode(); + this->run = this->buffer.readDeltaCode() - 1; } return pair_type(this->val, this->sample.first + this->cur); } pair_type -RLEVector::selectRun(usint index, usint max_length) +RLEVector::Iterator::selectRun(usint index, usint max_length) { usint value = this->select(index); @@ -148,7 +173,7 @@ RLEVector::selectRun(usint index, usint max_length) } pair_type -RLEVector::selectNextRun(usint max_length) +RLEVector::Iterator::selectNextRun(usint max_length) { usint value = this->selectNext(); @@ -159,29 +184,43 @@ RLEVector::selectNextRun(usint max_length) } bool -RLEVector::isSet(usint value) +RLEVector::Iterator::isSet(usint value) { - if(value >= this->size) { return false; } + const RLEVector& par = (const RLEVector&)(this->parent); + + if(value >= par.size) { return false; } this->valueLoop(value); return (this->val == value); } -//-------------------------------------------------------------------------- - usint -RLEVector::reportSize() +RLEVector::Iterator::countRuns() { - usint bytes = sizeof(*this); - bytes += BitVector::reportSize(); - return bytes; + const RLEVector& par = (const RLEVector&)(this->parent); + + if(par.items == 0) { return 0; } + + usint runs = 1; + pair_type res = this->selectRun(0, par.items); + usint last = res.first + res.second; + + while(last < par.size) + { + res = this->selectNextRun(par.items); + if(res.first < par.size && res.first > last + 1) { runs++; } + last = res.first + res.second; + } + + return runs; } //-------------------------------------------------------------------------- RLEEncoder::RLEEncoder(usint block_bytes, usint superblock_size) : - VectorEncoder(block_bytes, superblock_size) + VectorEncoder(block_bytes, superblock_size), + run(EMPTY_PAIR) { } @@ -189,6 +228,12 @@ RLEEncoder::~RLEEncoder() { } +void +RLEEncoder::setBit(usint value) +{ + this->setRun(value, 1); +} + void RLEEncoder::setRun(usint start, usint len) { @@ -243,5 +288,36 @@ RLEEncoder::setRun(usint start, usint len) } } +void +RLEEncoder::addBit(usint value) +{ + this->addRun(value, 1); +} + +void +RLEEncoder::addRun(usint start, usint len) +{ + if(this->run.second == 0) + { + this->run = pair_type(start, len); + } + else if(start == this->run.first + this->run.second) + { + this->run.second += len; + } + else + { + this->setRun(this->run.first, this->run.second); + this->run = pair_type(start, len); + } +} + +void +RLEEncoder::flush() +{ + this->setRun(this->run.first, this->run.second); + this->run.second = 0; +} + } // namespace CSA