X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=incbwt%2Fbits%2Fbitvector.h;fp=incbwt%2Fbits%2Fbitvector.h;h=e931b0a2ff5b4bb343a3009a10cf4c2f54bea658;hb=13e254b7c0ee22dffbc7c3125cee0408f9b375da;hp=486aefbed883996a607220360e23f090e9fa5cba;hpb=e4b6bdc7cc2a1372e4d4dae50acac55cebcc7e9b;p=SXSI%2FTextCollection.git diff --git a/incbwt/bits/bitvector.h b/incbwt/bits/bitvector.h index 486aefb..e931b0a 100644 --- a/incbwt/bits/bitvector.h +++ b/incbwt/bits/bitvector.h @@ -29,6 +29,7 @@ class VectorEncoder This should be implemented in any inherited class. void setBit(usint value); // Values must be in increasing order. + void setRun(usint start, usint len); */ void addNewBlock(); @@ -37,7 +38,7 @@ class VectorEncoder usint size, items, blocks; usint block_size, superblock_bytes; - FastBitBuffer* buffer; + WriteBuffer* buffer; std::list array_blocks; usint* array; @@ -46,6 +47,13 @@ class VectorEncoder std::list sample_blocks; usint* samples; usint samples_in_superblock, current_samples; + + protected: + + // These are not allowed. + VectorEncoder(); + VectorEncoder(const VectorEncoder&); + VectorEncoder& operator = (const VectorEncoder&); }; @@ -59,19 +67,76 @@ class BitVector const static usint INDEX_RATE = 5; BitVector(std::ifstream& file); - BitVector(std::FILE* file); + BitVector(std::FILE * file); BitVector(VectorEncoder& encoder, usint universe_size); ~BitVector(); - void writeTo(std::ofstream& file); - void writeTo(std::FILE* file); +//-------------------------------------------------------------------------- + + void writeTo(std::ofstream& file) const; + void writeTo(std::FILE *file) const; + + inline usint getSize() const { return this->size; } + inline usint getNumberOfItems() const { return this->items; } + inline usint getBlockSize() const { return this->block_size; } + + // This returns only the sizes of the dynamically allocated structures. + usint reportSize() const; + + usint getCompressedSize() const; //-------------------------------------------------------------------------- + class Iterator + { + public: + Iterator(const BitVector& par); + ~Iterator(); + + inline bool hasNext() const + { + return (this->sample.first + this->cur < this->parent.items - 1); + } + + protected: + const BitVector& parent; + + usint block; + pair_type sample; + usint cur, val, run; // cur == 0 is the sample + usint block_items; + + ReadBuffer buffer, samples; + + /* + These functions return the sample corresponding to the block the given + index/value might be found in. Parameters are assumed to be valid. + */ + usint sampleForIndex(usint index); + usint sampleForValue(usint value); + + inline void getSample(usint sample_number) + { + this->block = sample_number; + this->samples.goToItem(2 * sample_number); + this->sample.first = this->samples.readItem(); + this->sample.second = this->samples.readItem(); + this->cur = 0; + this->val = this->sample.second; + this->block_items = this->samples.readItem() - this->sample.first - 1; + this->buffer.moveBuffer(this->parent.array + (this->block * this->parent.block_size)); + } + + // These are not allowed. + Iterator(); + Iterator(const Iterator&); + Iterator& operator = (const Iterator&); + }; + /* - These should be implemented in any actual bit vector. + These should be implemented in any actual iterator. - // rank is not iterator-safe + // rank invalidates the iterator // regular: \sum_{i = 0}^{value} V[i] // at_least: \sum_{i = 0}^{value - 1} V[i] + 1 usint rank(usint value, bool at_least = false); @@ -92,69 +157,31 @@ class BitVector pair_type selectRun(usint index, usint max_length); pair_type selectNextRun(usint max_length); - // isSet is not iterator-safe + // isSet invalidates the iterator bool isSet(usint value); // V[value] */ - inline bool hasNext() - { - return (this->sample.first + cur < this->items - 1); - } - //-------------------------------------------------------------------------- - inline usint getSize() { return this->size; } - inline usint getNumberOfItems() { return this->items; } - inline usint getBlockSize() { return this->block_size; } - - // This returns only the sizes of dynamically allocated structures. - usint reportSize(); - protected: usint size, items; - FastBitBuffer* buffer; - usint* array; - usint block_size; - usint number_of_blocks; + const usint* array; + usint block_size; + usint number_of_blocks; /* Each sample is (rank(i) - 1, i) where V[i] = 1. Number of samples is number_of_blocks + 1. */ - FastBitBuffer* samples; - usint integer_bits; - - FastBitBuffer* rank_index; - usint rank_rate; - - FastBitBuffer* select_index; - usint select_rate; + ReadBuffer* samples; + usint integer_bits; - // Iterator data. These are enough for a gap-encoded vector. - usint block; - pair_type sample; - usint cur, val; // cur == 0 is the sample - usint block_items; + ReadBuffer* rank_index; + usint rank_rate; - /* - These functions return the sample corresponding to the block the given - index/value might be found in. Parameters are assumed to be valid. - */ - usint sampleForIndex(usint index); - usint sampleForValue(usint value); - - inline void getSample(usint sample_number) - { - this->block = sample_number; - this->samples->goToItem(2 * sample_number); - this->sample.first = this->samples->readItem(); - this->sample.second = this->samples->readItem(); - this->cur = 0; - this->val = this->sample.second; - this->block_items = this->samples->readItem() - this->sample.first - 1; - this->buffer->moveBuffer(this->array + (this->block * this->block_size)); - } + ReadBuffer* select_index; + usint select_rate; /* These functions build a higher level index for faster rank/select queries. @@ -163,6 +190,11 @@ class BitVector */ void indexForRank(); void indexForSelect(); + + // These are not allowed. + BitVector(); + BitVector(const BitVector&); + BitVector& operator = (const BitVector&); };