X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;ds=sidebyside;f=incbwt%2Fbits%2Fbitbuffer.h;fp=incbwt%2Fbits%2Fbitbuffer.h;h=cd9e2ef7838244bd031dc6355986382e15567901;hb=13e254b7c0ee22dffbc7c3125cee0408f9b375da;hp=1579622cc9d1cfbcb6ca7a51158823258e23a352;hpb=e4b6bdc7cc2a1372e4d4dae50acac55cebcc7e9b;p=SXSI%2FTextCollection.git diff --git a/incbwt/bits/bitbuffer.h b/incbwt/bits/bitbuffer.h index 1579622..cd9e2ef 100644 --- a/incbwt/bits/bitbuffer.h +++ b/incbwt/bits/bitbuffer.h @@ -3,9 +3,9 @@ #include #include +#include #include #include -#include // defines std::memset, added by Kim #include "../misc/definitions.h" @@ -14,90 +14,152 @@ namespace CSA { -template -class GenericBitBuffer +class ReadBuffer { public: - GenericBitBuffer(usint words); - GenericBitBuffer(usint _items, usint item_size); - GenericBitBuffer(std::ifstream& file, usint words); - GenericBitBuffer(std::ifstream& file, usint _items, usint item_size); - GenericBitBuffer(std::FILE* file, usint _items, usint item_size); - GenericBitBuffer(Data* buffer, usint words); - GenericBitBuffer(Data* buffer, usint _items, usint item_size); - ~GenericBitBuffer(); - - void writeBuffer(std::ofstream& file, bool erase = true); - void writeBuffer(std::FILE * file, bool erase = true); - void readBuffer(std::ifstream& file, usint words, bool erase = true); - void setBuffer(Data* buffer, usint words); - - // We assume we are already using a buffer not owned by this. - inline void moveBuffer(Data* buffer) + ReadBuffer(std::ifstream& file, usint words) : + size(words), + item_bits(1), + items(0), + free_buffer(true) { + usint* buffer = new usint[this->size]; + memset(buffer, 0, this->size * sizeof(usint)); + file.read((char*)buffer, this->size * sizeof(usint)); this->data = buffer; - this->pos = 0; - this->bits = DATA_BITS; - this->current = 0; + this->reset(); } -//-------------------------------------------------------------------------- + ReadBuffer(std::ifstream& file, usint _items, usint item_size) : + item_bits(item_size), + items(_items), + free_buffer(true) + { + this->size = bitsToWords(this->items * this->item_bits); + usint* buffer = new usint[this->size]; + memset(buffer, 0, this->size * sizeof(usint)); + file.read((char*)buffer, this->size * sizeof(usint)); + this->data = buffer; + this->reset(); + } - inline void reset(bool erase) + ReadBuffer(std::FILE* file, usint _items, usint item_size) : + item_bits(item_size), + items(_items), + free_buffer(true) { - this->pos = 0; - this->bits = DATA_BITS; - this->current = 0; - if(erase) - { - memset(this->data, 0, this->size * sizeof(Data)); - } + this->size = bitsToWords(this->items * this->item_bits); + usint* buffer = new usint[this->size]; + memset(buffer, 0, this->size * sizeof(usint)); + fread(buffer, sizeof(usint), this->size, file); + this->data = buffer; + this->reset(); } - inline void skipBits(usint count) + // This version does not delete the data when deleted. + ReadBuffer(const usint* buffer, usint words) : + size(words), + item_bits(1), + items(0), + free_buffer(false) { - if(count < this->bits) - { - this->bits -= count; - return; - } + this->data = buffer; + this->reset(); + } - count -= this->bits; - this->pos += 1 + count / DATA_BITS; - this->bits = DATA_BITS - count % DATA_BITS; + // This version does not delete the data when deleted. + ReadBuffer(const usint* buffer, usint _items, usint item_size) : + item_bits(item_size), + items(_items), + free_buffer(false) + { + this->size = bitsToWords(this->items * this->item_bits); + this->data = buffer; + this->reset(); + } + + // This version does not delete the data when deleted. + ReadBuffer(const ReadBuffer& original) : + data(original.data), + size(original.size), + item_bits(original.item_bits), + items(original.items), + free_buffer(false) + { + this->reset(); } - inline void rewind(usint count) + ~ReadBuffer() { - this->bits += count; - if(this->bits > DATA_BITS) + if(this->free_buffer) { - usint back = (this->bits - 1) / DATA_BITS; - this->pos -= back; - this->bits -= back * DATA_BITS; + delete[] this->data; } } //-------------------------------------------------------------------------- - inline usint bitsLeft() + void claimData() { - return this->bits + DATA_BITS * (this->size - this->pos - 1); + this->free_buffer = true; } - inline void writeBits(usint value, usint count) + void writeBuffer(std::ofstream& file) const + { + file.write((const char*)this->data, this->size * sizeof(usint)); + } + void writeBuffer(std::FILE* file) const { - while(count >= this->bits) + fwrite(this->data, sizeof(usint), this->size, file); + } + + // The buffer will no longer own the data. + void moveBuffer(const usint* buffer) + { + if(this->free_buffer) { - count -= this->bits; - this->data[this->pos] |= GET(LOWER(value, count), this->bits); - this->pos++; this->bits = DATA_BITS; + delete[] this->data; } - if(count > 0) + this->free_buffer = false; + + this->data = buffer; + this->reset(); + } + + usint reportSize() const + { + usint bytes = sizeof(*this); + if(this->free_buffer) { bytes += this->size * sizeof(usint); } + return bytes; + } + +//-------------------------------------------------------------------------- + + inline void reset() + { + this->pos = 0; + this->bits = WORD_BITS; + this->current = 0; + } + + inline void skipBits(usint count) + { + if(count < this->bits) { this->bits -= count; - this->data[this->pos] |= HIGHER(GET(value, count), this->bits); + return; } + + count -= this->bits; + this->pos += 1 + count / WORD_BITS; + this->bits = WORD_BITS - count % WORD_BITS; + } + +//-------------------------------------------------------------------------- + + inline usint bitsLeft() const + { + return this->bits + WORD_BITS * (this->size - this->pos - 1); } // Returns nonzero if bit is 1 @@ -106,7 +168,7 @@ class GenericBitBuffer this->bits--; usint bit = this->data[this->pos] & ((usint)1 << this->bits); - if(this->bits == 0) { this->pos++; this->bits = DATA_BITS; } + if(this->bits == 0) { this->pos++; this->bits = WORD_BITS; } return bit; } @@ -115,11 +177,11 @@ class GenericBitBuffer { usint value = 0; - while(count >= this->bits) + if(count >= this->bits) { count -= this->bits; value |= HIGHER(GET(this->data[this->pos], this->bits), count); - this->pos++; this->bits = DATA_BITS; + this->pos++; this->bits = WORD_BITS; } if(count > 0) { @@ -133,27 +195,52 @@ class GenericBitBuffer //-------------------------------------------------------------------------- /* - These operations work on fixed-size items. + These operations work on 4-bit nibbles. + Do not use these with the bit-level operations. */ - inline usint getItemSize() + inline usint readNibble() { - return this->item_bits; + this->bits -= 4; + usint value = GET(LOWER(this->data[this->pos], this->bits), 4); + + if(this->bits == 0) { this->pos++; this->bits = WORD_BITS; } + + return value; } -/* - inline void setItemSize(usint new_size) + // Nibble code for positive integers. + inline usint readNibbleCode() { - if(new_size == 0) { return; } - this->item_bits = new_size; + usint temp, value = 0, shift = 0; + do + { + temp = this->readNibble(); + value |= (temp & 0x7) << shift; + shift += 3; + } + while((temp & 0x8) == 0); + + return value + 1; + } + +//-------------------------------------------------------------------------- + + /* + These operations work on fixed-size items. No sanity checks are made + for parameter values. + */ + + inline usint getItemSize() const + { + return this->item_bits; } -*/ inline void goToItem(usint item) { usint b = item * this->item_bits; - this->pos = b / DATA_BITS; - this->bits = DATA_BITS - b % DATA_BITS; + this->pos = b / WORD_BITS; + this->bits = WORD_BITS - b % WORD_BITS; this->current = item; } @@ -174,15 +261,33 @@ class GenericBitBuffer return this->readItem(0); } - inline bool hasNextItem() + inline usint readItemConst(usint item) const { - return (this->current < this->items); + usint b = item * this->item_bits; + usint p = b / WORD_BITS; + b = WORD_BITS - b % WORD_BITS; + + usint c = this->item_bits; + usint value = 0; + + if(c >= b) + { + c -= b; + value |= HIGHER(GET(this->data[p], b), c); + p++; b = WORD_BITS; + } + if(c > 0) + { + b -= c; + value |= GET(LOWER(this->data[p], b), c); + } + + return value; } - inline void writeItem(usint item) + inline bool hasNextItem() const { - this->writeBits(item, this->item_bits); - this->current++; + return (this->current < this->items); } inline void skipItem() @@ -197,311 +302,338 @@ class GenericBitBuffer Delta coding for positive integers */ - inline bool canDeltaCode(usint value) + inline usint readDeltaCode() { - return this->deltaCodeLength(value) <= this->bitsLeft(); + usint len = 0; + while(this->readBit() == 0) { len++; } + + usint temp = (((usint)1 << len) | this->readBits(len)) - 1; + temp = ((usint)1 << temp) | this->readBits(temp); + return temp; } - inline usint deltaCodeLength(usint value) +//-------------------------------------------------------------------------- + + private: + const usint* data; + usint size, item_bits, items; + bool free_buffer; + + // Iterator data + usint pos, bits, current; + + inline static usint bitsToWords(usint _bits) { return (_bits + WORD_BITS - 1) / WORD_BITS; } + + // These are not allowed. + ReadBuffer(); + ReadBuffer& operator = (const ReadBuffer&); +}; + + +//-------------------------------------------------------------------------- + + +class WriteBuffer +{ + public: + WriteBuffer(usint words) : + size(words), + item_bits(1), + items(0), + free_buffer(true) { - usint len = length(value); - usint llen = length(len); - return (len + llen + llen - 2); + this->data = new usint[words]; + memset(this->data, 0, this->size * sizeof(usint)); + this->reset(); } - // This version returns false if there is no space left for the encoding. - inline bool writeDeltaCode(usint value) + WriteBuffer(usint _items, usint item_size) : + item_bits(item_size), + items(_items), + free_buffer(true) { - usint len = length(value); - usint llen = length(len); - - if(len + llen + llen - 2 > this->bitsLeft()) { return false; } + this->size = bitsToWords(this->items * this->item_bits); + this->data = new usint[this->size]; + memset(this->data, 0, this->size * sizeof(usint)); + this->reset(); + } - // this->writeBits(0, llen - 1); // Now included in the next writeBits() - this->writeBits(len, llen + llen - 1); - this->writeBits(value, len - 1); - return true; + // This version does not delete the data when deleted. + WriteBuffer(usint* buffer, usint words) : + size(words), + item_bits(1), + items(0), + free_buffer(false) + { + this->data = buffer; + this->reset(); } - // This version assumes the code fits into the buffer. - inline void writeDeltaCodeDirect(usint value) + // This version does not delete the data when deleted. + WriteBuffer(usint* buffer, usint _items, usint item_size) : + item_bits(item_size), + items(_items), + free_buffer(false) { - usint len = length(value); - usint llen = length(len); + this->size = bitsToWords(this->items * this->item_bits); + this->data = buffer; + this->reset(); + } - // this->writeBits(0, llen - 1); // Now included in the next writeBits() - this->writeBits(len, llen + llen - 1); - this->writeBits(value, len - 1); + ~WriteBuffer() + { + if(this->free_buffer) + { + delete[] this->data; + } } - // We assume the code fits into usint: - // 32-bit: value < 2^24 - // 64-bit: value < 2^54 - inline void writeDeltaCodeFast(usint value) +//-------------------------------------------------------------------------- + + // This transfers the ownership of the data to the read buffer. + ReadBuffer* getReadBuffer() { - usint len = length(value); + ReadBuffer* buffer; + if(this->items > 0) + { + buffer = new ReadBuffer(this->data, this->items, this->item_bits); + } + else + { + buffer = new ReadBuffer(this->data, this->size); + } - value ^= ((usint)1 << (len - 1)); - this->writeBits((len << (len - 1)) | value, len + 2 * length(len) - 2); + if(this->free_buffer) + { + buffer->claimData(); + this->free_buffer = false; + } + + return buffer; } - inline usint readDeltaCode() + void writeBuffer(std::ofstream& file) const { - usint len = 0; - while(this->readBit() == 0) { len++; } + file.write((char*)this->data, this->size * sizeof(usint)); + } + void writeBuffer(std::FILE *file) const + { + fwrite(this->data, sizeof(usint), this->size, file); + } - usint temp = (((usint)1 << len) | this->readBits(len)) - 1; - temp = ((usint)1 << temp) | this->readBits(temp); - return temp; + // The buffer will no longer own the data. + void moveBuffer(usint* buffer) + { + if(this->free_buffer) + { + delete[] this->data; + } + this->free_buffer = false; + + this->data = buffer; + this->reset(); + } + + usint reportSize() const + { + usint bytes = sizeof(*this); + if(this->free_buffer) { bytes += this->size * sizeof(usint); } + return bytes; } //-------------------------------------------------------------------------- - /* - Gamma coding for positive integers - */ + inline void reset() + { + this->pos = 0; + this->bits = WORD_BITS; + this->current = 0; + } - inline bool canGammaCode(usint value) + inline void skipBits(usint count) { - return this->gammaCodeLength(value) <= this->bitsLeft(); + if(count < this->bits) + { + this->bits -= count; + return; + } + + count -= this->bits; + this->pos += 1 + count / WORD_BITS; + this->bits = WORD_BITS - count % WORD_BITS; } - inline usint gammaCodeLength(usint value) +//-------------------------------------------------------------------------- + + inline usint bitsLeft() const { - return 2 * length(value) - 1; + return this->bits + WORD_BITS * (this->size - this->pos - 1); } - // This version returns false if there is no space left for the encoding. - inline bool writeGammaCode(usint value) + inline void writeBits(usint value, usint count) { - usint len = length(value); + if(count >= this->bits) + { + count -= this->bits; + this->data[this->pos] |= GET(LOWER(value, count), this->bits); + this->pos++; this->bits = WORD_BITS; + } + if(count > 0) + { + this->bits -= count; + this->data[this->pos] |= HIGHER(GET(value, count), this->bits); + } + } - if(len > this->bitsLeft()) { return false; } +//-------------------------------------------------------------------------- - this->writeBits(0, len - 1); - this->writeBits(value, len); - return true; + /* + These operations work on fixed-size items. + */ + + inline usint getItemSize() const + { + return this->item_bits; } - // This version assumes the code fits into the buffer. - inline void writeGammaCodeDirect(usint value) + inline void goToItem(usint item) { - usint len = length(value); + usint b = item * this->item_bits; + this->pos = b / WORD_BITS; + this->bits = WORD_BITS - b % WORD_BITS; + this->current = item; + } - this->writeBits(0, len - 1); - this->writeBits(value, len); + inline bool hasNextItem() const + { + return (this->current < this->items); } - // We assume the code fits into usint: - // 32-bit: value < 2^16 - // 64-bit: value < 2^32 - inline void writeGammaCodeFast(usint value) + inline void writeItem(usint item) { - this->writeBits(value, this->gammaCodeLength(value)); + this->writeBits(item, this->item_bits); + this->current++; } - inline usint readGammaCode() + inline void skipItem() { - usint len = 1; - while(this->readBit() == 0) { len++; } - return ((usint)1 << len) | this->readBits(len); + this->skipBits(this->item_bits); + this->current++; } //-------------------------------------------------------------------------- - usint reportSize(); + /* + Nibble coding for positive integers. + */ -//-------------------------------------------------------------------------- + inline usint nibbleCodeLength(usint value) const + { + usint b = 0; + value--; - private: - Data* data; - usint size, pos, bits; - usint item_bits, items, current; - bool free_buffer; + do + { + b += 4; + value >>= 3; + } + while(value > 0); - const static usint DATA_BITS = sizeof(Data) * CHAR_BIT; + return b; + } - inline static usint bitsToData(usint _bits) { return (_bits + DATA_BITS - 1) / DATA_BITS; } -}; + // Something breaks very badly if value > 15. + inline void writeNibble(usint value) + { + this->bits -= 4; + this->data[this->pos] |= HIGHER(value, this->bits); + if(this->bits == 0) { this->pos++; this->bits = WORD_BITS; } + } + // It is assumed that there is enough space for the code. + inline void writeNibbleCode(usint value) + { + value--; + while(value > 0x7) + { + this->writeNibble(value & 0x7); + value >>= 3; + } + this->writeNibble(value | 0x8); + } -typedef GenericBitBuffer BitBuffer; -typedef GenericBitBuffer FastBitBuffer; +//-------------------------------------------------------------------------- + /* + Delta coding for positive integers + */ -//-------------------------------------------------------------------------- + inline bool canDeltaCode(usint value) const + { + return this->deltaCodeLength(value) <= this->bitsLeft(); + } + inline usint deltaCodeLength(usint value) const + { + usint len = length(value); + usint llen = length(len); + return (len + llen + llen - 2); + } -template -GenericBitBuffer::GenericBitBuffer(usint words) : - size(words), - pos(0), - bits(DATA_BITS), - item_bits(1), - items(0), - current(0), - free_buffer(true) -{ - this->data = new Data[words]; - memset(this->data, 0, words * sizeof(Data)); -} - -template -GenericBitBuffer::GenericBitBuffer(usint _items, usint item_size) : - pos(0), - bits(DATA_BITS), - item_bits(item_size), - items(_items), - current(0), - free_buffer(true) -{ - this->size = bitsToData(items * item_size); - this->data = new Data[this->size]; - memset(this->data, 0, this->size * sizeof(Data)); -} - -template -GenericBitBuffer::GenericBitBuffer(std::ifstream& file, usint words) : - size(words), - pos(0), - bits(DATA_BITS), - item_bits(1), - items(0), - current(0), - free_buffer(true) -{ - this->data = new Data[words]; - memset(this->data, 0, words * sizeof(Data)); - file.read((char*)this->data, words * sizeof(Data)); -} - -template -GenericBitBuffer::GenericBitBuffer(std::ifstream& file, usint _items, usint item_size) : - size(BITS_TO_WORDS(_items * item_size)), - pos(0), - bits(DATA_BITS), - item_bits(item_size), - items(_items), - current(0), - free_buffer(true) -{ - this->data = new Data[this->size]; - memset(this->data, 0, this->size * sizeof(Data)); - file.read((char*)this->data, this->size * sizeof(Data)); -} - -template -GenericBitBuffer::GenericBitBuffer(std::FILE* file, usint _items, usint item_size) : - size(BITS_TO_WORDS(_items * item_size)), - pos(0), - bits(DATA_BITS), - item_bits(item_size), - items(_items), - current(0), - free_buffer(true) -{ - this->data = new Data[this->size]; - memset(this->data, 0, this->size * sizeof(Data)); - std::fread(this->data, sizeof(Data), this->size, file); -} - -template -GenericBitBuffer::GenericBitBuffer(Data* buffer, usint words) : - size(words), - pos(0), - bits(DATA_BITS), - item_bits(1), - items(0), - current(0), - free_buffer(false) -{ - this->data = buffer; -} - -template -GenericBitBuffer::GenericBitBuffer(Data* buffer, usint _items, usint item_size) : - size(BITS_TO_WORDS(_items * item_size)), - pos(0), - bits(DATA_BITS), - item_bits(item_size), - items(_items), - current(0), - free_buffer(false) -{ - this->data = buffer; -} + // This version returns false if there is no space left for the encoding. + inline bool writeDeltaCode(usint value) + { + usint len = length(value); + usint llen = length(len); -template -GenericBitBuffer::~GenericBitBuffer() -{ - if(this->free_buffer) - { - delete[] this->data; - } -} + if(len + llen + llen - 2 > this->bitsLeft()) { return false; } -//-------------------------------------------------------------------------- + // this->writeBits(0, llen - 1); // Now included in the next writeBits() + this->writeBits(len, llen + llen - 1); + this->writeBits(value, len - 1); + return true; + } -template -void -GenericBitBuffer::writeBuffer(std::ofstream& file, bool erase) -{ - file.write((char*)this->data, this->size * sizeof(Data)); - this->reset(erase); -} + // This version assumes the code fits into the buffer. + inline void writeDeltaCodeDirect(usint value) + { + usint len = length(value); + usint llen = length(len); -template -void -GenericBitBuffer::writeBuffer(std::FILE* file, bool erase) -{ - std::fwrite(this->data, sizeof(Data), this->size, file); - this->reset(erase); -} + // this->writeBits(0, llen - 1); // Now included in the next writeBits() + this->writeBits(len, llen + llen - 1); + this->writeBits(value, len - 1); + } -template -void -GenericBitBuffer::readBuffer(std::ifstream& file, usint words, bool erase) -{ - if(words > this->size || !(this->free_buffer)) - { - if(this->free_buffer) + // We assume the code fits into usint: + // 32-bit: value < 2^24 + // 64-bit: value < 2^54 + inline void writeDeltaCodeFast(usint value) { - delete[] this->data; + usint len = length(value); + + value ^= ((usint)1 << (len - 1)); + this->writeBits((len << (len - 1)) | value, len + 2 * length(len) - 2); } - this->size = words; - this->data = new Data[words]; - this->free_buffer = true; - } - this->reset(erase); - file.read((char*)this->data, words * sizeof(Data)); -} +//-------------------------------------------------------------------------- -template -void -GenericBitBuffer::setBuffer(Data* buffer, usint words) -{ - if(this->free_buffer) - { - delete[] this->data; - this->free_buffer = false; - } + private: + usint* data; + usint size, item_bits, items; + bool free_buffer; - this->data = buffer; - this->size = words; - this->reset(false); -} + // Iterator data + usint pos, bits, current; -//-------------------------------------------------------------------------- + inline static usint bitsToWords(usint _bits) { return (_bits + WORD_BITS - 1) / WORD_BITS; } + + // These are not allowed. + WriteBuffer(); + WriteBuffer(const WriteBuffer&); + WriteBuffer& operator = (const WriteBuffer&); +}; -template -usint -GenericBitBuffer::reportSize() -{ - usint bytes = sizeof(*this); - if(this->free_buffer) { bytes += this->size * sizeof(Data); } - return bytes; -} //--------------------------------------------------------------------------