7 #include "../misc/definitions.h"
16 This class provides the core functionality for encoding a bit vector.
22 const static usint SUPERBLOCK_SIZE = MEGABYTE;
24 // We assume superblock size is divisible by block and sample size.
25 VectorEncoder(usint block_bytes, usint superblock_size = SUPERBLOCK_SIZE);
29 This should be implemented in any inherited class.
31 void setBit(usint value); // Values must be in increasing order.
35 void setFirstBit(usint value);
37 usint size, items, blocks;
38 usint block_size, superblock_bytes;
40 FastBitBuffer* buffer;
42 std::list<usint*> array_blocks;
44 usint blocks_in_superblock, current_blocks;
46 std::list<usint*> sample_blocks;
48 usint samples_in_superblock, current_samples;
53 This class provides the core functionality for a bit vector.
59 const static usint INDEX_RATE = 5;
61 BitVector(std::ifstream& file);
62 BitVector(std::FILE* file);
63 BitVector(VectorEncoder& encoder, usint universe_size);
66 void writeTo(std::ofstream& file);
67 void writeTo(std::FILE* file);
69 //--------------------------------------------------------------------------
72 These should be implemented in any actual bit vector.
74 // rank is not iterator-safe
75 // regular: \sum_{i = 0}^{value} V[i]
76 // at_least: \sum_{i = 0}^{value - 1} V[i] + 1
77 usint rank(usint value, bool at_least = false);
79 usint select(usint index); // \min value: \sum_{i = 0}^{value} V[i] = index + 1
82 pair_type valueAfter(usint value); // (\min i >= value: V[i] = 1, rank(i) - 1)
83 pair_type nextValue();
85 // These versions of select return (value, length_of_run).
86 // max_length is an upper bound for the length of the run returned.
87 // V[value] is not included in the length of the run
88 // These functions are not greedy: the actual length of the run can be more than reported.
89 // This can happen even if max_length was not reached.
90 // length_of_run is actually the number of extra items returned past value
92 pair_type selectRun(usint index, usint max_length);
93 pair_type selectNextRun(usint max_length);
95 // isSet is not iterator-safe
96 bool isSet(usint value); // V[value]
101 return (this->sample.first + cur < this->items - 1);
104 //--------------------------------------------------------------------------
106 inline usint getSize() { return this->size; }
107 inline usint getNumberOfItems() { return this->items; }
108 inline usint getBlockSize() { return this->block_size; }
110 // This returns only the sizes of dynamically allocated structures.
116 FastBitBuffer* buffer;
119 usint number_of_blocks;
122 Each sample is (rank(i) - 1, i) where V[i] = 1.
123 Number of samples is number_of_blocks + 1.
125 FastBitBuffer* samples;
128 FastBitBuffer* rank_index;
131 FastBitBuffer* select_index;
134 // Iterator data. These are enough for a gap-encoded vector.
137 usint cur, val; // cur == 0 is the sample
141 These functions return the sample corresponding to the block the given
142 index/value might be found in. Parameters are assumed to be valid.
144 usint sampleForIndex(usint index);
145 usint sampleForValue(usint value);
147 inline void getSample(usint sample_number)
149 this->block = sample_number;
150 this->samples->goToItem(2 * sample_number);
151 this->sample.first = this->samples->readItem();
152 this->sample.second = this->samples->readItem();
154 this->val = this->sample.second;
155 this->block_items = this->samples->readItem() - this->sample.first - 1;
156 this->buffer->moveBuffer(this->array + (this->block * this->block_size));
160 These functions build a higher level index for faster rank/select queries.
161 The index consists of about (number of samples) / INDEX_RATE pointers.
162 The bit vector cannot be used without the index.
165 void indexForSelect();
172 #endif // BITVECTOR_H