7 #include "../misc/definitions.h"
16 This class provides the core functionality for encoding a bit vector.
22 const static usint SUPERBLOCK_SIZE = MEGABYTE;
24 // We assume superblock size is divisible by block and sample size.
25 VectorEncoder(usint block_bytes, usint superblock_size = SUPERBLOCK_SIZE);
29 This should be implemented in any inherited class.
31 void setBit(usint value); // Values must be in increasing order.
32 void setRun(usint start, usint len);
36 void setFirstBit(usint value);
38 usint size, items, blocks;
39 usint block_size, superblock_bytes;
43 std::list<usint*> array_blocks;
45 usint blocks_in_superblock, current_blocks;
47 std::list<usint*> sample_blocks;
49 usint samples_in_superblock, current_samples;
53 // These are not allowed.
55 VectorEncoder(const VectorEncoder&);
56 VectorEncoder& operator = (const VectorEncoder&);
61 This class provides the core functionality for a bit vector.
67 const static usint INDEX_RATE = 5;
69 BitVector(std::ifstream& file);
70 BitVector(std::FILE * file);
71 BitVector(VectorEncoder& encoder, usint universe_size);
74 //--------------------------------------------------------------------------
76 void writeTo(std::ofstream& file) const;
77 void writeTo(std::FILE *file) const;
79 inline usint getSize() const { return this->size; }
80 inline usint getNumberOfItems() const { return this->items; }
81 inline usint getBlockSize() const { return this->block_size; }
83 // This returns only the sizes of the dynamically allocated structures.
84 usint reportSize() const;
86 usint getCompressedSize() const;
88 //--------------------------------------------------------------------------
93 Iterator(const BitVector& par);
96 inline bool hasNext() const
98 return (this->sample.first + this->cur < this->parent.items - 1);
102 const BitVector& parent;
106 usint cur, val, run; // cur == 0 is the sample
109 ReadBuffer buffer, samples;
112 These functions return the sample corresponding to the block the given
113 index/value might be found in. Parameters are assumed to be valid.
115 usint sampleForIndex(usint index);
116 usint sampleForValue(usint value);
118 inline void getSample(usint sample_number)
120 this->block = sample_number;
121 this->samples.goToItem(2 * sample_number);
122 this->sample.first = this->samples.readItem();
123 this->sample.second = this->samples.readItem();
125 this->val = this->sample.second;
126 this->block_items = this->samples.readItem() - this->sample.first - 1;
127 this->buffer.moveBuffer(this->parent.array + (this->block * this->parent.block_size));
130 // These are not allowed.
132 Iterator(const Iterator&);
133 Iterator& operator = (const Iterator&);
137 These should be implemented in any actual iterator.
139 // rank invalidates the iterator
140 // regular: \sum_{i = 0}^{value} V[i]
141 // at_least: \sum_{i = 0}^{value - 1} V[i] + 1
142 usint rank(usint value, bool at_least = false);
144 usint select(usint index); // \min value: \sum_{i = 0}^{value} V[i] = index + 1
147 pair_type valueAfter(usint value); // (\min i >= value: V[i] = 1, rank(i) - 1)
148 pair_type nextValue();
150 // These versions of select return (value, length_of_run).
151 // max_length is an upper bound for the length of the run returned.
152 // V[value] is not included in the length of the run
153 // These functions are not greedy: the actual length of the run can be more than reported.
154 // This can happen even if max_length was not reached.
155 // length_of_run is actually the number of extra items returned past value
157 pair_type selectRun(usint index, usint max_length);
158 pair_type selectNextRun(usint max_length);
160 // isSet invalidates the iterator
161 bool isSet(usint value); // V[value]
164 //--------------------------------------------------------------------------
171 usint number_of_blocks;
174 Each sample is (rank(i) - 1, i) where V[i] = 1.
175 Number of samples is number_of_blocks + 1.
180 ReadBuffer* rank_index;
183 ReadBuffer* select_index;
187 These functions build a higher level index for faster rank/select queries.
188 The index consists of about (number of samples) / INDEX_RATE pointers.
189 The bit vector cannot be used without the index.
192 void indexForSelect();
194 // These are not allowed.
196 BitVector(const BitVector&);
197 BitVector& operator = (const BitVector&);
204 #endif // BITVECTOR_H