ea2aba6163ee03baca5058c057ba9dbd7ff841a8
[SXSI/TextCollection.git] / incbwt / bits / bitvector.cpp
1 #include <cstdlib>
2
3 #include "bitvector.h"
4
5
6 namespace CSA
7 {
8
9
10 BitVector::BitVector(std::ifstream& file) :
11   rank_index(0), select_index(0)
12 {
13   file.read((char*)&(this->size), sizeof(this->size));
14   file.read((char*)&(this->items), sizeof(this->items));
15   file.read((char*)&(this->number_of_blocks), sizeof(this->number_of_blocks));
16   file.read((char*)&(this->block_size), sizeof(this->block_size));
17
18   usint* array_buffer = new usint[this->block_size * this->number_of_blocks];
19   file.read((char*)(array_buffer), this->block_size * this->number_of_blocks * sizeof(usint));
20   this->array = array_buffer;
21
22   this->integer_bits = length(this->size);
23   this->samples = new ReadBuffer(file, 2 * (this->number_of_blocks + 1), this->integer_bits);
24
25   this->indexForRank();
26   this->indexForSelect();
27 }
28
29 BitVector::BitVector(std::FILE * file) :
30   rank_index(0), select_index(0)
31 {
32     std::fread(&(this->size), sizeof(this->size), 1, file);
33     std::fread(&(this->items), sizeof(this->items), 1, file);
34     std::fread(&(this->number_of_blocks), sizeof(this->number_of_blocks), 1, file);
35     std::fread(&(this->block_size), sizeof(this->block_size), 1, file);
36
37   usint* array_buffer = new usint[this->block_size * this->number_of_blocks];
38   std::fread(array_buffer, sizeof(usint), this->block_size * this->number_of_blocks, file);
39   this->array = array_buffer;
40
41   this->integer_bits = length(this->size);
42   this->samples = new ReadBuffer(file, 2 * (this->number_of_blocks + 1), this->integer_bits);
43
44   this->indexForRank();
45   this->indexForSelect();
46 }
47
48
49 BitVector::BitVector(VectorEncoder& encoder, usint universe_size) :
50   size(universe_size), items(encoder.items),
51   block_size(encoder.block_size),
52   number_of_blocks(encoder.blocks),
53   rank_index(0), select_index(0)
54 {
55   usint* array_buffer = new usint[this->block_size * this->number_of_blocks];
56
57   this->integer_bits = length(this->size);
58   WriteBuffer sample_buffer(2 * (this->number_of_blocks + 1), this->integer_bits);
59
60   // Copy & linearize the array.
61   usint pos = 0;
62   for(std::list<usint*>::iterator iter = encoder.array_blocks.begin(); iter != encoder.array_blocks.end(); iter++)
63   {
64     memcpy(array_buffer + pos, *iter, encoder.superblock_bytes);
65     pos += encoder.block_size * encoder.blocks_in_superblock;
66   }
67   memcpy(array_buffer + pos, encoder.array, encoder.current_blocks * encoder.block_size * sizeof(usint));
68   this->array = array_buffer;
69
70   // Compress the samples.
71   for(std::list<usint*>::iterator iter = encoder.sample_blocks.begin(); iter != encoder.sample_blocks.end(); iter++)
72   {
73     usint* buf = *iter;
74     for(usint i = 0; i < 2 * encoder.samples_in_superblock; i++)
75     {
76       sample_buffer.writeItem(buf[i]);
77     }
78   }
79   for(usint i = 0; i < 2 * encoder.current_samples; i++)
80   {
81     sample_buffer.writeItem(encoder.samples[i]);
82   }
83   sample_buffer.writeItem(this->items);
84   sample_buffer.writeItem(this->size);
85
86   this->samples = sample_buffer.getReadBuffer();
87
88   this->indexForRank();
89   this->indexForSelect();
90 }
91
92 BitVector::~BitVector()
93 {
94   delete[] this->array;
95   delete this->samples;
96   delete this->rank_index;
97   delete this->select_index;
98 }
99
100 //--------------------------------------------------------------------------
101
102 void
103 BitVector::writeTo(std::ofstream& file) const
104 {
105   file.write((char*)&(this->size), sizeof(this->size));
106   file.write((char*)&(this->items), sizeof(this->items));
107   file.write((char*)&(this->number_of_blocks), sizeof(this->number_of_blocks));
108   file.write((char*)&(this->block_size), sizeof(this->block_size));
109   file.write((char*)(this->array), this->block_size * this->number_of_blocks * sizeof(usint));
110   this->samples->writeBuffer(file);
111 }
112 void
113 BitVector::writeTo(FILE* file) const
114 {
115     std::fwrite(&(this->size), sizeof(this->size), 1, file);
116     std::fwrite(&(this->items), sizeof(this->items), 1, file);
117     std::fwrite(&(this->number_of_blocks), sizeof(this->number_of_blocks), 1, file);
118     std::fwrite(&(this->block_size), sizeof(this->block_size), 1, file);
119     std::fwrite(this->array, sizeof(usint), this->block_size * this->number_of_blocks, file);
120     this->samples->writeBuffer(file);
121 }
122
123 usint
124 BitVector::reportSize() const
125 {
126   // We assume the reportSize() of derived classes includes any class variables of BitVector.
127   usint bytes = this->block_size * this->number_of_blocks * sizeof(usint);
128   bytes += this->samples->reportSize();
129   bytes += this->rank_index->reportSize();
130   bytes += this->select_index->reportSize();
131   return bytes;
132 }
133
134 usint
135 BitVector::getCompressedSize() const
136 {
137   return this->block_size * this->number_of_blocks * sizeof(usint);
138 }
139
140 //--------------------------------------------------------------------------
141
142 void
143 BitVector::indexForRank()
144 {
145   delete this->rank_index;
146
147   usint value_samples = (this->number_of_blocks + BitVector::INDEX_RATE - 1) / BitVector::INDEX_RATE;
148   this->rank_rate = (this->size + value_samples - 1) / value_samples;
149   value_samples = (this->size + this->rank_rate - 1) / this->rank_rate + 1;
150   WriteBuffer index_buffer(value_samples, length(this->number_of_blocks - 1));
151
152   usint current = 0, pointer = 0;
153   this->samples->goToItem(2);
154   while(this->samples->hasNextItem())
155   {
156     this->samples->skipItem();
157     usint limit = this->samples->readItem();
158     while(current < limit)
159     {
160       index_buffer.writeItem(pointer);
161       current += this->rank_rate;
162     }
163     pointer++;
164   }
165   index_buffer.writeItem(this->number_of_blocks - 1);
166
167   this->rank_index = index_buffer.getReadBuffer();
168 }
169
170 void
171 BitVector::indexForSelect()
172 {
173   delete this->select_index;
174
175   usint index_samples = (this->number_of_blocks + BitVector::INDEX_RATE - 1) / BitVector::INDEX_RATE;
176   this->select_rate = (this->items + index_samples - 1) / index_samples;
177   index_samples = (this->items + this->select_rate - 1) / this->select_rate + 1;
178   WriteBuffer index_buffer(index_samples, length(this->number_of_blocks - 1));
179
180   usint current = 0, pointer = 0;
181   this->samples->goToItem(2);
182   while(this->samples->hasNextItem())
183   {
184     usint limit = this->samples->readItem();
185     this->samples->skipItem();
186     while(current < limit)
187     {
188       index_buffer.writeItem(pointer);
189       current += this->select_rate;
190     }
191     pointer++;
192   }
193   index_buffer.writeItem(this->number_of_blocks - 1);
194
195   this->select_index = index_buffer.getReadBuffer();
196 }
197
198 //--------------------------------------------------------------------------
199
200 BitVector::Iterator::Iterator(const BitVector& par) :
201   parent(par),
202   buffer(par.array, par.block_size),
203   samples(*(par.samples))
204 {
205 }
206
207 BitVector::Iterator::~Iterator()
208 {
209 }
210
211 usint
212 BitVector::Iterator::sampleForIndex(usint index)
213 {
214   usint low = this->parent.select_index->readItemConst(index / this->parent.select_rate);
215   usint high = this->parent.number_of_blocks - 1;
216
217   this->samples.goToItem(2 * low + 2);
218   for(; low < high; low++)
219   {
220     if(this->samples.readItem() > index) { return low; }
221     this->samples.skipItem();
222   }
223
224   return low;
225 }
226
227 usint
228 BitVector::Iterator::sampleForValue(usint value)
229 {
230   usint low = this->parent.rank_index->readItemConst(value / this->parent.rank_rate);
231   usint high = this->parent.number_of_blocks - 1;
232
233   this->samples.goToItem(2 * low + 3);
234   for(; low < high; low++)
235   {
236     if(this->samples.readItem() > value) { return low; }
237     this->samples.skipItem();
238   }
239
240   return low;
241 }
242
243 //--------------------------------------------------------------------------
244
245 VectorEncoder::VectorEncoder(usint block_bytes, usint superblock_size) :
246   size(0), items(0), blocks(0),
247   block_size(BYTES_TO_WORDS(block_bytes)),
248   superblock_bytes(superblock_size)
249 {
250   this->array = new usint[this->superblock_bytes / sizeof(usint)];
251   memset(this->array, 0, this->superblock_bytes);
252   this->blocks_in_superblock = this->superblock_bytes / (sizeof(usint) * this->block_size);
253   this->current_blocks = 0;
254
255   this->samples = new usint[this->superblock_bytes / sizeof(usint)];
256   this->samples_in_superblock = this->superblock_bytes / (2 * sizeof(usint));
257   this->current_samples = 0;
258
259   this->buffer = new WriteBuffer(this->array, this->block_size);
260 }
261
262 VectorEncoder::~VectorEncoder()
263 {
264   delete[] this->array;
265
266   delete this->buffer;
267   for(std::list<usint*>::iterator iter = this->array_blocks.begin(); iter != this->array_blocks.end(); iter++)
268   {
269     delete[] *iter;
270   }
271
272   delete[] this->samples;
273   for(std::list<usint*>::iterator iter = this->sample_blocks.begin(); iter != this->sample_blocks.end(); iter++)
274   {
275     delete[] *iter;
276   }
277 }
278
279 void
280 VectorEncoder::addNewBlock()
281 {
282   this->blocks++;
283   this->current_blocks++;
284   this->current_samples++;
285
286   // Do we need a new superblock for the block?
287   if(this->current_blocks > this->blocks_in_superblock)
288   {
289     this->array_blocks.push_back(this->array);
290     this->array = new usint[this->superblock_bytes / sizeof(usint)];
291     memset(this->array, 0, this->superblock_bytes);
292     this->current_blocks = 1;
293   }
294   this->buffer->moveBuffer(this->array + (this->block_size * (this->current_blocks - 1)));
295
296   // Do we need a new superblock for the sample?
297   if(this->current_samples > this->samples_in_superblock)
298   {
299     this->sample_blocks.push_back(this->samples);
300     this->samples = new usint[this->superblock_bytes / sizeof(usint)];
301     this->current_samples = 1;
302   }
303   this->samples[2 * this->current_samples - 2] = this->items - 1;
304   this->samples[2 * this->current_samples - 1] = this->size - 1;
305 }
306
307 void
308 VectorEncoder::setFirstBit(usint value)
309 {
310   this->samples[0] = 0;
311   this->samples[1] = value;
312
313   this->size = value + 1;
314   this->items = 1;
315   this->blocks = 1;
316
317   this->current_blocks = 1;
318   this->current_samples = 1;
319 }
320
321
322 } // namespace CSA