Added FILE* functionality
[SXSI/TextCollection.git] / incbwt / bits / bitvector.cpp
1 #include <cstdlib>
2
3 #include "bitvector.h"
4
5
6 namespace CSA
7 {
8
9
10 BitVector::BitVector(std::ifstream& file) :
11   rank_index(0), select_index(0)
12 {
13   file.read((char*)&(this->size), sizeof(this->size));
14   file.read((char*)&(this->items), sizeof(this->items));
15   file.read((char*)&(this->number_of_blocks), sizeof(this->number_of_blocks));
16   file.read((char*)&(this->block_size), sizeof(this->block_size));
17
18   this->array = new usint[this->block_size * this->number_of_blocks];
19   file.read((char*)(this->array), this->block_size * this->number_of_blocks * sizeof(usint));
20   this->buffer = new FastBitBuffer(this->array, this->block_size);
21
22   this->integer_bits = length(this->size);
23   this->samples = new FastBitBuffer(file, 2 * (this->number_of_blocks + 1), this->integer_bits);
24
25   this->indexForRank();
26   this->indexForSelect();
27 }
28
29 BitVector::BitVector(std::FILE * file) :
30   rank_index(0), select_index(0)
31 {
32     std::fread(&(this->size), sizeof(this->size), 1, file);
33     std::fread(&(this->items), sizeof(this->items), 1, file);
34     std::fread(&(this->number_of_blocks), sizeof(this->number_of_blocks), 1, file);
35     std::fread(&(this->block_size), sizeof(this->block_size), 1, file);
36
37   this->array = new usint[this->block_size * this->number_of_blocks];
38   std::fread(this->array, sizeof(usint), this->block_size * this->number_of_blocks, file);
39   this->buffer = new FastBitBuffer(this->array, this->block_size);
40
41   this->integer_bits = length(this->size);
42   this->samples = new FastBitBuffer(file, 2 * (this->number_of_blocks + 1), this->integer_bits);
43
44   this->indexForRank();
45   this->indexForSelect();
46 }
47
48 BitVector::BitVector(VectorEncoder& encoder, usint universe_size) :
49   size(universe_size), items(encoder.items),
50   block_size(encoder.block_size),
51   number_of_blocks(encoder.blocks),
52   rank_index(0), select_index(0)
53 {
54   this->array = new usint[this->block_size * this->number_of_blocks];
55   this->buffer = new FastBitBuffer(this->array, this->block_size);
56
57   this->integer_bits = length(this->size);
58   this->samples = new FastBitBuffer(2 * (this->number_of_blocks + 1), this->integer_bits);
59
60   // Copy & linearize the array.
61   usint pos = 0;
62   for(std::list<usint*>::iterator iter = encoder.array_blocks.begin(); iter != encoder.array_blocks.end(); iter++)
63   {
64     memcpy(this->array + pos, *iter, encoder.superblock_bytes);
65     pos += encoder.block_size * encoder.blocks_in_superblock;
66   }
67   memcpy(this->array + pos, encoder.array, encoder.current_blocks * encoder.block_size * sizeof(usint));
68
69   // Compress the samples.
70   for(std::list<usint*>::iterator iter = encoder.sample_blocks.begin(); iter != encoder.sample_blocks.end(); iter++)
71   {
72     usint* buf = *iter;
73     for(usint i = 0; i < 2 * encoder.samples_in_superblock; i++)
74     {
75       this->samples->writeItem(buf[i]);
76     }
77   }
78   for(usint i = 0; i < 2 * encoder.current_samples; i++)
79   {
80     this->samples->writeItem(encoder.samples[i]);
81   }
82   this->samples->writeItem(this->items);
83   this->samples->writeItem(this->size);
84
85   this->indexForRank();
86   this->indexForSelect();
87 }
88
89 BitVector::~BitVector()
90 {
91   delete[] this->array;
92   delete   this->buffer;
93   delete   this->samples;
94   delete   this->rank_index;
95   delete   this->select_index;
96 }
97
98 void
99 BitVector::writeTo(std::ofstream& file)
100 {
101   file.write((char*)&(this->size), sizeof(this->size));
102   file.write((char*)&(this->items), sizeof(this->items));
103   file.write((char*)&(this->number_of_blocks), sizeof(this->number_of_blocks));
104   file.write((char*)&(this->block_size), sizeof(this->block_size));
105   file.write((char*)(this->array), this->block_size * this->number_of_blocks * sizeof(usint));
106   this->samples->writeBuffer(file, false);
107 }
108
109 void
110 BitVector::writeTo(FILE* file)
111 {
112     std::fwrite(&(this->size), sizeof(this->size), 1, file);
113     std::fwrite(&(this->items), sizeof(this->items), 1, file);
114     std::fwrite(&(this->number_of_blocks), sizeof(this->number_of_blocks), 1, file);
115     std::fwrite(&(this->block_size), sizeof(this->block_size), 1, file);
116     std::fwrite(this->array, sizeof(usint), this->block_size * this->number_of_blocks, file);
117     this->samples->writeBuffer(file, false);
118 }
119
120
121 //--------------------------------------------------------------------------
122
123 usint
124 BitVector::reportSize()
125 {
126   usint bytes = this->buffer->reportSize();
127   bytes += this->block_size * this->number_of_blocks * sizeof(usint);
128   bytes += this->samples->reportSize();
129   bytes += this->rank_index->reportSize();
130   bytes += this->select_index->reportSize();
131   return bytes;
132 }
133
134 //--------------------------------------------------------------------------
135
136 usint
137 BitVector::sampleForIndex(usint index)
138 {
139   usint low = this->select_index->readItem(index / this->select_rate);
140   usint high = this->select_index->readItem(index / this->select_rate + 1);
141
142   this->samples->goToItem(2 * low + 2);
143   for(; low < high; low++)
144   {
145     if(this->samples->readItem() > index) { return low; }
146     this->samples->skipItem();
147   }
148
149   return low;
150 }
151
152 usint
153 BitVector::sampleForValue(usint value)
154 {
155   usint low = this->rank_index->readItem(value / this->rank_rate);
156   usint high = this->rank_index->readItem(value / this->rank_rate + 1);
157
158   this->samples->goToItem(2 * low + 3);
159   for(; low < high; low++)
160   {
161     if(this->samples->readItem() > value) { return low; }
162     this->samples->skipItem();
163   }
164
165   return low;
166 }
167
168 //--------------------------------------------------------------------------
169
170 void
171 BitVector::indexForRank()
172 {
173   delete this->rank_index;
174
175   usint value_samples = (this->number_of_blocks + BitVector::INDEX_RATE - 1) / BitVector::INDEX_RATE;
176   this->rank_rate = (this->size + value_samples - 1) / value_samples;
177   value_samples = (this->size + this->rank_rate - 1) / this->rank_rate + 1;
178   this->rank_index = new FastBitBuffer(value_samples, length(this->number_of_blocks - 1));
179
180   usint current = 0, pointer = 0;
181   this->samples->goToItem(2);
182   while(this->samples->hasNextItem())
183   {
184     this->samples->skipItem();
185     usint limit = this->samples->readItem();
186     while(current < limit)
187     {
188       this->rank_index->writeItem(pointer);
189       current += this->rank_rate;
190     }
191     pointer++;
192   }
193   this->rank_index->writeItem(this->number_of_blocks - 1);
194 }
195
196 void
197 BitVector::indexForSelect()
198 {
199   delete this->select_index;
200
201   usint index_samples = (this->number_of_blocks + BitVector::INDEX_RATE - 1) / BitVector::INDEX_RATE;
202   this->select_rate = (this->items + index_samples - 1) / index_samples;
203   index_samples = (this->items + this->select_rate - 1) / this->select_rate + 1;
204   this->select_index = new FastBitBuffer(index_samples, length(this->number_of_blocks - 1));
205
206   usint current = 0, pointer = 0;
207   this->samples->goToItem(2);
208   while(this->samples->hasNextItem())
209   {
210     usint limit = this->samples->readItem();
211     this->samples->skipItem();
212     while(current < limit)
213     {
214       this->select_index->writeItem(pointer);
215       current += this->select_rate;
216     }
217     pointer++;
218   }
219   this->select_index->writeItem(this->number_of_blocks - 1);
220 }
221
222 //--------------------------------------------------------------------------
223
224 VectorEncoder::VectorEncoder(usint block_bytes, usint superblock_size) :
225   size(0), items(0), blocks(0),
226   block_size(BYTES_TO_WORDS(block_bytes)),
227   superblock_bytes(superblock_size)
228 {
229   this->array = new usint[this->superblock_bytes / sizeof(usint)];
230   memset(this->array, 0, this->superblock_bytes);
231   this->blocks_in_superblock = this->superblock_bytes / (sizeof(usint) * this->block_size);
232   this->current_blocks = 0;
233
234   this->samples = new usint[this->superblock_bytes / sizeof(usint)];
235   this->samples_in_superblock = this->superblock_bytes / (2 * sizeof(usint));
236   this->current_samples = 0;
237
238   this->buffer = new FastBitBuffer(this->array, this->block_size);
239 }
240
241 VectorEncoder::~VectorEncoder()
242 {
243   delete[] this->array;
244
245   delete this->buffer;
246   for(std::list<usint*>::iterator iter = this->array_blocks.begin(); iter != this->array_blocks.end(); iter++)
247   {
248     delete[] *iter;
249   }
250
251   delete[] this->samples;
252   for(std::list<usint*>::iterator iter = this->sample_blocks.begin(); iter != this->sample_blocks.end(); iter++)
253   {
254     delete[] *iter;
255   }
256 }
257
258 void
259 VectorEncoder::addNewBlock()
260 {
261   this->blocks++;
262   this->current_blocks++;
263   this->current_samples++;
264
265   // Do we need a new superblock for the block?
266   if(this->current_blocks > this->blocks_in_superblock)
267   {
268     this->array_blocks.push_back(this->array);
269     this->array = new usint[this->superblock_bytes / sizeof(usint)];
270     memset(this->array, 0, this->superblock_bytes);
271     this->current_blocks = 1;
272   }
273   this->buffer->moveBuffer(this->array + (this->block_size * (this->current_blocks - 1)));
274
275   // Do we need a new superblock for the sample?
276   if(this->current_samples > this->samples_in_superblock)
277   {
278     this->sample_blocks.push_back(this->samples);
279     this->samples = new usint[this->superblock_bytes / sizeof(usint)];
280     this->current_samples = 1;
281   }
282   this->samples[2 * this->current_samples - 2] = this->items - 1;
283   this->samples[2 * this->current_samples - 1] = this->size - 1;
284 }
285
286 void
287 VectorEncoder::setFirstBit(usint value)
288 {
289   this->samples[0] = 0;
290   this->samples[1] = value;
291
292   this->size = value + 1;
293   this->items = 1;
294   this->blocks = 1;
295
296   this->current_blocks = 1;
297   this->current_samples = 1;
298 }
299
300
301 } // namespace CSA