b76fa9d1b82cd0dcf00dd4d65e0d4dc7faa9587a
[SXSI/TextCollection.git] / incbwt / bits / bitvector.cpp
1 #include <cstdlib>
2
3 #include "bitvector.h"
4
5
6 namespace CSA
7 {
8
9
10 BitVector::BitVector(std::ifstream& file) :
11   rank_index(0), select_index(0)
12 {
13   file.read((char*)&(this->size), sizeof(this->size));
14   file.read((char*)&(this->items), sizeof(this->items));
15   file.read((char*)&(this->number_of_blocks), sizeof(this->number_of_blocks));
16   file.read((char*)&(this->block_size), sizeof(this->block_size));
17
18   this->array = new usint[this->block_size * this->number_of_blocks];
19   file.read((char*)(this->array), this->block_size * this->number_of_blocks * sizeof(usint));
20   this->buffer = new FastBitBuffer(this->array, this->block_size);
21
22   this->integer_bits = length(this->size);
23   this->samples = new FastBitBuffer(file, 2 * (this->number_of_blocks + 1), this->integer_bits);
24
25   this->indexForRank();
26   this->indexForSelect();
27 }
28
29 BitVector::BitVector(VectorEncoder& encoder, usint universe_size) :
30   size(universe_size), items(encoder.items),
31   block_size(encoder.block_size),
32   number_of_blocks(encoder.blocks),
33   rank_index(0), select_index(0)
34 {
35   this->array = new usint[this->block_size * this->number_of_blocks];
36   this->buffer = new FastBitBuffer(this->array, this->block_size);
37
38   this->integer_bits = length(this->size);
39   this->samples = new FastBitBuffer(2 * (this->number_of_blocks + 1), this->integer_bits);
40
41   // Copy & linearize the array.
42   usint pos = 0;
43   for(std::list<usint*>::iterator iter = encoder.array_blocks.begin(); iter != encoder.array_blocks.end(); iter++)
44   {
45     memcpy(this->array + pos, *iter, encoder.superblock_bytes);
46     pos += encoder.block_size * encoder.blocks_in_superblock;
47   }
48   memcpy(this->array + pos, encoder.array, encoder.current_blocks * encoder.block_size * sizeof(usint));
49
50   // Compress the samples.
51   for(std::list<usint*>::iterator iter = encoder.sample_blocks.begin(); iter != encoder.sample_blocks.end(); iter++)
52   {
53     usint* buf = *iter;
54     for(usint i = 0; i < 2 * encoder.samples_in_superblock; i++)
55     {
56       this->samples->writeItem(buf[i]);
57     }
58   }
59   for(usint i = 0; i < 2 * encoder.current_samples; i++)
60   {
61     this->samples->writeItem(encoder.samples[i]);
62   }
63   this->samples->writeItem(this->items);
64   this->samples->writeItem(this->size);
65
66   this->indexForRank();
67   this->indexForSelect();
68 }
69
70 BitVector::~BitVector()
71 {
72   delete[] this->array;
73   delete   this->buffer;
74   delete   this->samples;
75   delete   this->rank_index;
76   delete   this->select_index;
77 }
78
79 void
80 BitVector::writeTo(std::ofstream& file)
81 {
82   file.write((char*)&(this->size), sizeof(this->size));
83   file.write((char*)&(this->items), sizeof(this->items));
84   file.write((char*)&(this->number_of_blocks), sizeof(this->number_of_blocks));
85   file.write((char*)&(this->block_size), sizeof(this->block_size));
86   file.write((char*)(this->array), this->block_size * this->number_of_blocks * sizeof(usint));
87   this->samples->writeBuffer(file, false);
88 }
89
90 //--------------------------------------------------------------------------
91
92 usint
93 BitVector::reportSize()
94 {
95   usint bytes = this->buffer->reportSize();
96   bytes += this->block_size * this->number_of_blocks * sizeof(usint);
97   bytes += this->samples->reportSize();
98   bytes += this->rank_index->reportSize();
99   bytes += this->select_index->reportSize();
100   return bytes;
101 }
102
103 //--------------------------------------------------------------------------
104
105 usint
106 BitVector::sampleForIndex(usint index)
107 {
108   usint low = this->select_index->readItem(index / this->select_rate);
109   usint high = this->select_index->readItem(index / this->select_rate + 1);
110
111   this->samples->goToItem(2 * low + 2);
112   for(; low < high; low++)
113   {
114     if(this->samples->readItem() > index) { return low; }
115     this->samples->skipItem();
116   }
117
118   return low;
119 }
120
121 usint
122 BitVector::sampleForValue(usint value)
123 {
124   usint low = this->rank_index->readItem(value / this->rank_rate);
125   usint high = this->rank_index->readItem(value / this->rank_rate + 1);
126
127   this->samples->goToItem(2 * low + 3);
128   for(; low < high; low++)
129   {
130     if(this->samples->readItem() > value) { return low; }
131     this->samples->skipItem();
132   }
133
134   return low;
135 }
136
137 //--------------------------------------------------------------------------
138
139 void
140 BitVector::indexForRank()
141 {
142   delete this->rank_index;
143
144   usint value_samples = (this->number_of_blocks + BitVector::INDEX_RATE - 1) / BitVector::INDEX_RATE;
145   this->rank_rate = (this->size + value_samples - 1) / value_samples;
146   value_samples = (this->size + this->rank_rate - 1) / this->rank_rate + 1;
147   this->rank_index = new FastBitBuffer(value_samples, length(this->number_of_blocks - 1));
148
149   usint current = 0, pointer = 0;
150   this->samples->goToItem(2);
151   while(this->samples->hasNextItem())
152   {
153     this->samples->skipItem();
154     usint limit = this->samples->readItem();
155     while(current < limit)
156     {
157       this->rank_index->writeItem(pointer);
158       current += this->rank_rate;
159     }
160     pointer++;
161   }
162   this->rank_index->writeItem(this->number_of_blocks - 1);
163 }
164
165 void
166 BitVector::indexForSelect()
167 {
168   delete this->select_index;
169
170   usint index_samples = (this->number_of_blocks + BitVector::INDEX_RATE - 1) / BitVector::INDEX_RATE;
171   this->select_rate = (this->items + index_samples - 1) / index_samples;
172   index_samples = (this->items + this->select_rate - 1) / this->select_rate + 1;
173   this->select_index = new FastBitBuffer(index_samples, length(this->number_of_blocks - 1));
174
175   usint current = 0, pointer = 0;
176   this->samples->goToItem(2);
177   while(this->samples->hasNextItem())
178   {
179     usint limit = this->samples->readItem();
180     this->samples->skipItem();
181     while(current < limit)
182     {
183       this->select_index->writeItem(pointer);
184       current += this->select_rate;
185     }
186     pointer++;
187   }
188   this->select_index->writeItem(this->number_of_blocks - 1);
189 }
190
191 //--------------------------------------------------------------------------
192
193 VectorEncoder::VectorEncoder(usint block_bytes, usint superblock_size) :
194   size(0), items(0), blocks(0),
195   block_size(BYTES_TO_WORDS(block_bytes)),
196   superblock_bytes(superblock_size)
197 {
198   this->array = new usint[this->superblock_bytes / sizeof(usint)];
199   memset(this->array, 0, this->superblock_bytes);
200   this->blocks_in_superblock = this->superblock_bytes / (sizeof(usint) * this->block_size);
201   this->current_blocks = 0;
202
203   this->samples = new usint[this->superblock_bytes / sizeof(usint)];
204   this->samples_in_superblock = this->superblock_bytes / (2 * sizeof(usint));
205   this->current_samples = 0;
206
207   this->buffer = new FastBitBuffer(this->array, this->block_size);
208 }
209
210 VectorEncoder::~VectorEncoder()
211 {
212   delete[] this->array;
213
214   delete this->buffer;
215   for(std::list<usint*>::iterator iter = this->array_blocks.begin(); iter != this->array_blocks.end(); iter++)
216   {
217     delete[] *iter;
218   }
219
220   delete[] this->samples;
221   for(std::list<usint*>::iterator iter = this->sample_blocks.begin(); iter != this->sample_blocks.end(); iter++)
222   {
223     delete[] *iter;
224   }
225 }
226
227 void
228 VectorEncoder::addNewBlock()
229 {
230   this->blocks++;
231   this->current_blocks++;
232   this->current_samples++;
233
234   // Do we need a new superblock for the block?
235   if(this->current_blocks > this->blocks_in_superblock)
236   {
237     this->array_blocks.push_back(this->array);
238     this->array = new usint[this->superblock_bytes / sizeof(usint)];
239     memset(this->array, 0, this->superblock_bytes);
240     this->current_blocks = 1;
241   }
242   this->buffer->moveBuffer(this->array + (this->block_size * (this->current_blocks - 1)));
243
244   // Do we need a new superblock for the sample?
245   if(this->current_samples > this->samples_in_superblock)
246   {
247     this->sample_blocks.push_back(this->samples);
248     this->samples = new usint[this->superblock_bytes / sizeof(usint)];
249     this->current_samples = 1;
250   }
251   this->samples[2 * this->current_samples - 2] = this->items - 1;
252   this->samples[2 * this->current_samples - 1] = this->size - 1;
253 }
254
255 void
256 VectorEncoder::setFirstBit(usint value)
257 {
258   this->samples[0] = 0;
259   this->samples[1] = value;
260
261   this->size = value + 1;
262   this->items = 1;
263   this->blocks = 1;
264
265   this->current_blocks = 1;
266   this->current_samples = 1;
267 }
268
269
270 } // namespace CSA