47a382e613dd1b55c83e55cd32d5c2c50ef7b8d5
[SXSI/XMLTree.git] / libcds / src / static_bitsequence / static_bitsequence_rrr02.cpp
1 /* static_bitsequence_rrr02.cpp
2  * Copyright (C) 2008, Francisco Claude, all rights reserved.
3  *
4  * static_bitsequence_rrr02 definition
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  */
21
22 #include <static_bitsequence_rrr02.h>
23
24 table_offset * static_bitsequence_rrr02::E = NULL;
25
26 static_bitsequence_rrr02::static_bitsequence_rrr02() {
27         ones=0;
28         len=0;
29         if(E==NULL) E = new table_offset(BLOCK_SIZE);
30   E->use();
31         C = NULL;
32         O = NULL;
33         C_sampling = NULL;
34         O_pos = NULL;
35   sample_rate = DEFAULT_SAMPLING;
36   C_len = O_len = C_sampling_len = O_pos_len = 0;
37   O_bits_len = C_sampling_field_bits = O_pos_field_bits = 0;
38 }
39
40 static_bitsequence_rrr02::static_bitsequence_rrr02(uint * bitseq, uint len, uint sample_rate) {
41         ones = 0;
42         this->len = len;
43         if(E==NULL) E = new table_offset(BLOCK_SIZE);
44   E->use();
45         // Table C
46         C_len = len/BLOCK_SIZE + (len%BLOCK_SIZE!=0);
47         C_field_bits = bits(BLOCK_SIZE);
48         C = new uint[uint_len(C_len,C_field_bits)];
49   for(uint i=0;i<uint_len(C_len,C_field_bits);i++)
50     C[i] = 0;
51         O_bits_len = 0;
52         for(uint i=0;i<C_len;i++) {
53                 uint value = popcount(get_var_field(bitseq,i*BLOCK_SIZE,min((uint)len-1,(i+1)*BLOCK_SIZE-1)));
54                 assert(value<=BLOCK_SIZE);
55                 set_field(C,C_field_bits,i,value);
56                 ones += value;
57                 O_bits_len += E->get_log2binomial(BLOCK_SIZE,value);
58         }
59         // Table O
60         O_len = uint_len(1,O_bits_len);
61         O = new uint[O_len];
62   for(uint i=0;i<O_len;i++)
63     O[i] = 0;
64         uint O_pos = 0;
65         for(uint i=0;i<C_len;i++) {
66                 uint value = (ushort)get_var_field(bitseq,i*BLOCK_SIZE,min((uint)len-1,(i+1)*BLOCK_SIZE-1));
67                 set_var_field(O,O_pos,O_pos+E->get_log2binomial(BLOCK_SIZE,popcount(value))-1,E->compute_offset((ushort)value));
68                 O_pos += E->get_log2binomial(BLOCK_SIZE,popcount(value));
69         }
70         C_sampling = NULL;
71   this->O_pos = NULL;
72   
73         create_sampling(sample_rate);
74 }
75
76 void static_bitsequence_rrr02::create_sampling(uint sample_rate) {
77         this->sample_rate = sample_rate;
78 /*  for(uint i=0;i<C_len;i++) {
79     O_bits_len += E->get_log2binomial(BLOCK_SIZE,get_field(C,C_field_bits,i));
80   }*/
81         // Sampling for C
82         C_sampling_len = C_len/sample_rate+2;
83         C_sampling_field_bits = bits(ones);
84         if(C_sampling!=NULL) delete [] C_sampling;
85         C_sampling = new uint[max((uint)1,uint_len(C_sampling_len,C_sampling_field_bits))];
86   for(uint i=0;i<max((uint)1,uint_len(C_sampling_len,C_sampling_field_bits));i++)
87     C_sampling[i] = 0;
88         uint sum = 0;
89         for(uint i=0;i<C_len;i++) {
90                 if(i%sample_rate==0)
91                         set_field(C_sampling,C_sampling_field_bits,i/sample_rate,sum);
92                 sum += get_field(C,C_field_bits,i);
93         }
94         for(uint i=(C_len-1)/sample_rate+1;i<C_sampling_len;i++)
95                 set_field(C_sampling,C_sampling_field_bits,i,sum);
96         // Sampling for O (table S) (Code separated from previous construction for readability)
97         O_pos_len = C_len/sample_rate+1;
98         O_pos_field_bits = bits(O_bits_len);
99         if(O_pos!=NULL) delete [] O_pos;
100         O_pos = new uint[uint_len(O_pos_len,O_pos_field_bits)];
101   for(uint i=0;i<uint_len(O_pos_len,O_pos_field_bits);i++)
102     O_pos[i] = 0;
103         uint pos = 0;
104         for(uint i=0;i<C_len;i++) {
105                 if(i%sample_rate==0)
106                         set_field(O_pos,O_pos_field_bits,i/sample_rate,pos);
107                 pos += E->get_log2binomial(BLOCK_SIZE,get_field(C,C_field_bits,i));
108         }
109 }
110
111 bool static_bitsequence_rrr02::access(uint i) {
112   uint nearest_sampled_value = i/BLOCK_SIZE/sample_rate;
113   uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value);
114   uint pos = i/BLOCK_SIZE;
115         assert(pos<=C_len);
116   for(uint k=nearest_sampled_value*sample_rate;k<pos;k++) {
117     uint aux = get_field(C,C_field_bits,k);
118     pos_O += E->get_log2binomial(BLOCK_SIZE,aux);
119   }
120   uint c = get_field(C,C_field_bits,pos);
121   return ((1<<(i%BLOCK_SIZE))&E->short_bitmap(c,get_var_field(O,pos_O,pos_O+E->get_log2binomial(BLOCK_SIZE,c)-1)))!=0;
122 }
123
124 uint static_bitsequence_rrr02::rank0(uint i) {
125   if(i+1==0) return 0;
126         return 1+i-rank1(i);
127 }
128
129 uint static_bitsequence_rrr02::rank1(uint i) {
130   if(i+1==0) return 0;
131         uint nearest_sampled_value = i/BLOCK_SIZE/sample_rate;
132         uint sum = get_field(C_sampling,C_sampling_field_bits,nearest_sampled_value);
133         uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value);
134         uint pos = i/BLOCK_SIZE;
135         uint k=nearest_sampled_value*sample_rate;
136         if(k%2==1 && k<pos) {
137                 uint aux = get_field(C,C_field_bits,k);
138                 sum += aux;
139                 pos_O += E->get_log2binomial(BLOCK_SIZE,aux);
140                 k++;
141         }
142         uchar * a = (uchar *)C;
143         uint mask = 0x0F;
144         a += k/2;
145         while(k<(uint)max(0,(int)pos-1)) {
146                 assert(((*a)&mask)==get_field(C,C_field_bits,k));
147                 assert((*a)/16==get_field(C,C_field_bits,k+1));
148                 sum += ((*a)&mask)+(*a)/16;
149                 pos_O += E->get_log2binomial(BLOCK_SIZE,((*a)&mask))+E->get_log2binomial(BLOCK_SIZE,((*a)/16));
150                 a++;
151                 k+=2;
152         }
153         if(k<pos) {
154                 uint aux = get_field(C,C_field_bits,k);
155                 sum += aux;
156                 pos_O += E->get_log2binomial(BLOCK_SIZE,aux);
157                 k++;
158         }
159         uint c = get_field(C,C_field_bits,pos);
160         sum += popcount(((2<<(i%BLOCK_SIZE))-1) & E->short_bitmap(c,get_var_field(O,pos_O,pos_O+E->get_log2binomial(BLOCK_SIZE,c)-1)));
161         return sum;
162 }
163
164 uint static_bitsequence_rrr02::select0(uint i) {
165         if(i==0) return (uint)-1;
166         if(i>len-ones) return (uint)-1;
167         // Search over partial sums
168         uint start=0;
169         uint end=C_sampling_len-1;
170         uint med, acc=0, pos;
171         while(start<end-1) {
172                 med = (start+end)/2;
173                 acc = med*sample_rate*BLOCK_SIZE-get_field(C_sampling,C_sampling_field_bits,med);
174                 if(acc<i) {
175                         if(med==start) break;
176                         start=med;
177                 }
178                 else  {
179                         if(end==0) break;
180                         end = med-1;
181                 }
182         }
183         acc = get_field(C_sampling,C_sampling_field_bits,start);
184         while(start<C_len-1 && acc+sample_rate*BLOCK_SIZE==get_field(C_sampling,C_sampling_field_bits,start+1)) {
185     start++;
186     acc +=sample_rate*BLOCK_SIZE;
187   }
188   acc = start*sample_rate*BLOCK_SIZE-acc;
189         pos = (start)*sample_rate;
190         uint pos_O = get_field(O_pos,O_pos_field_bits,start);
191         // Sequential search over C
192         uint s = 0;
193         for(;pos<C_len;pos++) {
194                 s = get_field(C,C_field_bits,pos);
195                 if(acc+BLOCK_SIZE-s>=i) break;
196                 pos_O += E->get_log2binomial(BLOCK_SIZE,s);
197                 acc += BLOCK_SIZE-s;
198         }
199         pos = (pos)*BLOCK_SIZE;
200         // Search inside the block
201         
202         while(acc<i) {
203                 uint new_posO = pos_O+E->get_log2binomial(BLOCK_SIZE,s);
204                 uint block = E->short_bitmap(s,get_var_field(O,pos_O,new_posO-1));
205                 pos_O = new_posO;
206                 new_posO = 0;
207                 while(acc<i && new_posO<BLOCK_SIZE) {
208                         pos++;new_posO++;
209                         acc += (((block&1)==0)?1:0);
210                         block = block/2;
211                 }
212         }
213         pos--;
214         assert(acc==i);
215         assert(rank0(pos)==i);
216         assert(!access(pos));
217         return pos;
218 }
219
220 uint static_bitsequence_rrr02::select1(uint i) {
221         if(i==0) return -1;
222         if(i>ones) return -1;
223         // Search over partial sums
224         uint start=0;
225         uint end=C_sampling_len-1;
226         uint med, acc=0, pos;
227         while(start<end-1) {
228                 med = (start+end)/2;
229                 acc = get_field(C_sampling,C_sampling_field_bits,med);
230                 if(acc<i) {
231                         if(med==start) break;
232                         start=med;
233                 }
234                 else  {
235                         if(end==0) break;
236                         end = med-1;
237                 }
238         }
239         acc = get_field(C_sampling,C_sampling_field_bits,start);
240         while(start<C_len-1 && acc==get_field(C_sampling,C_sampling_field_bits,start+1)) start++;
241         pos = (start)*sample_rate;
242         uint pos_O = get_field(O_pos,O_pos_field_bits,start);
243         acc = get_field(C_sampling,C_sampling_field_bits,start);
244         // Sequential search over C
245         uint s = 0;
246         for(;pos<C_len;pos++) {
247                 s = get_field(C,C_field_bits,pos);
248                 if(acc+s>=i) break;
249                 pos_O += E->get_log2binomial(BLOCK_SIZE,s);
250                 acc += s;
251         }
252         pos = (pos)*BLOCK_SIZE;
253         //cout << "pos=" << pos << endl;
254         // Search inside the block
255         while(acc<i) {
256                 uint new_posO = pos_O+E->get_log2binomial(BLOCK_SIZE,s);
257                 uint block = E->short_bitmap(s,get_var_field(O,pos_O,new_posO-1));
258                 pos_O = new_posO;
259                 new_posO = 0;
260                 while(acc<i && new_posO<BLOCK_SIZE) {
261                         pos++;new_posO++;
262                         acc += (((block&1)!=0)?1:0);
263                         block = block/2;
264                 }
265                 //cout << "i=" << i << " len=" << len << " ones=" << ones << " pos=" << pos << " acc=" << acc << " rank=" << rank1(pos) << endl;
266         }
267         pos--;
268         assert(acc==i);
269         assert(rank1(pos)==i);
270         assert(access(pos));
271         return pos;
272 }
273
274 uint static_bitsequence_rrr02::size() {
275   /*cout << "RRR02 SIZE: " << endl;
276   cout << "Default: " << 9*sizeof(uint)+sizeof(uint*)*4 << endl;
277   cout << "Cs:      " << uint_len(C_len,C_field_bits)*sizeof(uint) << endl;
278   cout << "Os:      " << O_len*sizeof(uint) << endl;
279   cout << "CSamp:   " << uint_len(C_sampling_len,C_sampling_field_bits)*sizeof(uint) << endl;
280   cout << "OSamp:   " << uint_len(O_pos_len,O_pos_field_bits)*sizeof(uint) << endl;
281   cout << "E:       " << E->size() << endl;*/
282   uint sum = sizeof(static_bitsequence_rrr02);
283   sum += uint_len(C_len,C_field_bits)*sizeof(uint);
284   sum += O_len*sizeof(uint);
285   sum += uint_len(C_sampling_len,C_sampling_field_bits)*sizeof(uint);
286   sum += uint_len(O_pos_len,O_pos_field_bits)*sizeof(uint);
287   //sum += E->size();
288   return sum;
289 }
290
291 static_bitsequence_rrr02::~static_bitsequence_rrr02() {
292         if(C!=NULL) delete [] C;
293         if(O!=NULL) delete [] O;
294         if(C_sampling!=NULL) delete [] C_sampling;
295         if(O_pos!=NULL) delete [] O_pos;
296   E = E->unuse();
297 }
298
299 int static_bitsequence_rrr02::save(FILE * fp) {
300         uint wr = RRR02_HDR;
301   wr = fwrite(&wr,sizeof(uint),1,fp);
302         wr += fwrite(&len,sizeof(uint),1,fp);
303         wr += fwrite(&ones,sizeof(uint),1,fp);
304         wr += fwrite(&C_len,sizeof(uint),1,fp);
305         wr += fwrite(&C_field_bits,sizeof(uint),1,fp);
306         wr += fwrite(&O_len,sizeof(uint),1,fp);
307   wr += fwrite(&O_bits_len,sizeof(uint),1,fp);
308   wr += fwrite(&sample_rate,sizeof(uint),1,fp);
309         if(wr!=8) return -1;
310         wr = fwrite(C,sizeof(uint),uint_len(C_len,C_field_bits),fp);
311         if(wr!=uint_len(C_len,C_field_bits)) return -1;
312   wr = fwrite(O,sizeof(uint),O_len,fp);
313   if(wr!=O_len) return -1;
314         return 0;
315 }
316
317 static_bitsequence_rrr02 * static_bitsequence_rrr02::load(FILE * fp) {
318         static_bitsequence_rrr02 * ret = new static_bitsequence_rrr02();
319         uint rd = 0, type;
320   rd += fread(&type,sizeof(uint),1,fp);
321         rd += fread(&ret->len,sizeof(uint),1,fp);
322         rd += fread(&ret->ones,sizeof(uint),1,fp);
323         rd += fread(&ret->C_len,sizeof(uint),1,fp);
324         rd += fread(&ret->C_field_bits,sizeof(uint),1,fp);
325         rd += fread(&ret->O_len,sizeof(uint),1,fp);
326   rd += fread(&ret->O_bits_len,sizeof(uint),1,fp);
327   rd += fread(&ret->sample_rate,sizeof(uint),1,fp);
328         if(rd!=8 || type!=RRR02_HDR) {
329                 delete ret;
330                 return NULL;
331         }
332         ret->C = new uint[uint_len(ret->C_len,ret->C_field_bits)];
333         rd = fread(ret->C,sizeof(uint),uint_len(ret->C_len,ret->C_field_bits),fp);
334         if(rd!=uint_len(ret->C_len,ret->C_field_bits)) {
335                 ret->C=NULL;
336                 delete ret;
337                 return NULL;
338         }
339   ret->O = new uint[ret->O_len];
340   rd = fread(ret->O,sizeof(uint),ret->O_len,fp);
341   if(rd!=ret->O_len) {
342     ret->O=NULL;
343     delete ret;
344     return NULL;
345   }
346         ret->create_sampling(ret->sample_rate);
347         return ret;
348 }