Backport changes from the grammar branch
[SXSI/XMLTree.git] / libcds / src / static_bitsequence / static_bitsequence_rrr02.cpp
1 /* static_bitsequence_rrr02.cpp
2  * Copyright (C) 2008, Francisco Claude, all rights reserved.
3  *
4  * static_bitsequence_rrr02 definition
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  */
21
22 #include <static_bitsequence_rrr02.h>
23 using std::min;
24 using std::max;
25 table_offset * static_bitsequence_rrr02::E = NULL;
26
27 static_bitsequence_rrr02::static_bitsequence_rrr02() {
28         ones=0;
29         len=0;
30         if(E==NULL) E = new table_offset(BLOCK_SIZE);
31   E->use();
32         C = NULL;
33         O = NULL;
34         C_sampling = NULL;
35         O_pos = NULL;
36   sample_rate = DEFAULT_SAMPLING;
37   C_len = O_len = C_sampling_len = O_pos_len = 0;
38   O_bits_len = C_sampling_field_bits = O_pos_field_bits = 0;
39 }
40
41 static_bitsequence_rrr02::static_bitsequence_rrr02(uint * bitseq, uint len, uint sample_rate) {
42         ones = 0;
43         this->len = len;
44         if(E==NULL) E = new table_offset(BLOCK_SIZE);
45   E->use();
46         // Table C
47         C_len = len/BLOCK_SIZE + (len%BLOCK_SIZE!=0);
48         C_field_bits = bits(BLOCK_SIZE);
49         C = new uint[uint_len(C_len,C_field_bits)];
50   for(uint i=0;i<uint_len(C_len,C_field_bits);i++)
51     C[i] = 0;
52         O_bits_len = 0;
53         for(uint i=0;i<C_len;i++) {
54                 uint value = popcount(get_var_field(bitseq,i*BLOCK_SIZE,min((uint)len-1,(i+1)*BLOCK_SIZE-1)));
55                 assert(value<=BLOCK_SIZE);
56                 set_field(C,C_field_bits,i,value);
57                 ones += value;
58                 O_bits_len += E->get_log2binomial(BLOCK_SIZE,value);
59         }
60         // Table O
61         O_len = uint_len(1,O_bits_len);
62         O = new uint[O_len];
63   for(uint i=0;i<O_len;i++)
64     O[i] = 0;
65         uint O_pos = 0;
66         for(uint i=0;i<C_len;i++) {
67                 uint value = (ushort)get_var_field(bitseq,i*BLOCK_SIZE,min((uint)len-1,(i+1)*BLOCK_SIZE-1));
68                 set_var_field(O,O_pos,O_pos+E->get_log2binomial(BLOCK_SIZE,popcount(value))-1,E->compute_offset((ushort)value));
69                 O_pos += E->get_log2binomial(BLOCK_SIZE,popcount(value));
70         }
71         C_sampling = NULL;
72   this->O_pos = NULL;
73   
74         create_sampling(sample_rate);
75 }
76
77 void static_bitsequence_rrr02::create_sampling(uint sample_rate) {
78         this->sample_rate = sample_rate;
79 /*  for(uint i=0;i<C_len;i++) {
80     O_bits_len += E->get_log2binomial(BLOCK_SIZE,get_field(C,C_field_bits,i));
81   }*/
82         // Sampling for C
83         C_sampling_len = C_len/sample_rate+2;
84         C_sampling_field_bits = bits(ones);
85         if(C_sampling!=NULL) delete [] C_sampling;
86         C_sampling = new uint[max((uint)1,uint_len(C_sampling_len,C_sampling_field_bits))];
87   for(uint i=0;i<max((uint)1,uint_len(C_sampling_len,C_sampling_field_bits));i++)
88     C_sampling[i] = 0;
89         uint sum = 0;
90         for(uint i=0;i<C_len;i++) {
91                 if(i%sample_rate==0)
92                         set_field(C_sampling,C_sampling_field_bits,i/sample_rate,sum);
93                 sum += get_field(C,C_field_bits,i);
94         }
95         for(uint i=(C_len-1)/sample_rate+1;i<C_sampling_len;i++)
96                 set_field(C_sampling,C_sampling_field_bits,i,sum);
97         // Sampling for O (table S) (Code separated from previous construction for readability)
98         O_pos_len = C_len/sample_rate+1;
99         O_pos_field_bits = bits(O_bits_len);
100         if(O_pos!=NULL) delete [] O_pos;
101         O_pos = new uint[uint_len(O_pos_len,O_pos_field_bits)];
102   for(uint i=0;i<uint_len(O_pos_len,O_pos_field_bits);i++)
103     O_pos[i] = 0;
104         uint pos = 0;
105         for(uint i=0;i<C_len;i++) {
106                 if(i%sample_rate==0)
107                         set_field(O_pos,O_pos_field_bits,i/sample_rate,pos);
108                 pos += E->get_log2binomial(BLOCK_SIZE,get_field(C,C_field_bits,i));
109         }
110 }
111
112 bool static_bitsequence_rrr02::access(uint i) {
113   uint nearest_sampled_value = i/BLOCK_SIZE/sample_rate;
114   uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value);
115   uint pos = i/BLOCK_SIZE;
116         assert(pos<=C_len);
117   for(uint k=nearest_sampled_value*sample_rate;k<pos;k++) {
118     uint aux = get_field(C,C_field_bits,k);
119     pos_O += E->get_log2binomial(BLOCK_SIZE,aux);
120   }
121   uint c = get_field(C,C_field_bits,pos);
122   return ((1<<(i%BLOCK_SIZE))&E->short_bitmap(c,get_var_field(O,pos_O,pos_O+E->get_log2binomial(BLOCK_SIZE,c)-1)))!=0;
123 }
124
125 uint static_bitsequence_rrr02::rank0(uint i) {
126   if(i+1==0) return 0;
127         return 1+i-rank1(i);
128 }
129
130 uint static_bitsequence_rrr02::rank1(uint i) {
131   if(i+1==0) return 0;
132         uint nearest_sampled_value = i/BLOCK_SIZE/sample_rate;
133         uint sum = get_field(C_sampling,C_sampling_field_bits,nearest_sampled_value);
134         uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value);
135         uint pos = i/BLOCK_SIZE;
136         uint k=nearest_sampled_value*sample_rate;
137         if(k%2==1 && k<pos) {
138                 uint aux = get_field(C,C_field_bits,k);
139                 sum += aux;
140                 pos_O += E->get_log2binomial(BLOCK_SIZE,aux);
141                 k++;
142         }
143         uchar * a = (uchar *)C;
144         uint mask = 0x0F;
145         a += k/2;
146         while(k<(uint)max(0,(int)pos-1)) {
147                 assert(((*a)&mask)==get_field(C,C_field_bits,k));
148                 assert((*a)/16==get_field(C,C_field_bits,k+1));
149                 sum += ((*a)&mask)+(*a)/16;
150                 pos_O += E->get_log2binomial(BLOCK_SIZE,((*a)&mask))+E->get_log2binomial(BLOCK_SIZE,((*a)/16));
151                 a++;
152                 k+=2;
153         }
154         if(k<pos) {
155                 uint aux = get_field(C,C_field_bits,k);
156                 sum += aux;
157                 pos_O += E->get_log2binomial(BLOCK_SIZE,aux);
158                 k++;
159         }
160         uint c = get_field(C,C_field_bits,pos);
161         sum += popcount(((2<<(i%BLOCK_SIZE))-1) & E->short_bitmap(c,get_var_field(O,pos_O,pos_O+E->get_log2binomial(BLOCK_SIZE,c)-1)));
162         return sum;
163 }
164
165 uint static_bitsequence_rrr02::select0(uint i) {
166         if(i==0) return (uint)-1;
167         if(i>len-ones) return (uint)-1;
168         // Search over partial sums
169         uint start=0;
170         uint end=C_sampling_len-1;
171         uint med, acc=0, pos;
172         while(start<end-1) {
173                 med = (start+end)/2;
174                 acc = med*sample_rate*BLOCK_SIZE-get_field(C_sampling,C_sampling_field_bits,med);
175                 if(acc<i) {
176                         if(med==start) break;
177                         start=med;
178                 }
179                 else  {
180                         if(end==0) break;
181                         end = med-1;
182                 }
183         }
184         acc = get_field(C_sampling,C_sampling_field_bits,start);
185         while(start<C_len-1 && acc+sample_rate*BLOCK_SIZE==get_field(C_sampling,C_sampling_field_bits,start+1)) {
186     start++;
187     acc +=sample_rate*BLOCK_SIZE;
188   }
189   acc = start*sample_rate*BLOCK_SIZE-acc;
190         pos = (start)*sample_rate;
191         uint pos_O = get_field(O_pos,O_pos_field_bits,start);
192         // Sequential search over C
193         uint s = 0;
194         for(;pos<C_len;pos++) {
195                 s = get_field(C,C_field_bits,pos);
196                 if(acc+BLOCK_SIZE-s>=i) break;
197                 pos_O += E->get_log2binomial(BLOCK_SIZE,s);
198                 acc += BLOCK_SIZE-s;
199         }
200         pos = (pos)*BLOCK_SIZE;
201         // Search inside the block
202         
203         while(acc<i) {
204                 uint new_posO = pos_O+E->get_log2binomial(BLOCK_SIZE,s);
205                 uint block = E->short_bitmap(s,get_var_field(O,pos_O,new_posO-1));
206                 pos_O = new_posO;
207                 new_posO = 0;
208                 while(acc<i && new_posO<BLOCK_SIZE) {
209                         pos++;new_posO++;
210                         acc += (((block&1)==0)?1:0);
211                         block = block/2;
212                 }
213         }
214         pos--;
215         assert(acc==i);
216         assert(rank0(pos)==i);
217         assert(!access(pos));
218         return pos;
219 }
220
221 uint static_bitsequence_rrr02::select1(uint i) {
222         if(i==0) return -1;
223         if(i>ones) return -1;
224         // Search over partial sums
225         uint start=0;
226         uint end=C_sampling_len-1;
227         uint med, acc=0, pos;
228         while(start<end-1) {
229                 med = (start+end)/2;
230                 acc = get_field(C_sampling,C_sampling_field_bits,med);
231                 if(acc<i) {
232                         if(med==start) break;
233                         start=med;
234                 }
235                 else  {
236                         if(end==0) break;
237                         end = med-1;
238                 }
239         }
240         acc = get_field(C_sampling,C_sampling_field_bits,start);
241         while(start<C_len-1 && acc==get_field(C_sampling,C_sampling_field_bits,start+1)) start++;
242         pos = (start)*sample_rate;
243         uint pos_O = get_field(O_pos,O_pos_field_bits,start);
244         acc = get_field(C_sampling,C_sampling_field_bits,start);
245         // Sequential search over C
246         uint s = 0;
247         for(;pos<C_len;pos++) {
248                 s = get_field(C,C_field_bits,pos);
249                 if(acc+s>=i) break;
250                 pos_O += E->get_log2binomial(BLOCK_SIZE,s);
251                 acc += s;
252         }
253         pos = (pos)*BLOCK_SIZE;
254         //cout << "pos=" << pos << endl;
255         // Search inside the block
256         while(acc<i) {
257                 uint new_posO = pos_O+E->get_log2binomial(BLOCK_SIZE,s);
258                 uint block = E->short_bitmap(s,get_var_field(O,pos_O,new_posO-1));
259                 pos_O = new_posO;
260                 new_posO = 0;
261                 while(acc<i && new_posO<BLOCK_SIZE) {
262                         pos++;new_posO++;
263                         acc += (((block&1)!=0)?1:0);
264                         block = block/2;
265                 }
266                 //cout << "i=" << i << " len=" << len << " ones=" << ones << " pos=" << pos << " acc=" << acc << " rank=" << rank1(pos) << endl;
267         }
268         pos--;
269         assert(acc==i);
270         assert(rank1(pos)==i);
271         assert(access(pos));
272         return pos;
273 }
274
275 uint static_bitsequence_rrr02::size() {
276   /*cout << "RRR02 SIZE: " << endl;
277   cout << "Default: " << 9*sizeof(uint)+sizeof(uint*)*4 << endl;
278   cout << "Cs:      " << uint_len(C_len,C_field_bits)*sizeof(uint) << endl;
279   cout << "Os:      " << O_len*sizeof(uint) << endl;
280   cout << "CSamp:   " << uint_len(C_sampling_len,C_sampling_field_bits)*sizeof(uint) << endl;
281   cout << "OSamp:   " << uint_len(O_pos_len,O_pos_field_bits)*sizeof(uint) << endl;
282   cout << "E:       " << E->size() << endl;*/
283   uint sum = sizeof(static_bitsequence_rrr02);
284   sum += uint_len(C_len,C_field_bits)*sizeof(uint);
285   sum += O_len*sizeof(uint);
286   sum += uint_len(C_sampling_len,C_sampling_field_bits)*sizeof(uint);
287   sum += uint_len(O_pos_len,O_pos_field_bits)*sizeof(uint);
288   //sum += E->size();
289   return sum;
290 }
291
292 static_bitsequence_rrr02::~static_bitsequence_rrr02() {
293         if(C!=NULL) delete [] C;
294         if(O!=NULL) delete [] O;
295         if(C_sampling!=NULL) delete [] C_sampling;
296         if(O_pos!=NULL) delete [] O_pos;
297   E = E->unuse();
298 }
299
300 int static_bitsequence_rrr02::save(FILE * fp) {
301         uint wr = RRR02_HDR;
302   wr = fwrite(&wr,sizeof(uint),1,fp);
303         wr += fwrite(&len,sizeof(uint),1,fp);
304         wr += fwrite(&ones,sizeof(uint),1,fp);
305         wr += fwrite(&C_len,sizeof(uint),1,fp);
306         wr += fwrite(&C_field_bits,sizeof(uint),1,fp);
307         wr += fwrite(&O_len,sizeof(uint),1,fp);
308   wr += fwrite(&O_bits_len,sizeof(uint),1,fp);
309   wr += fwrite(&sample_rate,sizeof(uint),1,fp);
310         if(wr!=8) return -1;
311         wr = fwrite(C,sizeof(uint),uint_len(C_len,C_field_bits),fp);
312         if(wr!=uint_len(C_len,C_field_bits)) return -1;
313   wr = fwrite(O,sizeof(uint),O_len,fp);
314   if(wr!=O_len) return -1;
315         return 0;
316 }
317
318 static_bitsequence_rrr02 * static_bitsequence_rrr02::load(FILE * fp) {
319         static_bitsequence_rrr02 * ret = new static_bitsequence_rrr02();
320         uint rd = 0, type;
321   rd += fread(&type,sizeof(uint),1,fp);
322         rd += fread(&ret->len,sizeof(uint),1,fp);
323         rd += fread(&ret->ones,sizeof(uint),1,fp);
324         rd += fread(&ret->C_len,sizeof(uint),1,fp);
325         rd += fread(&ret->C_field_bits,sizeof(uint),1,fp);
326         rd += fread(&ret->O_len,sizeof(uint),1,fp);
327   rd += fread(&ret->O_bits_len,sizeof(uint),1,fp);
328   rd += fread(&ret->sample_rate,sizeof(uint),1,fp);
329         if(rd!=8 || type!=RRR02_HDR) {
330                 delete ret;
331                 return NULL;
332         }
333         ret->C = new uint[uint_len(ret->C_len,ret->C_field_bits)];
334         rd = fread(ret->C,sizeof(uint),uint_len(ret->C_len,ret->C_field_bits),fp);
335         if(rd!=uint_len(ret->C_len,ret->C_field_bits)) {
336                 ret->C=NULL;
337                 delete ret;
338                 return NULL;
339         }
340   ret->O = new uint[ret->O_len];
341   rd = fread(ret->O,sizeof(uint),ret->O_len,fp);
342   if(rd!=ret->O_len) {
343     ret->O=NULL;
344     delete ret;
345     return NULL;
346   }
347         ret->create_sampling(ret->sample_rate);
348         return ret;
349 }