1 /* static_bitsequence_rrr02.cpp
2 * Copyright (C) 2008, Francisco Claude, all rights reserved.
4 * static_bitsequence_rrr02 definition
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <static_bitsequence_rrr02.h>
25 table_offset * static_bitsequence_rrr02::E = NULL;
27 static_bitsequence_rrr02::static_bitsequence_rrr02() {
30 if(E==NULL) E = new table_offset(BLOCK_SIZE);
36 sample_rate = DEFAULT_SAMPLING;
37 C_len = O_len = C_sampling_len = O_pos_len = 0;
38 O_bits_len = C_sampling_field_bits = O_pos_field_bits = 0;
41 static_bitsequence_rrr02::static_bitsequence_rrr02(uint * bitseq, uint len, uint sample_rate) {
44 if(E==NULL) E = new table_offset(BLOCK_SIZE);
47 C_len = len/BLOCK_SIZE + (len%BLOCK_SIZE!=0);
48 C_field_bits = bits(BLOCK_SIZE);
49 C = new uint[uint_len(C_len,C_field_bits)];
50 for(uint i=0;i<uint_len(C_len,C_field_bits);i++)
53 for(uint i=0;i<C_len;i++) {
54 uint value = popcount(get_var_field(bitseq,i*BLOCK_SIZE,min((uint)len-1,(i+1)*BLOCK_SIZE-1)));
55 assert(value<=BLOCK_SIZE);
56 set_field(C,C_field_bits,i,value);
58 O_bits_len += E->get_log2binomial(BLOCK_SIZE,value);
61 O_len = uint_len(1,O_bits_len);
63 for(uint i=0;i<O_len;i++)
66 for(uint i=0;i<C_len;i++) {
67 uint value = (ushort)get_var_field(bitseq,i*BLOCK_SIZE,min((uint)len-1,(i+1)*BLOCK_SIZE-1));
68 set_var_field(O,O_pos,O_pos+E->get_log2binomial(BLOCK_SIZE,popcount(value))-1,E->compute_offset((ushort)value));
69 O_pos += E->get_log2binomial(BLOCK_SIZE,popcount(value));
74 create_sampling(sample_rate);
77 void static_bitsequence_rrr02::create_sampling(uint sample_rate) {
78 this->sample_rate = sample_rate;
79 /* for(uint i=0;i<C_len;i++) {
80 O_bits_len += E->get_log2binomial(BLOCK_SIZE,get_field(C,C_field_bits,i));
83 C_sampling_len = C_len/sample_rate+2;
84 C_sampling_field_bits = bits(ones);
85 if(C_sampling!=NULL) delete [] C_sampling;
86 C_sampling = new uint[max((uint)1,uint_len(C_sampling_len,C_sampling_field_bits))];
87 for(uint i=0;i<max((uint)1,uint_len(C_sampling_len,C_sampling_field_bits));i++)
90 for(uint i=0;i<C_len;i++) {
92 set_field(C_sampling,C_sampling_field_bits,i/sample_rate,sum);
93 sum += get_field(C,C_field_bits,i);
95 for(uint i=(C_len-1)/sample_rate+1;i<C_sampling_len;i++)
96 set_field(C_sampling,C_sampling_field_bits,i,sum);
97 // Sampling for O (table S) (Code separated from previous construction for readability)
98 O_pos_len = C_len/sample_rate+1;
99 O_pos_field_bits = bits(O_bits_len);
100 if(O_pos!=NULL) delete [] O_pos;
101 O_pos = new uint[uint_len(O_pos_len,O_pos_field_bits)];
102 for(uint i=0;i<uint_len(O_pos_len,O_pos_field_bits);i++)
105 for(uint i=0;i<C_len;i++) {
107 set_field(O_pos,O_pos_field_bits,i/sample_rate,pos);
108 pos += E->get_log2binomial(BLOCK_SIZE,get_field(C,C_field_bits,i));
112 bool static_bitsequence_rrr02::access(uint i) {
113 uint nearest_sampled_value = i/BLOCK_SIZE/sample_rate;
114 uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value);
115 uint pos = i/BLOCK_SIZE;
117 for(uint k=nearest_sampled_value*sample_rate;k<pos;k++) {
118 uint aux = get_field(C,C_field_bits,k);
119 pos_O += E->get_log2binomial(BLOCK_SIZE,aux);
121 uint c = get_field(C,C_field_bits,pos);
122 return ((1<<(i%BLOCK_SIZE))&E->short_bitmap(c,get_var_field(O,pos_O,pos_O+E->get_log2binomial(BLOCK_SIZE,c)-1)))!=0;
125 uint static_bitsequence_rrr02::rank0(uint i) {
130 uint static_bitsequence_rrr02::rank1(uint i) {
132 uint nearest_sampled_value = i/BLOCK_SIZE/sample_rate;
133 uint sum = get_field(C_sampling,C_sampling_field_bits,nearest_sampled_value);
134 uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value);
135 uint pos = i/BLOCK_SIZE;
136 uint k=nearest_sampled_value*sample_rate;
137 if(k%2==1 && k<pos) {
138 uint aux = get_field(C,C_field_bits,k);
140 pos_O += E->get_log2binomial(BLOCK_SIZE,aux);
143 uchar * a = (uchar *)C;
146 while(k<(uint)max(0,(int)pos-1)) {
147 assert(((*a)&mask)==get_field(C,C_field_bits,k));
148 assert((*a)/16==get_field(C,C_field_bits,k+1));
149 sum += ((*a)&mask)+(*a)/16;
150 pos_O += E->get_log2binomial(BLOCK_SIZE,((*a)&mask))+E->get_log2binomial(BLOCK_SIZE,((*a)/16));
155 uint aux = get_field(C,C_field_bits,k);
157 pos_O += E->get_log2binomial(BLOCK_SIZE,aux);
160 uint c = get_field(C,C_field_bits,pos);
161 sum += popcount(((2<<(i%BLOCK_SIZE))-1) & E->short_bitmap(c,get_var_field(O,pos_O,pos_O+E->get_log2binomial(BLOCK_SIZE,c)-1)));
165 uint static_bitsequence_rrr02::select0(uint i) {
166 if(i==0) return (uint)-1;
167 if(i>len-ones) return (uint)-1;
168 // Search over partial sums
170 uint end=C_sampling_len-1;
171 uint med, acc=0, pos;
174 acc = med*sample_rate*BLOCK_SIZE-get_field(C_sampling,C_sampling_field_bits,med);
176 if(med==start) break;
184 acc = get_field(C_sampling,C_sampling_field_bits,start);
185 while(start<C_len-1 && acc+sample_rate*BLOCK_SIZE==get_field(C_sampling,C_sampling_field_bits,start+1)) {
187 acc +=sample_rate*BLOCK_SIZE;
189 acc = start*sample_rate*BLOCK_SIZE-acc;
190 pos = (start)*sample_rate;
191 uint pos_O = get_field(O_pos,O_pos_field_bits,start);
192 // Sequential search over C
194 for(;pos<C_len;pos++) {
195 s = get_field(C,C_field_bits,pos);
196 if(acc+BLOCK_SIZE-s>=i) break;
197 pos_O += E->get_log2binomial(BLOCK_SIZE,s);
200 pos = (pos)*BLOCK_SIZE;
201 // Search inside the block
204 uint new_posO = pos_O+E->get_log2binomial(BLOCK_SIZE,s);
205 uint block = E->short_bitmap(s,get_var_field(O,pos_O,new_posO-1));
208 while(acc<i && new_posO<BLOCK_SIZE) {
210 acc += (((block&1)==0)?1:0);
216 assert(rank0(pos)==i);
217 assert(!access(pos));
221 uint static_bitsequence_rrr02::select1(uint i) {
223 if(i>ones) return -1;
224 // Search over partial sums
226 uint end=C_sampling_len-1;
227 uint med, acc=0, pos;
230 acc = get_field(C_sampling,C_sampling_field_bits,med);
232 if(med==start) break;
240 acc = get_field(C_sampling,C_sampling_field_bits,start);
241 while(start<C_len-1 && acc==get_field(C_sampling,C_sampling_field_bits,start+1)) start++;
242 pos = (start)*sample_rate;
243 uint pos_O = get_field(O_pos,O_pos_field_bits,start);
244 acc = get_field(C_sampling,C_sampling_field_bits,start);
245 // Sequential search over C
247 for(;pos<C_len;pos++) {
248 s = get_field(C,C_field_bits,pos);
250 pos_O += E->get_log2binomial(BLOCK_SIZE,s);
253 pos = (pos)*BLOCK_SIZE;
254 //cout << "pos=" << pos << endl;
255 // Search inside the block
257 uint new_posO = pos_O+E->get_log2binomial(BLOCK_SIZE,s);
258 uint block = E->short_bitmap(s,get_var_field(O,pos_O,new_posO-1));
261 while(acc<i && new_posO<BLOCK_SIZE) {
263 acc += (((block&1)!=0)?1:0);
266 //cout << "i=" << i << " len=" << len << " ones=" << ones << " pos=" << pos << " acc=" << acc << " rank=" << rank1(pos) << endl;
270 assert(rank1(pos)==i);
275 uint static_bitsequence_rrr02::size() {
276 /*cout << "RRR02 SIZE: " << endl;
277 cout << "Default: " << 9*sizeof(uint)+sizeof(uint*)*4 << endl;
278 cout << "Cs: " << uint_len(C_len,C_field_bits)*sizeof(uint) << endl;
279 cout << "Os: " << O_len*sizeof(uint) << endl;
280 cout << "CSamp: " << uint_len(C_sampling_len,C_sampling_field_bits)*sizeof(uint) << endl;
281 cout << "OSamp: " << uint_len(O_pos_len,O_pos_field_bits)*sizeof(uint) << endl;
282 cout << "E: " << E->size() << endl;*/
283 uint sum = sizeof(static_bitsequence_rrr02);
284 sum += uint_len(C_len,C_field_bits)*sizeof(uint);
285 sum += O_len*sizeof(uint);
286 sum += uint_len(C_sampling_len,C_sampling_field_bits)*sizeof(uint);
287 sum += uint_len(O_pos_len,O_pos_field_bits)*sizeof(uint);
292 static_bitsequence_rrr02::~static_bitsequence_rrr02() {
293 if(C!=NULL) delete [] C;
294 if(O!=NULL) delete [] O;
295 if(C_sampling!=NULL) delete [] C_sampling;
296 if(O_pos!=NULL) delete [] O_pos;
300 int static_bitsequence_rrr02::save(FILE * fp) {
302 wr = fwrite(&wr,sizeof(uint),1,fp);
303 wr += fwrite(&len,sizeof(uint),1,fp);
304 wr += fwrite(&ones,sizeof(uint),1,fp);
305 wr += fwrite(&C_len,sizeof(uint),1,fp);
306 wr += fwrite(&C_field_bits,sizeof(uint),1,fp);
307 wr += fwrite(&O_len,sizeof(uint),1,fp);
308 wr += fwrite(&O_bits_len,sizeof(uint),1,fp);
309 wr += fwrite(&sample_rate,sizeof(uint),1,fp);
311 wr = fwrite(C,sizeof(uint),uint_len(C_len,C_field_bits),fp);
312 if(wr!=uint_len(C_len,C_field_bits)) return -1;
313 wr = fwrite(O,sizeof(uint),O_len,fp);
314 if(wr!=O_len) return -1;
318 static_bitsequence_rrr02 * static_bitsequence_rrr02::load(FILE * fp) {
319 static_bitsequence_rrr02 * ret = new static_bitsequence_rrr02();
321 rd += fread(&type,sizeof(uint),1,fp);
322 rd += fread(&ret->len,sizeof(uint),1,fp);
323 rd += fread(&ret->ones,sizeof(uint),1,fp);
324 rd += fread(&ret->C_len,sizeof(uint),1,fp);
325 rd += fread(&ret->C_field_bits,sizeof(uint),1,fp);
326 rd += fread(&ret->O_len,sizeof(uint),1,fp);
327 rd += fread(&ret->O_bits_len,sizeof(uint),1,fp);
328 rd += fread(&ret->sample_rate,sizeof(uint),1,fp);
329 if(rd!=8 || type!=RRR02_HDR) {
333 ret->C = new uint[uint_len(ret->C_len,ret->C_field_bits)];
334 rd = fread(ret->C,sizeof(uint),uint_len(ret->C_len,ret->C_field_bits),fp);
335 if(rd!=uint_len(ret->C_len,ret->C_field_bits)) {
340 ret->O = new uint[ret->O_len];
341 rd = fread(ret->O,sizeof(uint),ret->O_len,fp);
347 ret->create_sampling(ret->sample_rate);