1 /* static_bitsequence_rrr02_light.cpp
2 * Copyright (C) 2008, Francisco Claude, all rights reserved.
4 * static_bitsequence_rrr02_light definition
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <static_bitsequence_rrr02_light.h>
24 #define VARS_NEEDED uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0);\
25 uint C_field_bits = bits(BLOCK_SIZE_LIGHT);\
26 uint O_len = uint_len(1,O_bits_len);\
27 uint C_sampling_len = C_len/sample_rate+2;\
28 uint C_sampling_field_bits = bits(ones);\
29 uint O_pos_len = C_len/sample_rate+1;\
30 uint O_pos_field_bits = bits(O_bits_len);
33 table_offset * static_bitsequence_rrr02_light::E = NULL;
35 static_bitsequence_rrr02_light::static_bitsequence_rrr02_light() {
38 if(E==NULL) E = new table_offset(BLOCK_SIZE_LIGHT);
44 sample_rate = DEFAULT_SAMPLING_LIGHT;
48 static_bitsequence_rrr02_light::static_bitsequence_rrr02_light(uint * bitseq, uint len, uint sample_rate) {
51 if(E==NULL) E = new table_offset(BLOCK_SIZE_LIGHT);
54 uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0);
55 uint C_field_bits = bits(BLOCK_SIZE_LIGHT);
56 C = new uint[uint_len(C_len,C_field_bits)];
57 for(uint i=0;i<uint_len(C_len,C_field_bits);i++)
60 for(uint i=0;i<C_len;i++) {
61 uint value = popcount(get_var_field(bitseq,i*BLOCK_SIZE_LIGHT,min((uint)len-1,(i+1)*BLOCK_SIZE_LIGHT-1)));
62 assert(value<=BLOCK_SIZE_LIGHT);
63 set_field(C,C_field_bits,i,value);
65 O_bits_len += E->get_log2binomial(BLOCK_SIZE_LIGHT,value);
68 uint O_len = uint_len(1,O_bits_len);
70 for(uint i=0;i<O_len;i++)
73 for(uint i=0;i<C_len;i++) {
74 uint value = (ushort)get_var_field(bitseq,i*BLOCK_SIZE_LIGHT,min((uint)len-1,(i+1)*BLOCK_SIZE_LIGHT-1));
75 set_var_field(O,O_pos,O_pos+E->get_log2binomial(BLOCK_SIZE_LIGHT,popcount(value))-1,E->compute_offset((ushort)value));
76 O_pos += E->get_log2binomial(BLOCK_SIZE_LIGHT,popcount(value));
81 create_sampling(sample_rate);
84 void static_bitsequence_rrr02_light::create_sampling(uint sample_rate) {
85 this->sample_rate = sample_rate;
86 /* for(uint i=0;i<C_len;i++) {
87 O_bits_len += E->get_log2binomial(BLOCK_SIZE_LIGHT,get_field(C,C_field_bits,i));
90 uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0);
91 uint C_field_bits = bits(BLOCK_SIZE_LIGHT);
92 uint C_sampling_len = C_len/sample_rate+2;
93 uint C_sampling_field_bits = bits(ones);
94 if(C_sampling!=NULL) delete [] C_sampling;
95 C_sampling = new uint[max((uint)1,uint_len(C_sampling_len,C_sampling_field_bits))];
96 for(uint i=0;i<max((uint)1,uint_len(C_sampling_len,C_sampling_field_bits));i++)
99 for(uint i=0;i<C_len;i++) {
101 set_field(C_sampling,C_sampling_field_bits,i/sample_rate,sum);
102 sum += get_field(C,C_field_bits,i);
104 for(uint i=(C_len-1)/sample_rate+1;i<C_sampling_len;i++)
105 set_field(C_sampling,C_sampling_field_bits,i,sum);
106 // Sampling for O (table S) (Code separated from previous construction for readability)
107 uint O_pos_len = C_len/sample_rate+1;
108 uint O_pos_field_bits = bits(O_bits_len);
109 if(O_pos!=NULL) delete [] O_pos;
110 O_pos = new uint[uint_len(O_pos_len,O_pos_field_bits)];
111 for(uint i=0;i<uint_len(O_pos_len,O_pos_field_bits);i++)
114 for(uint i=0;i<C_len;i++) {
116 set_field(O_pos,O_pos_field_bits,i/sample_rate,pos);
117 pos += E->get_log2binomial(BLOCK_SIZE_LIGHT,get_field(C,C_field_bits,i));
121 bool static_bitsequence_rrr02_light::access(uint i) {
122 uint C_field_bits = bits(BLOCK_SIZE_LIGHT);
123 uint O_pos_field_bits = bits(O_bits_len);
124 uint nearest_sampled_value = i/BLOCK_SIZE_LIGHT/sample_rate;
125 uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value);
126 uint pos = i/BLOCK_SIZE_LIGHT;
127 for(uint k=nearest_sampled_value*sample_rate;k<pos;k++) {
128 uint aux = get_field(C,C_field_bits,k);
129 pos_O += E->get_log2binomial(BLOCK_SIZE_LIGHT,aux);
131 uint c = get_field(C,C_field_bits,pos);
132 return ((1<<(i%BLOCK_SIZE_LIGHT))&E->short_bitmap(c,get_var_field(O,pos_O,pos_O+E->get_log2binomial(BLOCK_SIZE_LIGHT,c)-1)))!=0;
135 uint static_bitsequence_rrr02_light::rank0(uint i) {
140 uint static_bitsequence_rrr02_light::rank1(uint i) {
141 uint C_field_bits = bits(BLOCK_SIZE_LIGHT);
142 uint C_sampling_field_bits = bits(ones);
143 uint O_pos_field_bits = bits(O_bits_len);
145 uint nearest_sampled_value = i/BLOCK_SIZE_LIGHT/sample_rate;
146 uint sum = get_field(C_sampling,C_sampling_field_bits,nearest_sampled_value);
147 uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value);
148 uint pos = i/BLOCK_SIZE_LIGHT;
149 uint k=nearest_sampled_value*sample_rate;
150 if(k%2==1 && k<pos) {
151 uint aux = get_field(C,C_field_bits,k);
153 pos_O += E->get_log2binomial(BLOCK_SIZE_LIGHT,aux);
156 uchar * a = (uchar *)C;
159 while(k<(uint)max(0,(int)pos-1)) {
160 assert(((*a)&mask)==get_field(C,C_field_bits,k));
161 assert((*a)/16==get_field(C,C_field_bits,k+1));
162 sum += ((*a)&mask)+(*a)/16;
163 pos_O += E->get_log2binomial(BLOCK_SIZE_LIGHT,((*a)&mask))+E->get_log2binomial(BLOCK_SIZE_LIGHT,((*a)/16));
168 uint aux = get_field(C,C_field_bits,k);
170 pos_O += E->get_log2binomial(BLOCK_SIZE_LIGHT,aux);
173 uint c = get_field(C,C_field_bits,pos);
174 sum += popcount(((2<<(i%BLOCK_SIZE_LIGHT))-1) & E->short_bitmap(c,get_var_field(O,pos_O,pos_O+E->get_log2binomial(BLOCK_SIZE_LIGHT,c)-1)));
178 uint static_bitsequence_rrr02_light::select0(uint i) {
179 uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0);
180 uint C_field_bits = bits(BLOCK_SIZE_LIGHT);
181 uint C_sampling_len = C_len/sample_rate+2;
182 uint C_sampling_field_bits = bits(ones);
183 uint O_pos_field_bits = bits(O_bits_len);
185 if(i>len-ones) return len;
186 // Search over partial sums
188 uint end=C_sampling_len-1;
189 uint med, acc=0, pos;
192 acc = med*sample_rate*BLOCK_SIZE_LIGHT-get_field(C_sampling,C_sampling_field_bits,med);
194 if(med==start) break;
202 acc = get_field(C_sampling,C_sampling_field_bits,start);
203 while(start<C_len-1 && acc+sample_rate*BLOCK_SIZE_LIGHT==get_field(C_sampling,C_sampling_field_bits,start+1)) {
205 acc +=sample_rate*BLOCK_SIZE_LIGHT;
207 acc = start*sample_rate*BLOCK_SIZE_LIGHT-acc;
208 pos = (start)*sample_rate;
209 uint pos_O = get_field(O_pos,O_pos_field_bits,start);
210 // Sequential search over C
212 for(;pos<C_len;pos++) {
213 s = get_field(C,C_field_bits,pos);
214 if(acc+BLOCK_SIZE_LIGHT-s>=i) break;
215 pos_O += E->get_log2binomial(BLOCK_SIZE_LIGHT,s);
216 acc += BLOCK_SIZE_LIGHT-s;
218 pos = (pos)*BLOCK_SIZE_LIGHT;
219 // Search inside the block
222 uint new_posO = pos_O+E->get_log2binomial(BLOCK_SIZE_LIGHT,s);
223 uint block = E->short_bitmap(s,get_var_field(O,pos_O,new_posO-1));
226 while(acc<i && new_posO<BLOCK_SIZE_LIGHT) {
228 acc += (((block&1)==0)?1:0);
234 assert(rank0(pos)==i);
235 assert(!access(pos));
239 uint static_bitsequence_rrr02_light::select1(uint i) {
240 uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0);
241 uint C_field_bits = bits(BLOCK_SIZE_LIGHT);
242 uint C_sampling_len = C_len/sample_rate+2;
243 uint C_sampling_field_bits = bits(ones);
244 uint O_pos_field_bits = bits(O_bits_len);
246 if(i>ones) return len;
247 // Search over partial sums
249 uint end=C_sampling_len-1;
250 uint med, acc=0, pos;
253 acc = get_field(C_sampling,C_sampling_field_bits,med);
255 if(med==start) break;
263 acc = get_field(C_sampling,C_sampling_field_bits,start);
264 while(start<C_len-1 && acc==get_field(C_sampling,C_sampling_field_bits,start+1)) start++;
265 pos = (start)*sample_rate;
266 uint pos_O = get_field(O_pos,O_pos_field_bits,start);
267 acc = get_field(C_sampling,C_sampling_field_bits,start);
268 // Sequential search over C
270 for(;pos<C_len;pos++) {
271 s = get_field(C,C_field_bits,pos);
273 pos_O += E->get_log2binomial(BLOCK_SIZE_LIGHT,s);
276 pos = (pos)*BLOCK_SIZE_LIGHT;
277 //cout << "pos=" << pos << endl;
278 // Search inside the block
280 uint new_posO = pos_O+E->get_log2binomial(BLOCK_SIZE_LIGHT,s);
281 uint block = E->short_bitmap(s,get_var_field(O,pos_O,new_posO-1));
284 while(acc<i && new_posO<BLOCK_SIZE_LIGHT) {
286 acc += (((block&1)!=0)?1:0);
289 //cout << "i=" << i << " len=" << len << " ones=" << ones << " pos=" << pos << " acc=" << acc << " rank=" << rank1(pos) << endl;
293 assert(rank1(pos)==i);
298 uint static_bitsequence_rrr02_light::size() {
300 /*cout << "RRR02 SIZE: " << endl;
301 cout << "Default: " << 9*sizeof(uint)+sizeof(uint*)*4 << endl;
302 cout << "Cs: " << uint_len(C_len,C_field_bits)*sizeof(uint) << endl;
303 cout << "Os: " << O_len*sizeof(uint) << endl;
304 cout << "CSamp: " << uint_len(C_sampling_len,C_sampling_field_bits)*sizeof(uint) << endl;
305 cout << "OSamp: " << uint_len(O_pos_len,O_pos_field_bits)*sizeof(uint) << endl;
306 cout << "E: " << E->size() << endl;*/
307 uint sum = sizeof(uint)*8;//sizeof(static_bitsequence_rrr02_light);
308 sum += uint_len(C_len,C_field_bits)*sizeof(uint);
309 sum += O_len*sizeof(uint);
310 sum += uint_len(C_sampling_len,C_sampling_field_bits)*sizeof(uint);
311 sum += uint_len(O_pos_len,O_pos_field_bits)*sizeof(uint);
316 static_bitsequence_rrr02_light::~static_bitsequence_rrr02_light() {
317 if(C!=NULL) delete [] C;
318 if(O!=NULL) delete [] O;
319 if(C_sampling!=NULL) delete [] C_sampling;
320 if(O_pos!=NULL) delete [] O_pos;
324 int static_bitsequence_rrr02_light::save(FILE * fp) {
325 uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0);
326 uint C_field_bits = bits(BLOCK_SIZE_LIGHT);
327 uint O_len = uint_len(1,O_bits_len);
328 uint wr = RRR02_LIGHT_HDR;
329 wr = fwrite(&wr,sizeof(uint),1,fp);
330 wr += fwrite(&len,sizeof(uint),1,fp);
331 wr += fwrite(&ones,sizeof(uint),1,fp);
332 wr += fwrite(&O_bits_len,sizeof(uint),1,fp);
333 wr += fwrite(&sample_rate,sizeof(uint),1,fp);
335 wr = fwrite(C,sizeof(uint),uint_len(C_len,C_field_bits),fp);
336 if(wr!=uint_len(C_len,C_field_bits)) return -1;
337 wr = fwrite(O,sizeof(uint),O_len,fp);
338 if(wr!=O_len) return -1;
342 static_bitsequence_rrr02_light * static_bitsequence_rrr02_light::load(FILE * fp) {
343 static_bitsequence_rrr02_light * ret = new static_bitsequence_rrr02_light();
345 rd += fread(&type,sizeof(uint),1,fp);
346 rd += fread(&ret->len,sizeof(uint),1,fp);
347 rd += fread(&ret->ones,sizeof(uint),1,fp);
348 rd += fread(&ret->O_bits_len,sizeof(uint),1,fp);
349 rd += fread(&ret->sample_rate,sizeof(uint),1,fp);
350 uint C_len = ret->len/BLOCK_SIZE_LIGHT + (ret->len%BLOCK_SIZE_LIGHT!=0);
351 uint C_field_bits = bits(BLOCK_SIZE_LIGHT);
352 uint O_len = uint_len(1,ret->O_bits_len);
353 if(rd!=5 || type!=RRR02_LIGHT_HDR) {
357 ret->C = new uint[uint_len(C_len,C_field_bits)];
358 rd = fread(ret->C,sizeof(uint),uint_len(C_len,C_field_bits),fp);
359 if(rd!=uint_len(C_len,C_field_bits)) {
364 ret->O = new uint[O_len];
365 rd = fread(ret->O,sizeof(uint),O_len,fp);
371 ret->create_sampling(ret->sample_rate);