1 /* static_sequence_wvtree_noptrs.cpp
2 * Copyright (C) 2008, Francisco Claude, all rights reserved.
4 * static_sequence_wvtree_noptrs definition
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <static_sequence_wvtree_noptrs.h>
25 static_sequence_wvtree_noptrs::static_sequence_wvtree_noptrs(uint * symbols, uint n, static_bitsequence_builder * bmb, alphabet_mapper * am, bool deleteSymbols) {
30 symbols[i] = am->map(symbols[i]);
31 max_v=max_value(symbols,n);
33 uint *occurrences=new uint[max_v+1];
34 for(uint i=0;i<=max_v;i++) occurrences[i]=0;
36 occurrences[symbols[i]]++;
38 for(uint i=0;i<max_v;i++)
39 if(occurrences[i]==0) to_add++;
40 uint * new_symb = new uint[n+to_add];
42 new_symb[i] = symbols[i];
51 for(uint i=0;i<max_v;i++)
52 if(occurrences[i]==0) {
57 uint new_n = n+to_add;
58 for(uint i=1;i<=max_v;i++)
59 occurrences[i] += occurrences[i-1];
60 uint *oc = new uint[(new_n+1)/W+1];
61 for(uint i=0;i<(new_n+1)/W+1;i++)
63 for(uint i=0;i<=max_v;i++)
64 bitset(oc,occurrences[i]-1);
66 occ = bmb->build(oc,new_n+1);
67 delete [] occurrences;
69 uint ** _bm=new uint*[height];
70 for(uint i=0;i<height;i++) {
71 _bm[i] = new uint[new_n/W+1];
72 for(uint j=0;j<new_n/W+1;j++)
75 build_level(_bm,new_symb,0,new_n,0);
76 bitstring = new static_bitsequence*[height];
77 for(uint i=0;i<height;i++) {
78 bitstring[i] = bmb->build(_bm[i],new_n);
85 symbols[i] = am->unmap(symbols[i]);
87 // delete [] new_symb; // already deleted in build_level()!
91 // symbols is an array of elements of "width" bits
92 static_sequence_wvtree_noptrs::static_sequence_wvtree_noptrs(uint * symbols, uint n, unsigned width, static_bitsequence_builder * bmb, alphabet_mapper * am, bool deleteSymbols) {
97 set_field(symbols, width, i, am->map(get_field(symbols, width, i)));
98 max_v=max_value(symbols, width, n);
100 uint *occurrences=new uint[max_v+1];
101 for(uint i=0;i<=max_v;i++) occurrences[i]=0;
102 for(uint i=0;i<n;i++)
103 occurrences[get_field(symbols, width, i)]++;
105 for(uint i=0;i<max_v;i++)
106 if(occurrences[i]==0) to_add++;
107 uint * new_symb = new uint[((n+to_add)*width)/W + 1];
108 for(uint i=0;i<n;i++)
109 set_field(new_symb, width, i, get_field(symbols, width, i));
118 for(uint i=0;i<max_v;i++)
119 if(occurrences[i]==0) {
121 set_field(new_symb, width, n+to_add, i);
124 uint new_n = n+to_add;
125 for(uint i=1;i<=max_v;i++)
126 occurrences[i] += occurrences[i-1];
127 uint *oc = new uint[(new_n+1)/W+1];
128 for(uint i=0;i<(new_n+1)/W+1;i++)
130 for(uint i=0;i<=max_v;i++)
131 bitset(oc,occurrences[i]-1);
133 occ = bmb->build(oc,new_n+1);
134 delete [] occurrences;
136 uint ** _bm=new uint*[height];
137 for(uint i=0;i<height;i++) {
138 _bm[i] = new uint[new_n/W+1];
139 for(uint j=0;j<new_n/W+1;j++)
142 build_level(_bm,new_symb,width,0,new_n,0);
143 bitstring = new static_bitsequence*[height];
144 for(uint i=0;i<height;i++) {
145 bitstring[i] = bmb->build(_bm[i],new_n);
151 for(uint i=0;i<n;i++)
152 set_field(symbols, width, i, am->unmap(get_field(symbols, width, i)));
154 // delete [] new_symb; // already deleted in build_level()!
158 static_sequence_wvtree_noptrs::static_sequence_wvtree_noptrs() {
161 static_sequence_wvtree_noptrs::~static_sequence_wvtree_noptrs() {
162 for(uint i=0;i<height;i++)
169 uint static_sequence_wvtree_noptrs::save(FILE *fp) {
170 uint wr = WVTREE_NOPTRS_HDR;
171 wr = fwrite(&wr,sizeof(uint),1,fp);
172 wr += fwrite(&n,sizeof(uint),1,fp);
173 wr += fwrite(&max_v,sizeof(uint),1,fp);
174 wr += fwrite(&height,sizeof(uint),1,fp);
176 if(am->save(fp)) return 1;
177 for(uint i=0;i<height;i++)
178 if(bitstring[i]->save(fp)) return 1;
179 if(occ->save(fp)) return 1;
183 static_sequence_wvtree_noptrs * static_sequence_wvtree_noptrs::load(FILE *fp) {
185 if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL;
186 if(rd!=WVTREE_NOPTRS_HDR) return NULL;
187 static_sequence_wvtree_noptrs * ret = new static_sequence_wvtree_noptrs();
188 rd = fread(&ret->n,sizeof(uint),1,fp);
189 rd += fread(&ret->max_v,sizeof(uint),1,fp);
190 rd += fread(&ret->height,sizeof(uint),1,fp);
195 ret->am = alphabet_mapper::load(fp);
201 ret->bitstring = new static_bitsequence*[ret->height];
202 for(uint i=0;i<ret->height;i++) {
203 ret->bitstring[i] = static_bitsequence::load(fp);
204 if(ret->bitstring[i]==NULL){
209 ret->occ = static_bitsequence::load(fp);
217 uint static_sequence_wvtree_noptrs::access(uint pos) {
222 while(level<height) {
223 assert(pos>=start && pos<=end);
224 if(bitstring[level]->access(pos)) {
226 pos=bitstring[level]->rank1(pos-1)-bitstring[level]->rank1(start-1);
227 start=(bitstring[level]->rank1(end)-bitstring[level]->rank1(start-1));
232 pos=pos-start-(bitstring[level]->rank1(pos)-bitstring[level]->rank1(start-1));
233 end=end-start-(bitstring[level]->rank1(end)-bitstring[level]->rank1(start-1));
239 return am->unmap(ret);
242 uint static_sequence_wvtree_noptrs::rank(uint symbol, uint pos) {
243 symbol = am->map(symbol);
248 while(level<height) {
249 if(is_set(symbol,level)) {
250 pos=bitstring[level]->rank1(pos)-bitstring[level]->rank1(start-1)-1;
252 start=(bitstring[level]->rank1(end)-bitstring[level]->rank1(start-1));
257 pos=pos-start+bitstring[level]->rank1(start-1)-bitstring[level]->rank1(pos);
259 end=end-start-(bitstring[level]->rank1(end)-bitstring[level]->rank1(start-1));
264 if(count==0) return 0;
269 vector<int> static_sequence_wvtree_noptrs::access(uint i, uint j, uint min, uint max)
271 vector<int> resultSet;
272 // cout << "height = " << height << endl;
273 access(resultSet, i, j, am->map(min), am->map(max), 0, 0, 0, n-1);
277 void static_sequence_wvtree_noptrs::access(vector<int> &result, uint i, uint j, uint min, uint max, uint l, uint pivot, uint start, uint end)
279 uint symbol = pivot | (1 << (height-l-1));
280 //std::cout << "At l = " << l << ", [" << i << ", " << j << "], [" << min << ", " << max << "], [" << start << ", " << end << "], symbol = " << symbol << std::endl;
284 if (i <= j && pivot >= min && pivot <= max && start <= end)
285 result.push_back(am->unmap((int)pivot));
289 if (j < i || max < min || end < start)
295 uint newi = i + bitstring[l]->rank1(start-1) - bitstring[l]->rank1(i-1);
296 uint newend = end - (bitstring[l]->rank1(end) - bitstring[l]->rank1(start-1));
297 uint newj = j + bitstring[l]->rank1(start-1) - bitstring[l]->rank1(j) + 1;
299 uint newmax = max < symbol - 1 ? max : symbol - 1;
301 access(result, newi, newj-1, min, newmax, l+1, pivot, start, newend);
307 uint newstart = (bitstring[l]->rank1(end)-bitstring[l]->rank1(start-1));
308 newstart = end - newstart + 1;
309 uint newi = bitstring[l]->rank1(i-1)-bitstring[l]->rank1(start-1) + newstart;
310 uint newj = bitstring[l]->rank1(j)-bitstring[l]->rank1(start-1) + newstart;
312 uint newmin = min > symbol ? min : symbol;
314 access(result, newi, newj-1, newmin, max, l+1, symbol, newstart, end);
319 vector<int> static_sequence_wvtree_noptrs::accessAll(uint i, uint j)
321 vector<int> resultSet;
325 resultSet.reserve(j-i+1);
326 accessAll(resultSet, i, j, 0, 0, 0, n-1);
330 void static_sequence_wvtree_noptrs::accessAll(vector<int> &result, uint i, uint j, uint l, uint pivot, uint start, uint end)
332 uint symbol = pivot | (1 << (height-l-1));
333 // std::cout << "At l = " << l << ", [" << i << ", " << j << "], [" << start << ", " << end << "], symbol = " << symbol << std::endl;
337 if (i <= j && start <= end)
338 result.push_back(am->unmap((int)pivot));
342 if (j < i || end < start)
347 uint newi = i + bitstring[l]->rank1(start-1) - bitstring[l]->rank1(i-1);
348 uint newend = end - (bitstring[l]->rank1(end) - bitstring[l]->rank1(start-1));
349 uint newj = j + bitstring[l]->rank1(start-1) - bitstring[l]->rank1(j) + 1;
352 accessAll(result, newi, newj-1, l+1, pivot, start, newend);
357 uint newstart = (bitstring[l]->rank1(end)-bitstring[l]->rank1(start-1));
358 newstart = end - newstart + 1;
359 uint newi = bitstring[l]->rank1(i-1)-bitstring[l]->rank1(start-1) + newstart;
360 uint newj = bitstring[l]->rank1(j)-bitstring[l]->rank1(start-1) + newstart;
363 accessAll(result, newi, newj-1, l+1, symbol, newstart, end);
368 uint static_sequence_wvtree_noptrs::count(uint i, uint j, uint min, uint max)
370 return count(i, j, am->map(min), am->map(max), 0, 0, 0, n-1);
373 uint static_sequence_wvtree_noptrs::count(uint i, uint j, uint min, uint max, uint l, uint pivot, uint start, uint end)
375 uint symbol = pivot | (1 << (height-l-1));
376 //std::cout << "At l = " << l << ", [" << i << ", " << j << "], [" << min << ", " << max << "], [" << start << ", " << end << "], symbol = " << symbol << std::endl;
380 if (i <= j && pivot >= min && pivot <= max && start <= end)
385 if (j < i || max < min || end < start)
392 uint newi = i + bitstring[l]->rank1(start-1) - bitstring[l]->rank1(i-1);
393 uint newend = end - (bitstring[l]->rank1(end) - bitstring[l]->rank1(start-1));
394 uint newj = j + bitstring[l]->rank1(start-1) - bitstring[l]->rank1(j) + 1;
396 uint newmax = max < symbol - 1 ? max : symbol - 1;
398 result += count(newi, newj-1, min, newmax, l+1, pivot, start, newend);
404 uint newstart = (bitstring[l]->rank1(end)-bitstring[l]->rank1(start-1));
405 newstart = end - newstart + 1;
406 uint newi = bitstring[l]->rank1(i-1)-bitstring[l]->rank1(start-1) + newstart;
407 uint newj = bitstring[l]->rank1(j)-bitstring[l]->rank1(start-1) + newstart;
409 uint newmin = min > symbol ? min : symbol;
411 result += count(newi, newj-1, newmin, max, l+1, symbol, newstart, end);
418 inline uint get_start(uint symbol, uint mask) {
422 inline uint get_end(uint symbol, uint mask) {
423 return get_start(symbol,mask)+!mask+1;
426 uint static_sequence_wvtree_noptrs::select(uint symbol, uint j) {
427 symbol = am->map(symbol);
428 uint mask = (1<<height)-2;
430 uint level = height-1;
433 uint start = get_start(symbol,mask);
434 uint end = min(max_v+1,start+sum);
435 start = (start==0)?0:(occ->select1(start)+1);
436 end = occ->select1(end+1)-1;
437 if(is_set(symbol,level)) {
438 uint ones_start = bitstring[level]->rank1(start-1);
439 pos = bitstring[level]->select1(ones_start+pos)-start+1;
442 uint ones_start = bitstring[level]->rank1(start-1);
443 pos = bitstring[level]->select0(start-ones_start+pos)-start+1;
453 uint static_sequence_wvtree_noptrs::size() {
454 uint ptrs = sizeof(static_sequence_wvtree_noptrs)+height*sizeof(static_sequence*);
455 uint bytesBitstrings = 0;
456 for(uint i=0;i<height;i++)
457 bytesBitstrings += bitstring[i]->size();
458 return bytesBitstrings+occ->size()+ptrs;
461 void static_sequence_wvtree_noptrs::build_level(uint **bm, uint *symbols, uint level, uint length, uint offset) {
468 for(uint i=0;i<length;i++)
469 if(!is_set(symbols[i],level))
471 uint cright=length-cleft;
472 uint *left=new uint[cleft], *right=new uint[cright];
474 for(uint i=0;i<length;i++)
475 if(!is_set(symbols[i],level)) {
476 left[cleft++]=symbols[i];
477 bitclean(bm[level],offset+i);
480 right[cright++]=symbols[i];
481 bitset(bm[level],offset+i);
487 build_level(bm,left,level+1,cleft,offset);
488 left = 0; // Gets deleted in recursion.
489 build_level(bm,right,level+1,cright,offset+cleft);
490 right = 0; // Gets deleted in recursion.
495 // symbols is an array of elements of "width" bits.
496 void static_sequence_wvtree_noptrs::build_level(uint **bm, uint *symbols, unsigned width, uint level, uint length, uint offset) {
503 for(uint i=0;i<length;i++)
504 if(!is_set(get_field(symbols, width, i),level))
506 uint cright=length-cleft;
507 uint *left=new uint[(cleft*width)/W + 1],
508 *right=new uint[(cright*width)/W + 1];
510 for(uint i=0;i<length;i++)
511 if(!is_set(get_field(symbols,width,i),level)) {
512 set_field(left,width,cleft++,get_field(symbols, width,i));
513 bitclean(bm[level],offset+i);
516 set_field(right,width,cright++,get_field(symbols,width,i));
517 bitset(bm[level],offset+i);
523 build_level(bm,left,width,level+1,cleft,offset);
524 left = 0; // Gets deleted in recursion.
525 build_level(bm,right,width,level+1,cright,offset+cleft);
526 right = 0; // Gets deleted in recursion.
531 uint static_sequence_wvtree_noptrs::max_value(uint *symbols, uint n) {
533 for(uint i=0;i<n;i++)
534 max_v = max(symbols[i],max_v);
538 uint static_sequence_wvtree_noptrs::max_value(uint *symbols, unsigned width, uint n) {
540 for(uint i=0;i<n;i++)
541 max_v = max(get_field(symbols, width, i),max_v);
545 uint static_sequence_wvtree_noptrs::bits(uint val) {
554 bool static_sequence_wvtree_noptrs::is_set(uint val, uint ind) {
556 return (val & (1<<(height-ind-1)))!=0;
560 uint static_sequence_wvtree_noptrs::set(uint val, uint ind) {
562 return val | (1<<(height-ind-1));