1 /* static_sequence_wvtree_noptrs.cpp
2 * Copyright (C) 2008, Francisco Claude, all rights reserved.
4 * static_sequence_wvtree_noptrs definition
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
22 #include <static_sequence_wvtree_noptrs.h>
24 static_sequence_wvtree_noptrs::static_sequence_wvtree_noptrs(uint * symbols, uint n, static_bitsequence_builder * bmb, alphabet_mapper * am) {
29 symbols[i] = am->map(symbols[i]);
30 max_v=max_value(symbols,n);
32 uint *occurrences=new uint[max_v+1];
33 for(uint i=0;i<=max_v;i++) occurrences[i]=0;
35 occurrences[symbols[i]]++;
37 for(uint i=0;i<max_v;i++)
38 if(occurrences[i]==0) to_add++;
39 uint * new_symb = new uint[n+to_add];
41 new_symb[i] = symbols[i];
43 for(uint i=0;i<max_v;i++)
44 if(occurrences[i]==0) {
49 uint new_n = n+to_add;
50 for(uint i=1;i<=max_v;i++)
51 occurrences[i] += occurrences[i-1];
52 uint *oc = new uint[(new_n+1)/W+1];
53 for(uint i=0;i<(new_n+1)/W+1;i++)
55 for(uint i=0;i<=max_v;i++)
56 bitset(oc,occurrences[i]-1);
58 occ = bmb->build(oc,new_n+1);
59 delete [] occurrences;
61 uint ** _bm=new uint*[height];
62 for(uint i=0;i<height;i++) {
63 _bm[i] = new uint[new_n/W+1];
64 for(uint j=0;j<new_n/W+1;j++)
67 build_level(_bm,new_symb,0,new_n,0);
68 bitstring = new static_bitsequence*[height];
69 for(uint i=0;i<height;i++) {
70 bitstring[i] = bmb->build(_bm[i],new_n);
75 symbols[i] = am->unmap(symbols[i]);
80 static_sequence_wvtree_noptrs::static_sequence_wvtree_noptrs() {
83 static_sequence_wvtree_noptrs::~static_sequence_wvtree_noptrs() {
84 for(uint i=0;i<height;i++)
91 uint static_sequence_wvtree_noptrs::save(FILE *fp) {
92 uint wr = WVTREE_NOPTRS_HDR;
93 wr = fwrite(&wr,sizeof(uint),1,fp);
94 wr += fwrite(&n,sizeof(uint),1,fp);
95 wr += fwrite(&max_v,sizeof(uint),1,fp);
96 wr += fwrite(&height,sizeof(uint),1,fp);
98 if(am->save(fp)) return 1;
99 for(uint i=0;i<height;i++)
100 if(bitstring[i]->save(fp)) return 1;
101 if(occ->save(fp)) return 1;
105 static_sequence_wvtree_noptrs * static_sequence_wvtree_noptrs::load(FILE *fp) {
107 if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL;
108 if(rd!=WVTREE_NOPTRS_HDR) return NULL;
109 static_sequence_wvtree_noptrs * ret = new static_sequence_wvtree_noptrs();
110 rd = fread(&ret->n,sizeof(uint),1,fp);
111 rd += fread(&ret->max_v,sizeof(uint),1,fp);
112 rd += fread(&ret->height,sizeof(uint),1,fp);
117 ret->am = alphabet_mapper::load(fp);
123 ret->bitstring = new static_bitsequence*[ret->height];
124 for(uint i=0;i<ret->height;i++) {
125 ret->bitstring[i] = static_bitsequence::load(fp);
126 if(ret->bitstring[i]==NULL){
131 ret->occ = static_bitsequence::load(fp);
139 uint static_sequence_wvtree_noptrs::access(uint pos) {
144 while(level<height) {
145 assert(pos>=start && pos<=end);
146 if(bitstring[level]->access(pos)) {
148 pos=bitstring[level]->rank1(pos-1)-bitstring[level]->rank1(start-1);
149 start=(bitstring[level]->rank1(end)-bitstring[level]->rank1(start-1));
154 pos=pos-start-(bitstring[level]->rank1(pos)-bitstring[level]->rank1(start-1));
155 end=end-start-(bitstring[level]->rank1(end)-bitstring[level]->rank1(start-1));
161 return am->unmap(ret);
164 uint static_sequence_wvtree_noptrs::rank(uint symbol, uint pos) {
165 symbol = am->map(symbol);
170 while(level<height) {
171 if(is_set(symbol,level)) {
172 pos=bitstring[level]->rank1(pos)-bitstring[level]->rank1(start-1)-1;
174 start=(bitstring[level]->rank1(end)-bitstring[level]->rank1(start-1));
179 pos=pos-start+bitstring[level]->rank1(start-1)-bitstring[level]->rank1(pos);
181 end=end-start-(bitstring[level]->rank1(end)-bitstring[level]->rank1(start-1));
186 if(count==0) return 0;
191 inline uint get_start(uint symbol, uint mask) {
195 inline uint get_end(uint symbol, uint mask) {
196 return get_start(symbol,mask)+!mask+1;
199 uint static_sequence_wvtree_noptrs::select(uint symbol, uint j) {
200 symbol = am->map(symbol);
201 uint mask = (1<<height)-2;
203 uint level = height-1;
206 uint start = get_start(symbol,mask);
207 uint end = min(max_v+1,start+sum);
208 start = (start==0)?0:(occ->select1(start)+1);
209 end = occ->select1(end+1)-1;
210 if(is_set(symbol,level)) {
211 uint ones_start = bitstring[level]->rank1(start-1);
212 pos = bitstring[level]->select1(ones_start+pos)-start+1;
215 uint ones_start = bitstring[level]->rank1(start-1);
216 pos = bitstring[level]->select0(start-ones_start+pos)-start+1;
226 uint static_sequence_wvtree_noptrs::size() {
227 uint ptrs = sizeof(static_sequence_wvtree_noptrs)+height*sizeof(static_sequence*);
228 uint bytesBitstrings = 0;
229 for(uint i=0;i<height;i++)
230 bytesBitstrings += bitstring[i]->size();
231 return bytesBitstrings+occ->size()+ptrs;
234 void static_sequence_wvtree_noptrs::build_level(uint **bm, uint *symbols, uint level, uint length, uint offset) {
235 if(level==height) return;
237 for(uint i=0;i<length;i++)
238 if(!is_set(symbols[i],level))
240 uint cright=length-cleft;
241 uint *left=new uint[cleft], *right=new uint[cright];
243 for(uint i=0;i<length;i++)
244 if(!is_set(symbols[i],level)) {
245 left[cleft++]=symbols[i];
246 bitclean(bm[level],offset+i);
249 right[cright++]=symbols[i];
250 bitset(bm[level],offset+i);
252 build_level(bm,left,level+1,cleft,offset);
253 build_level(bm,right,level+1,cright,offset+cleft);
258 uint static_sequence_wvtree_noptrs::max_value(uint *symbols, uint n) {
260 for(uint i=0;i<n;i++)
261 max_v = max(symbols[i],max_v);
265 uint static_sequence_wvtree_noptrs::bits(uint val) {
274 bool static_sequence_wvtree_noptrs::is_set(uint val, uint ind) {
276 return (val & (1<<(height-ind-1)))!=0;
280 uint static_sequence_wvtree_noptrs::set(uint val, uint ind) {
282 return val | (1<<(height-ind-1));