Adding support for building wavelet trees using uchar arrays
[SXSI/XMLTree.git] / libcds / src / static_sequence / wt_node_internal.cpp
1 /* wt_node_internal.cpp
2  * Copyright (C) 2008, Francisco Claude, all rights reserved.
3  *
4  * wt_node_internal
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this library; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
19  *
20  */
21  
22 #include <wt_node_internal.h>
23
24 wt_node_internal::wt_node_internal(uint * symbols, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb) {
25         uint * ibitmap = new uint[n/W+1];
26         for(uint i=0;i<n/W+1;i++)
27                 ibitmap[i]=0;
28         for(uint i=0;i<n;i++) 
29                 if(c->is_set(symbols[i],l))
30                         bitset(ibitmap,i);
31         bitmap = bmb->build(ibitmap, n);
32   delete [] ibitmap;
33         uint count_right = bitmap->rank1(n-1);
34         uint count_left = n-count_right+1;
35         uint * left = new uint[count_left+1];
36         uint * right = new uint[count_right+1];
37         count_right = count_left = 0;
38         bool match_left = true, match_right = true;
39         for(uint i=0;i<n;i++) {
40                 if(bitmap->access(i)) {
41                         right[count_right++]=symbols[i];
42                         if(count_right>1)
43                                 if(right[count_right-1]!=right[count_right-2])
44                                         match_right = false;
45                 }
46                 else {
47                         left[count_left++]=symbols[i];
48                         if(count_left>1)
49                                 if(left[count_left-1]!=left[count_left-2])
50                                         match_left = false;
51                 }
52         }
53         if(count_left>0) {
54                 if(match_left/* && c->done(left[0],l+1)*/)
55                         left_child = new wt_node_leaf(left[0], count_left);
56                 else
57                         left_child = new wt_node_internal(left, count_left, l+1, c, bmb);
58         } else {
59                 left_child = NULL;
60         }
61         if(count_right>0) {
62                 if(match_right/* && c->done(right[0],l+1)*/)
63                         right_child = new wt_node_leaf(right[0], count_right);
64                 else
65                         right_child = new wt_node_internal(right, count_right, l+1, c, bmb);
66         } else {
67                 right_child = NULL;
68         }
69         delete [] left;
70         delete [] right;
71 }
72
73 wt_node_internal::wt_node_internal(uchar * symbols, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb) {
74         uint * ibitmap = new uint[n/W+1];
75         for(uint i=0;i<n/W+1;i++)
76                 ibitmap[i]=0;
77         for(uint i=0;i<n;i++) 
78                 if(c->is_set((uint)symbols[i],l))
79                         bitset(ibitmap,i);
80         bitmap = bmb->build(ibitmap, n);
81   delete [] ibitmap;
82         uint count_right = bitmap->rank1(n-1);
83         uint count_left = n-count_right+1;
84         uchar * left = new uchar[count_left+1];
85         uchar * right = new uchar[count_right+1];
86         count_right = count_left = 0;
87         bool match_left = true, match_right = true;
88         for(uint i=0;i<n;i++) {
89                 if(bitmap->access(i)) {
90                         right[count_right++]=symbols[i];
91                         if(count_right>1)
92                                 if(right[count_right-1]!=right[count_right-2])
93                                         match_right = false;
94                 }
95                 else {
96                         left[count_left++]=symbols[i];
97                         if(count_left>1)
98                                 if(left[count_left-1]!=left[count_left-2])
99                                         match_left = false;
100                 }
101         }
102         if(count_left>0) {
103                 if(match_left/* && c->done(left[0],l+1)*/)
104                         left_child = new wt_node_leaf((uint)left[0], count_left);
105                 else
106                         left_child = new wt_node_internal(left, count_left, l+1, c, bmb);
107         } else {
108                 left_child = NULL;
109         }
110         if(count_right>0) {
111                 if(match_right/* && c->done(right[0],l+1)*/)
112                         right_child = new wt_node_leaf((uint)right[0], count_right);
113                 else
114                         right_child = new wt_node_internal(right, count_right, l+1, c, bmb);
115         } else {
116                 right_child = NULL;
117         }
118         delete [] left;
119         delete [] right;
120 }
121
122
123 wt_node_internal::wt_node_internal() { }
124
125 wt_node_internal::~wt_node_internal() {
126         delete bitmap;
127         if(right_child!=NULL) delete right_child;
128         if(left_child!=NULL) delete left_child;
129 }
130
131 uint wt_node_internal::rank(uint symbol, uint pos, uint l, wt_coder * c) {
132         bool is_set = c->is_set(symbol,l);
133         if(!is_set) {
134                 if(left_child==NULL) return 0;
135                 return left_child->rank(symbol, bitmap->rank0(pos)-1,l+1,c);
136         }
137         else {
138                 if(right_child==NULL) return 0;
139                 return right_child->rank(symbol, bitmap->rank1(pos)-1,l+1,c);
140         }
141 }
142
143 uint wt_node_internal::select(uint symbol, uint pos, uint l, wt_coder * c) {
144         bool is_set = c->is_set(symbol, l);
145         if(!is_set) {
146                 if(left_child==NULL)
147                         return (uint)(-1);
148                 uint new_pos = left_child->select(symbol, pos, l+1,c);
149                 if(new_pos+1==0) return (uint)(-1);
150                 return bitmap->select0(new_pos)+1;
151         } else {
152                 if(right_child==NULL)
153                         return (uint)(-1);
154                 uint new_pos = right_child->select(symbol, pos, l+1,c);
155                 if(new_pos+1==0) return (uint)(-1);
156                 return bitmap->select1(new_pos)+1;
157         }
158 }
159
160 uint wt_node_internal::access(uint pos) {
161         bool is_set = bitmap->access(pos);
162         if(!is_set) {
163                 assert(left_child!=NULL);
164                 return left_child->access(bitmap->rank0(pos)-1);
165         } else {
166                 assert(right_child!=NULL);
167                 return right_child->access(bitmap->rank1(pos)-1);
168         }
169 }
170
171 uint wt_node_internal::size() {
172         uint s = bitmap->size()+sizeof(wt_node_internal);
173         if(left_child!=NULL)
174                 s += left_child->size();
175         if(right_child!=NULL)
176                 s += right_child->size();
177         return s;
178 }
179
180 uint wt_node_internal::save(FILE *fp) {
181   uint wr = WT_NODE_INTERNAL_HDR;
182   wr = fwrite(&wr,sizeof(uint),1,fp);
183   if(wr!=1) return 1;
184   if(bitmap->save(fp)) return 1;
185   if(left_child!=NULL) {
186     if(left_child->save(fp)) return 1;
187   } else {
188     wr = WT_NODE_NULL_HDR;
189     wr = fwrite(&wr,sizeof(uint),1,fp);
190     if(wr!=1) return 1;
191   }
192   if(right_child!=NULL) {
193     if(right_child->save(fp)) return 1;
194   } else {
195     wr = WT_NODE_NULL_HDR;
196     wr = fwrite(&wr,sizeof(uint),1,fp);
197     if(wr!=1) return 1;
198   }
199   return 0;
200 }
201
202 wt_node_internal * wt_node_internal::load(FILE *fp) {
203   uint rd;
204   if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL;
205   if(rd!=WT_NODE_INTERNAL_HDR) return NULL;
206   wt_node_internal * ret = new wt_node_internal();
207   ret->bitmap = static_bitsequence::load(fp);
208   ret->left_child = wt_node::load(fp);
209   ret->right_child = wt_node::load(fp);
210   return ret;
211 }