Initial Import
[SXSI/TextCollection.git] / BitRank.cpp
1 #include "BitRank.h"
2
3 /*****************************************************************************
4  * Copyright (C) 2005, Rodrigo Gonzalez, all rights reserved.                *
5  *                                                                           *
6  * New RANK, SELECT, SELECT-NEXT and SPARSE RANK implementations.            *
7  *                                                                           *
8  * This library is free software; you can redistribute it and/or             *
9  * modify it under the terms of the GNU Lesser General Public                *
10  * License as published by the Free Software Foundation; either              *
11  * version 2.1 of the License, or (at your option) any later version.        *
12  *                                                                           *
13  * This library is distributed in the hope that it will be useful,           * 
14  * but WITHOUT ANY WARRANTY; without even the implied warranty of            *
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU         *
16  * Lesser General Public License for more details.                           *
17  *                                                                           *
18  * You should have received a copy of the GNU Lesser General Public          *
19  * License along with this library; if not, write to the Free Software       *
20  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301 USA *
21  ****************************************************************************/
22
23 // Modified by Niko Välimäki
24  
25 /////////////
26 //Rank(B,i)// 
27 /////////////
28 //This Class use a superblock size of 256-512 bits
29 //and a block size of 32-64 bits also
30
31
32 const unsigned char __popcount_tab[] =
33 {
34 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
35 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
36 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
37 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
38 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
39 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
40 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
41 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8,
42 };
43
44 const unsigned char select_tab[] =
45 {
46 0,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,
47
48 6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,
49
50 7,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,
51
52 6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,
53
54 8,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,
55
56 6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,
57
58 7,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,
59
60 6,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,5,1,2,1,3,1,2,1,4,1,2,1,3,1,2,1,
61 };
62
63
64 // bits needed to represent a number between 0 and n
65 inline ulong bits (ulong n){
66         ulong b = 0;
67         while (n) { b++; n >>= 1; }
68         return b;
69 }
70
71 #if W == 32
72     // 32 bit version
73     inline unsigned popcount (register ulong x){
74         return __popcount_tab[(x >>  0) & 0xff]  + __popcount_tab[(x >>  8) & 0xff]  + __popcount_tab[(x >> 16) & 0xff] + __popcount_tab[(x >> 24) & 0xff];
75     }
76 #else
77     // 64 bit version
78     inline unsigned popcount (register ulong x){
79         return __popcount_tab[(x >>  0) & 0xff]  + __popcount_tab[(x >>  8) & 0xff]  + __popcount_tab[(x >> 16) & 0xff] + __popcount_tab[(x >> 24) & 0xff] + __popcount_tab[(x >> 32) & 0xff] + __popcount_tab[(x >> 40) & 0xff] + __popcount_tab[(x >> 48) & 0xff] + __popcount_tab[(x >> 56) & 0xff];
80     }
81 #endif
82
83 inline unsigned popcount16 (register int x){
84   return __popcount_tab[x & 0xff]  + __popcount_tab[(x >>  8) & 0xff];
85 }
86
87 inline unsigned popcount8 (register int x){
88   return __popcount_tab[x & 0xff];
89 }
90
91 BitRank::BitRank(ulong *bitarray, ulong n, bool owner) {
92     data=bitarray;
93     this->owner = owner;
94     this->n=n;  // length of bitarray in bits
95     b = W; // b is a word
96     s=b*superFactor;
97     ulong aux=(n+1)%W;
98     if (aux != 0)
99         integers = (n+1)/W+1;
100     else 
101         integers = (n+1)/W;
102     BuildRank();
103 }
104
105 BitRank::~BitRank() {
106     delete [] Rs;
107     delete [] Rb;
108     if (owner) delete [] data;
109 }
110
111 //Build the rank (blocks and superblocks)
112 void BitRank::BuildRank()
113 {
114     ulong num_sblock = n/s;
115     ulong num_block = n/b;
116     Rs = new ulong[num_sblock+1];//+1 we add the 0 pos
117     Rb = new uchar[num_block+1];//+1 we add the 0 pos
118         
119         ulong j;
120     Rs[0] = 0lu;
121         
122     for (j=1;j<=num_sblock;j++) 
123         {
124                         Rs[j]=BuildRankSub((j-1)*superFactor,superFactor)+Rs[j-1];
125                 }
126     
127     Rb[0]=0;
128     for (ulong k=1;k<=num_block;k++) {
129         j = k / superFactor;
130         Rb[k]=BuildRankSub(j*superFactor, k%superFactor);
131           }
132 }
133
134 ulong BitRank::BuildRankSub(ulong ini, ulong bloques){
135     ulong rank=0,aux;
136     
137         
138         for(ulong i=ini;i<ini+bloques;i++) {
139                 if (i < integers) {
140                         aux=data[i];
141                         rank+=popcount(aux);
142                 }
143         }
144      return rank; //return the numbers of 1's in the interval
145 }
146
147
148 //this rank ask from 0 to n-1
149 ulong BitRank::rank(ulong i) {
150     ++i; // the following gives sum of 1s before i 
151     return Rs[i>>8]+Rb[i>>wordShift]
152         +popcount(data[i >> wordShift] & ((1lu << (i & Wminusone))-1));
153 }
154
155 ulong BitRank::select(ulong x) {
156     // returns i such that x=rank(i) && rank(i-1)<x or n if that i not exist
157     // first binary search over first level rank structure
158     // then sequential search using popcount over a int
159     // then sequential search using popcount over a char
160     // then sequential search bit a bit
161     
162     //binary search over first level rank structure
163     if (x == 0)
164         return 0;
165         
166     ulong l=0, r=n/s;
167     ulong mid=(l+r)/2;      
168     ulong rankmid = Rs[mid];
169     while (l<=r) {
170         if (rankmid<x)
171             l = mid+1;
172         else
173             r = mid-1;
174         mid = (l+r)/2;              
175         rankmid = Rs[mid];
176     }    
177     //sequential search using popcount over a int
178     ulong left;
179     left=mid*superFactor;
180     x-=rankmid;
181     ulong j = data[left];
182     
183     unsigned ones = popcount(j);
184     while (ones < x) {
185         x-=ones;left++;
186         if (left > integers) 
187             return n;
188             
189         j = data[left];
190         
191         ones = popcount(j);
192     }
193     //sequential search using popcount over a char
194     left=left*b; 
195     rankmid = popcount8(j);
196     if (rankmid < x) {
197         j=j>>8;
198         x-=rankmid;
199         left+=8;
200         rankmid = popcount8(j);
201         if (rankmid < x) {
202             j=j>>8;
203             x-=rankmid;
204             left+=8;
205             rankmid = popcount8(j);
206             if (rankmid < x) {
207                 j=j>>8;
208                 x-=rankmid;
209                 left+=8;
210             }
211         }
212     }
213
214     // then sequential search bit a bit
215     while (x>0) {
216         if  (j&1lu) x--;
217         j=j>>1;
218         left++;
219     }
220     return left-1;
221 }
222
223 ulong BitRank::select0(ulong x) {
224     // returns i such that x=rank0(i) && rank0(i-1)<x or n if that i not exist
225     // first binary search over first level rank structure
226     // then sequential search using popcount over a int
227     // then sequential search using popcount over a char
228     // then sequential search bit a bit
229     
230     //binary search over first level rank structure
231     if (x == 0)
232         return 0;
233         
234     ulong l=0, r=n/s;
235     ulong mid=(l+r)/2;
236     ulong rankmid = mid * s - Rs[mid];
237     while (l<=r) {
238         if (rankmid<x)
239             l = mid+1;
240         else
241             r = mid-1;
242         mid = (l+r)/2;              
243         rankmid = mid * s - Rs[mid];
244     }    
245     
246     //sequential search using popcount over a int
247     ulong left;
248     left=mid*superFactor;
249     x-=rankmid;
250     ulong j = data[left];
251     
252     unsigned zeros = W - popcount(j);
253     while (zeros < x) {
254         x-=zeros;
255         left++;
256         if (left > integers) 
257             return n;
258             
259         j = data[left];
260         zeros = W - popcount(j);
261     }
262     
263     //sequential search using popcount over a char
264     left=left*b;
265     rankmid = 8 - popcount8(j);
266     if (rankmid < x) {
267         j=j>>8;
268         x-=rankmid;
269         left+=8;
270         rankmid = 8 - popcount8(j);
271         if (rankmid < x) {
272             j=j>>8;
273             x-=rankmid;
274             left+=8;
275             rankmid = 8 - popcount8(j);
276             if (rankmid < x) {
277                 j=j>>8;
278                 x-=rankmid;
279                 left+=8;
280             }
281         }
282     }
283
284     // then sequential search bit a bit
285     while (x>0) {
286         if  (!(j&1lu)) x--;
287         j=j>>1;
288         left++;
289     }
290     return left - 1;
291 }
292
293
294 bool BitRank::IsBitSet(ulong i) {
295     return (1lu << (i % W)) & data[i/W];
296 }
297
298 ulong BitRank::NumberOfBits() {
299     return n;
300 }