5 typedef unsigned int qword;
8 typedef unsigned long long qword;
11 #define PBS (sizeof(uint)*8)
17 #define logLL 16 // size of word
23 #define LLL (1<<logLLL)
25 //#define logL (logLL-3)
26 #define logL (logLL-1-5)
40 int __setbit(uint *B, int i,int x) {
45 if (x==0) B[j] &= (~(1<<(D-1-l)));
46 else if (x==1) B[j] |= (1<<(D-1-l));
48 printf("error __setbit x=%d\n",x);
55 int __setbit2(uchar *B, int i,int x) {
60 if (x==0) B[j] &= (~(1<<(8-1-l)));
61 else if (x==1) B[j] |= (1<<(8-1-l));
63 printf("error __setbit2 x=%d\n",x);
70 int __setbits(uint *B, int i, int d, int x) {
74 __setbit(B,i+j,(x>>(d-j-1))&1);
80 int __getbit(uint *B, int i) {
87 return (B[j] >> (D-1-l)) & 1;
91 int __getbit2(uchar *B, int i) {
98 return (B[j] >> (8-1-l)) & 1;
103 uint __getbits(uint *B, int i, int d) {
109 x = (((qword)B[0]) << D) + B[1];
115 x = (((qword)B[0])<<D)+B[1];
118 x &= (((qword)1L<<D)-1)<<D;
130 uint __getbits(uint *B, int i, int d) {
134 for (j=0; j<d; j++) {
136 x += __getbit(B,i+j);
142 static const unsigned int _popCount[] = {
143 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,
144 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
145 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
146 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
147 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
148 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
149 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
150 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
151 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
152 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
153 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
154 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
155 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
156 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
157 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
158 4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
160 static inline unsigned int
161 _fast_popcount2(int x)
163 uint m1 = 0x55555555;
164 uint m2 = 0x33333333;
165 uint m4 = 0x0f0f0f0f;
167 x = (x & m2) + ((x >> 2) & m2);
168 x = (x + (x >> 4)) & m4;
170 return (x + (x >> 16)) & 0x3f;
173 static inline unsigned int
174 _fast_popcount3(int x)
176 uint m1 = 0x55555555;
177 uint m2 = 0xc30c30c3;
179 x = (x & m2) + ((x >> 2) & m2) + ((x >> 4) & m2);
181 return (x + (x >> 12) + (x >> 24)) & 0x3f;
184 static inline unsigned int
185 _fast_popcount(int x) {
189 static unsigned int __selecttbl[8*256];
190 static int built = 0;
192 void make___selecttbl(void) {
198 for (x = 0; x < 256; x++) {
199 __setbits(buf,0,8,x);
200 for (r=0; r<8; r++) __selecttbl[(r<<8)+x] = -1;
202 for (i=0; i<8; i++) {
203 if (__getbit(buf,i)) {
204 __selecttbl[(r<<8)+x] = i;
212 unsigned int __popCount(uint x) {
216 r = r - ((r>>1) & 0x77777777) - ((r>>2) & 0x33333333) - ((r>>3) & 0x11111111);
217 r = ((r + (r>>4)) & 0x0f0f0f0f) % 0xff;
220 r = ((r & 0xaaaaaaaa)>>1) + (r & 0x55555555);
221 r = ((r & 0xcccccccc)>>2) + (r & 0x33333333);
222 //r = ((r & 0xf0f0f0f0)>>4) + (r & 0x0f0f0f0f);
223 r = ((r>>4) + r) & 0x0f0f0f0f;
224 //r = ((r & 0xff00ff00)>>8) + (r & 0x00ff00ff);
226 //r = ((r & 0xffff0000)>>16) + (r & 0x0000ffff);
227 r = ((r>>16) + r) & 63;
229 r = _popCount[x & 0xff];
231 r += _popCount[x & 0xff];
233 r += _popCount[x & 0xff];
235 r += _popCount[x & 0xff];
241 unsigned int __popCount8(uint x) {
245 r = ((r & 0xaa)>>1) + (r & 0x55);
246 r = ((r & 0xcc)>>2) + (r & 0x33);
247 r = ((r>>4) + r) & 0x0f;
249 r = _popCount[x & 0xff];
255 int selectd2_save(selectd2 * s, FILE * fp) {
257 wr += fwrite(&s->n,sizeof(uint),1,fp);
258 wr += fwrite(&s->m,sizeof(uint),1,fp);
259 wr += fwrite(&s->size,sizeof(uint),1,fp);
260 wr += fwrite(&s->ss_len,sizeof(uint),1,fp);
261 wr += fwrite(&s->sl_len,sizeof(uint),1,fp);
262 wr += fwrite(s->buf,sizeof(uchar),(s->n+7)/8+1,fp);
263 uint nl = (s->m-1) / L + 1;
264 wr += fwrite(s->lp,sizeof(uint),nl+1,fp);
265 wr += fwrite(s->p,sizeof(uint),nl+1,fp);
266 wr += fwrite(s->ss,sizeof(ushort),s->ss_len,fp);
267 wr += fwrite(s->sl,sizeof(uint),s->sl_len,fp);
268 if(wr!=s->sl_len+s->ss_len+2*(nl+1)+(s->n+7)/8+1+5)
274 int selectd2_load(selectd2 * s, FILE * fp) {
276 rd += fread(&s->n,sizeof(uint),1,fp);
277 rd += fread(&s->m,sizeof(uint),1,fp);
278 rd += fread(&s->size,sizeof(uint),1,fp);
279 rd += fread(&s->ss_len,sizeof(uint),1,fp);
280 rd += fread(&s->sl_len,sizeof(uint),1,fp);
281 s->buf = new uchar[(s->n+7)/8+1];
282 rd += fread(s->buf,sizeof(uchar),(s->n+7)/8+1,fp);
283 uint nl = (s->m-1) / L + 1;
284 s->lp = new uint[nl+1];
285 rd += fread(s->lp,sizeof(uint),nl+1,fp);
286 s->p = new uint[nl+1];
287 rd += fread(s->p,sizeof(uint),nl+1,fp);
288 s->ss = new ushort[s->ss_len];
289 rd += fread(s->ss,sizeof(ushort),s->ss_len,fp);
290 s->sl = new uint[s->sl_len];
291 rd += fread(s->sl,sizeof(uint),s->sl_len,fp);
292 if(rd!=s->sl_len+s->ss_len+2*(nl+1)+(s->n+7)/8+1+5)
298 void selectd2_free(selectd2 * s) {
307 int selectd2_construct(selectd2 *select, int n, uchar *buf) {
318 printf("ERROR: L=%d LLL=%d\n",L,LLL);
323 for (i=0; i<n; i++) m += __getbit2(buf,i);
326 //printf("n=%d m=%d\n",n,m);
332 for (i=0; i<n; i++) {
333 if (__getbit2(buf,i)) {
340 select->size = 0; //ignoring buf, shared with selects3
341 select->lp = new uint[nl+1];
342 for(int k=0;k<nl+1;k++) select->lp[k]=0;
343 select->size += (nl+1)*sizeof(uint);
344 select->p = new uint[nl+1];
345 for(int k=0;k<nl+1;k++) select->p[k]=0;
346 select->size += (nl+1)*sizeof(uint);
348 for (r = 0; r < 2; r++) {
350 for (il = 0; il < nl; il++) {
353 i = min((il+1)*L-1,m-1);
355 //printf("%d ",p-pp);
358 for (is = 0; is < L; is++) {
359 if (il*L+is >= m) break;
360 select->sl[ml*L+is] = s[il*L+is];
363 select->p[il] = -((ml<<logL)+1);
368 for (is = 0; is < L/LLL; is++) {
369 if (il*L+is*LLL >= m) break;
370 select->ss[ms*(L/LLL)+is] = s[il*L+is*LLL] - pp;
373 select->p[il] = ms << (logL-logLLL);
378 select->sl = new uint[ml*L+1];
379 for(int k=0;k<ml*L+1;k++) select->sl[k]=0;
380 select->size += sizeof(uint)*(ml*L+1);
381 select->sl_len = ml*L+1;
382 select->ss = new ushort[ms*(L/LLL)+1];
383 for(int k=0;k<ms*(L/LLL)+1;k++) select->ss[k]=0;
384 select->ss_len = ms*(L/LLL)+1;
385 select->size += sizeof(ushort)*(ms*(L/LLL)+1);
393 int selectd2_select(selectd2 *select, int i,int f) {
399 if (i == 0) return -1;
403 printf("ERROR: m=%d i=%d\n",select->m,i);
410 il = select->p[i>>logL];
413 //p = select->sl[(il<<logL)+(i & (L-1))];
414 p = select->sl[il+(i & (L-1))];
417 p = select->lp[i>>logL];
418 //p += select->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL];
419 p += select->ss[il+((i & (L-1))>>logLLL)];
420 r = i - (i & (LLL-1));
422 q = &(select->buf[p>>3]);
426 //r -= _popCount[*q >> (8-1-rr)];
427 r -= _fast_popcount(*q >> (8-1-rr));
431 //rr = _popCount[*q];
432 rr = _fast_popcount(*q);
433 if (r + rr >= i) break;
438 p = (q - select->buf) << 3;
439 p += __selecttbl[((i-r-1)<<8)+(*q)];
443 //r -= _popCount[(*q ^ 0xff) >> (8-1-rr)];
444 r -= _fast_popcount((*q ^ 0xff) >> (8-1-rr));
448 //rr = _popCount[*q ^ 0xff];
449 rr = _fast_popcount(*q ^ 0xff);
450 if (r + rr >= i) break;
455 p = (q - select->buf) << 3;
456 p += __selecttbl[((i-r-1)<<8)+(*q ^ 0xff)];
463 int selectd2_select2(selectd2 *select, int i,int f, int *st, int *en) {
476 printf("ERROR: m=%d i=%d\n",select->m,i);
483 il = select->p[i>>logL];
486 //p = select->sl[(il<<logL)+(i & (L-1))];
487 p = select->sl[il+(i & (L-1))];
489 if ((i>>logL) == ((i+1)>>logL)) {
490 p2 = select->sl[il+((i+1) & (L-1))];
493 p2 = selectd2_select(select,i+2,f);
497 p = select->lp[i>>logL];
498 //p += select->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL];
499 p += select->ss[il+((i & (L-1))>>logLLL)];
500 r = i - (i & (LLL-1));
502 q = &(select->buf[p>>3]);
506 //r -= _popCount[*q >> (8-1-rr)];
507 r -= _fast_popcount(*q >> (8-1-rr));
511 //rr = _popCount[*q];
512 rr = _fast_popcount(*q);
513 if (r + rr >= i) break;
518 p = (q - select->buf) << 3;
519 p += __selecttbl[((i-r-1)<<8)+(*q)];
521 if ((i>>logL) == ((i+1)>>logL)) {
524 //rr = _popCount[*q];
525 r = _fast_popcount(*q);
526 if (r + rr >= i) break;
530 p2 = (q - select->buf) << 3;
531 p2 += __selecttbl[((i-r-1)<<8)+(*q)];
534 p2 = selectd2_select(select,i+2,f);
540 //r -= _popCount[(*q ^ 0xff) >> (8-1-rr)];
541 r -= _fast_popcount((*q ^ 0xff) >> (8-1-rr));
545 //rr = _popCount[*q ^ 0xff];
546 rr = _fast_popcount(*q ^ 0xff);
547 if (r + rr >= i) break;
552 p = (q - select->buf) << 3;
553 p += __selecttbl[((i-r-1)<<8)+(*q ^ 0xff)];
555 if ((i>>logL) == ((i+1)>>logL)) {
558 //rr = _popCount[*q ^ 0xff];
559 rr = _fast_popcount(*q ^ 0xff);
560 if (r + rr >= i) break;
564 p2 = (q - select->buf) << 3;
565 p2 += __selecttbl[((i-r-1)<<8)+(*q ^ 0xff)];
568 p2 = selectd2_select(select,i+2,f);
578 int selects3_save(selects3 * s, FILE * fp) {
580 wr += fwrite(&s->n,sizeof(uint),1,fp);
581 wr += fwrite(&s->m,sizeof(uint),1,fp);
582 wr += fwrite(&s->size,sizeof(uint),1,fp);
583 wr += fwrite(&s->d,sizeof(uint),1,fp);
584 wr += fwrite(&s->hi_len,sizeof(uint),1,fp);
585 wr += fwrite(&s->low_len,sizeof(uint),1,fp);
586 wr += fwrite(s->hi,sizeof(uchar),s->hi_len,fp);
587 wr += fwrite(s->low,sizeof(uint),s->low_len,fp);
588 if(wr!=(6+s->hi_len+s->low_len))
590 if(selectd2_save(s->sd0,fp)) return 2;
591 if(selectd2_save(s->sd1,fp)) return 3;
596 int selects3_load(selects3 * s, FILE * fp) {
598 rd += fread(&s->n,sizeof(uint),1,fp);
599 rd += fread(&s->m,sizeof(uint),1,fp);
600 rd += fread(&s->size,sizeof(uint),1,fp);
601 rd += fread(&s->d,sizeof(uint),1,fp);
602 rd += fread(&s->hi_len,sizeof(uint),1,fp);
603 rd += fread(&s->low_len,sizeof(uint),1,fp);
604 s->hi = new uchar[s->hi_len];
605 rd += fread(s->hi,sizeof(uchar),s->hi_len,fp);
606 s->low = new uint[s->low_len];
607 rd += fread(s->low,sizeof(uint),s->low_len,fp);
608 if(rd!=(6+s->hi_len+s->low_len))
610 s->sd0 = new selectd2;
611 if(selectd2_load(s->sd0,fp)) return 2;
612 s->sd1 = new selectd2;
613 if(selectd2_load(s->sd1,fp)) return 3;
614 delete [] s->sd0->buf;
615 delete [] s->sd1->buf;
622 void selects3_free(selects3 * s) {
625 //delete [] s->sd0->buf;
626 selectd2_free(s->sd0);
628 selectd2_free(s->sd1);
633 int selects3_construct(selects3 *select, int n, uint *buf) {
641 for (i=0; i<n; i++) m += __getbit(buf,i);
645 if (m == 0) return 0;
656 buf2 = new uchar[(2*m+8-1)/8+1];
657 for(int k=0;k<(2*m+8-1)/8+1;k++) buf2[k]=0;
658 select->hi_len = (2*m+8-1)/8+1;
659 low = new uint[(d*m+PBS-1)/PBS+1];
660 for(uint k=0;k<(d*m+PBS-1)/PBS+1;k++) low[k]=0;
661 select->low_len = (d*m+PBS-1)/PBS+1;
665 select->size = sizeof(uchar)*((2*m+8-1)/8+1) + sizeof(uint)*((d*m+PBS-1)/PBS+1);
667 for (i=0; i<m*2; i++) __setbit2(buf2,i,0);
670 for (i=0; i<n; i++) {
671 if (__getbit(buf,i)) {
672 __setbit2(buf2,(i>>d)+m,1);
673 __setbits(low,m*d,d,i & ((1<<d)-1));
680 select->size += 2*sizeof(selectd2);
682 selectd2_construct(sd1,m*2,buf2);
685 for (i=0; i<m*2; i++) __setbit2(buf2,i,1-__getbit2(buf2,i));
686 selectd2_construct(sd0,m*2,buf2);
689 for (i=0; i<m*2; i++) __setbit2(buf2,i,1-__getbit2(buf2,i));
693 //selects3 * lasts3=NULL;
697 int selects3_select(selects3 *select, int i) {
702 printf("ERROR: m=%d i=%d\n",select->m,i);
707 if (i == 0) return -1;
710 /*if(select->lasti==(uint)i-1) {
711 while(!__getbit2(select->sd1->buf,++select->lasts));
714 select->lasts = selectd2_select(select->sd1,i,1);
717 //lasts3 = select; */
718 x = selectd2_select(select->sd1,i,1) - (i-1);
719 //x = (select->lasts-(i-1)) << d;
721 x += __getbits(select->low,(i-1)*d,d);
726 int selects3_selectnext(selects3 *select, int i) {
727 //return selects3_select(select,selects3_rank(select,i)+1);
735 y = selectd2_select(select->sd0,ii,0)+1;
744 if (((z << r) & 0x80) == 0) {
748 w = __getbits(q,x*d,d);
751 if(__getbit2(select->hi,(8*y+r))) k2++;
759 if(__getbit2(select->hi,(8*y+r))) k2++;
770 for(int kk=0;kk<8-r;kk++) {
771 if(__getbit2(select->hi,c)) {
779 while(select->hi[pp]==0) {
783 while(!__getbit2(select->hi,c)) c++;
786 return __getbits(q,x*d,d)+((c)<<d);
789 int selects3_rank(selects3 *select, int i) {
800 y = selectd2_select(select->sd0,ii,0)+1;
801 // selectd2_select2(select->sd0,ii,0,&y1,&y2);
803 //printf("y %d y1 %d %d\n",y,y1,y2-y1);
813 if (((z << r) & 0x80) == 0) break;
814 w = __getbits(q,x*d,d);