5 typedef unsigned int qword;
8 typedef unsigned long long qword;
11 #define PBS (sizeof(uint)*8)
17 #define logLL 16 // size of word
23 #define LLL (1<<logLLL)
25 //#define logL (logLL-3)
26 #define logL (logLL-1-5)
40 int __setbit(uint *B, int i,int x) {
45 if (x==0) B[j] &= (~(1<<(D-1-l)));
46 else if (x==1) B[j] |= (1<<(D-1-l));
48 printf("error __setbit x=%d\n",x);
55 int __setbit2(uchar *B, int i,int x) {
60 if (x==0) B[j] &= (~(1<<(8-1-l)));
61 else if (x==1) B[j] |= (1<<(8-1-l));
63 printf("error __setbit2 x=%d\n",x);
70 int __setbits(uint *B, int i, int d, int x) {
74 __setbit(B,i+j,(x>>(d-j-1))&1);
80 int __getbit(uint *B, int i) {
87 return (B[j] >> (D-1-l)) & 1;
91 static int __getbit2(uchar *B, int i) {
98 return (B[j] >> (8-1-l)) & 1;
103 uint __getbits_aux(uint *B, int i, int d) {
108 if (d==8 && (j & 7) == 0) {
110 x = (uint) (((unsigned char*) B)[i]);
111 } else if (i+d <= 2*D) {
112 x = (((qword)B[0]) << D) + B[1];
118 fprintf(stderr, "Warning: %d, %d\n", D, d);
119 x = (((qword)B[0])<<D)+B[1];
122 x &= (((qword)1L<<D)-1)<<D;
132 uint __getbits(uint *B, int i, int d)
136 // y = __getbits_aux(B, i,d);
139 x = ((ulong *) B)[0];
140 x = (x << 32)|(x >> 32);
143 // fprintf(stderr, "slow: %i, fast: %i\n",
151 uint __getbits(uint *B, int i, int d) {
155 for (j=0; j<d; j++) {
157 x += __getbit(B,i+j);
165 #ifdef HAS_NATIVE_POPCOUNT
166 static inline unsigned int popcount(unsigned int n){
167 asm ("popcnt %1, %0" : "=r" (n) : "0" (n));
171 static inline unsigned int popcount8(unsigned int n) {
172 return popcount(n & 0xff);
175 static inline unsigned int _fast_popcount(int x)
183 static const unsigned int _popCount[] = {
184 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,
185 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
186 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
187 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
188 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
189 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
190 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
191 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
192 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
193 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
194 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
195 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
196 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
197 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
198 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
199 4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
202 static inline unsigned int
203 _fast_popcount(int x) {
210 static unsigned int __selecttbl[8*256];
211 static int built = 0;
213 void make___selecttbl(void) {
219 for (x = 0; x < 256; x++) {
220 __setbits(buf,0,8,x);
221 for (r=0; r<8; r++) __selecttbl[(r<<8)+x] = -1;
223 for (i=0; i<8; i++) {
224 if (__getbit(buf,i)) {
225 __selecttbl[(r<<8)+x] = i;
233 unsigned int __popCount(uint x) {
237 r = r - ((r>>1) & 0x77777777) - ((r>>2) & 0x33333333) - ((r>>3) & 0x11111111);
238 r = ((r + (r>>4)) & 0x0f0f0f0f) % 0xff;
241 r = ((r & 0xaaaaaaaa)>>1) + (r & 0x55555555);
242 r = ((r & 0xcccccccc)>>2) + (r & 0x33333333);
243 //r = ((r & 0xf0f0f0f0)>>4) + (r & 0x0f0f0f0f);
244 r = ((r>>4) + r) & 0x0f0f0f0f;
245 //r = ((r & 0xff00ff00)>>8) + (r & 0x00ff00ff);
247 //r = ((r & 0xffff0000)>>16) + (r & 0x0000ffff);
248 r = ((r>>16) + r) & 63;
250 r = _popCount[x & 0xff];
252 r += _popCount[x & 0xff];
254 r += _popCount[x & 0xff];
256 r += _popCount[x & 0xff];
262 unsigned int __popCount8(uint x) {
266 r = ((r & 0xaa)>>1) + (r & 0x55);
267 r = ((r & 0xcc)>>2) + (r & 0x33);
268 r = ((r>>4) + r) & 0x0f;
270 r = _popCount[x & 0xff];
276 int selectd2_save(selectd2 * s, FILE * fp) {
278 wr += fwrite(&s->n,sizeof(uint),1,fp);
279 wr += fwrite(&s->m,sizeof(uint),1,fp);
280 wr += fwrite(&s->size,sizeof(uint),1,fp);
281 wr += fwrite(&s->ss_len,sizeof(uint),1,fp);
282 wr += fwrite(&s->sl_len,sizeof(uint),1,fp);
283 wr += fwrite(s->buf,sizeof(uchar),(s->n+7)/8+1,fp);
284 uint nl = (s->m-1) / L + 1;
285 wr += fwrite(s->lp,sizeof(uint),nl+1,fp);
286 wr += fwrite(s->p,sizeof(uint),nl+1,fp);
287 wr += fwrite(s->ss,sizeof(ushort),s->ss_len,fp);
288 wr += fwrite(s->sl,sizeof(uint),s->sl_len,fp);
289 if(wr!=s->sl_len+s->ss_len+2*(nl+1)+(s->n+7)/8+1+5)
295 int selectd2_load(selectd2 * s, FILE * fp) {
297 rd += fread(&s->n,sizeof(uint),1,fp);
298 rd += fread(&s->m,sizeof(uint),1,fp);
299 rd += fread(&s->size,sizeof(uint),1,fp);
300 rd += fread(&s->ss_len,sizeof(uint),1,fp);
301 rd += fread(&s->sl_len,sizeof(uint),1,fp);
302 s->buf = new uchar[(s->n+7)/8+1];
303 rd += fread(s->buf,sizeof(uchar),(s->n+7)/8+1,fp);
304 uint nl = (s->m-1) / L + 1;
305 s->lp = new uint[nl+1];
306 rd += fread(s->lp,sizeof(uint),nl+1,fp);
307 s->p = new uint[nl+1];
308 rd += fread(s->p,sizeof(uint),nl+1,fp);
309 s->ss = new ushort[s->ss_len];
310 rd += fread(s->ss,sizeof(ushort),s->ss_len,fp);
311 s->sl = new uint[s->sl_len];
312 rd += fread(s->sl,sizeof(uint),s->sl_len,fp);
313 if(rd!=s->sl_len+s->ss_len+2*(nl+1)+(s->n+7)/8+1+5)
319 void selectd2_free(selectd2 * s) {
328 int selectd2_construct(selectd2 *select, int n, uchar *buf) {
339 printf("ERROR: L=%d LLL=%d\n",L,LLL);
344 for (i=0; i<n; i++) m += __getbit2(buf,i);
347 //printf("n=%d m=%d\n",n,m);
353 for (i=0; i<n; i++) {
354 if (__getbit2(buf,i)) {
361 select->size = 0; //ignoring buf, shared with selects3
362 select->lp = new uint[nl+1];
363 for(int k=0;k<nl+1;k++) select->lp[k]=0;
364 select->size += (nl+1)*sizeof(uint);
365 select->p = new uint[nl+1];
366 for(int k=0;k<nl+1;k++) select->p[k]=0;
367 select->size += (nl+1)*sizeof(uint);
369 for (r = 0; r < 2; r++) {
371 for (il = 0; il < nl; il++) {
374 i = min((il+1)*L-1,m-1);
376 //printf("%d ",p-pp);
379 for (is = 0; is < L; is++) {
380 if (il*L+is >= m) break;
381 select->sl[ml*L+is] = s[il*L+is];
384 select->p[il] = -((ml<<logL)+1);
389 for (is = 0; is < L/LLL; is++) {
390 if (il*L+is*LLL >= m) break;
391 select->ss[ms*(L/LLL)+is] = s[il*L+is*LLL] - pp;
394 select->p[il] = ms << (logL-logLLL);
399 select->sl = new uint[ml*L+1];
400 for(int k=0;k<ml*L+1;k++) select->sl[k]=0;
401 select->size += sizeof(uint)*(ml*L+1);
402 select->sl_len = ml*L+1;
403 select->ss = new ushort[ms*(L/LLL)+1];
404 for(int k=0;k<ms*(L/LLL)+1;k++) select->ss[k]=0;
405 select->ss_len = ms*(L/LLL)+1;
406 select->size += sizeof(ushort)*(ms*(L/LLL)+1);
414 int selectd2_select1(selectd2 *select, int i) {
419 if (i <= 0) return -1;
422 il = select->p[i>>logL];
425 //p = select->sl[(il<<logL)+(i & (L-1))];
426 p = select->sl[il+(i & (L-1))];
429 p = select->lp[i>>logL];
430 p += select->ss[il+((i & (L-1))>>logLLL)];
431 r = i - (i & (LLL-1));
433 q = &(select->buf[p>>3]);
436 r -= _fast_popcount(*q >> (8-1-rr));
439 //rr = _popCount[*q];
440 rr = _fast_popcount(*q);
441 if (r + rr >= i) break;
446 p = (q - select->buf) << 3;
447 p += __selecttbl[((i-r-1)<<8)+(*q)];
452 int selectd2_select0(selectd2 *select, int i) {
457 if (i <= 0) return -1;
460 il = select->p[i>>logL];
463 //p = select->sl[(il<<logL)+(i & (L-1))];
464 p = select->sl[il+(i & (L-1))];
467 p = select->lp[i>>logL];
468 //p += select->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL];
469 p += select->ss[il+((i & (L-1))>>logLLL)];
470 r = i - (i & (LLL-1));
472 q = &(select->buf[p>>3]);
476 r -= _fast_popcount((*q ^ 0xff) >> (8-1-rr));
479 //rr = _popCount[*q ^ 0xff];
480 rr = _fast_popcount(*q ^ 0xff);
481 if (r + rr >= i) break;
486 p = (q - select->buf) << 3;
487 p += __selecttbl[((i-r-1)<<8)+(*q ^ 0xff)];
492 int selectd2_select(selectd2 *select, int i,int f) {
493 return f ? selectd2_select1(select, i) :
494 selectd2_select0(select, i);
498 int selectd2_select2(selectd2 *select, int i,int f, int *st, int *en) {
511 printf("ERROR: m=%d i=%d\n",select->m,i);
518 il = select->p[i>>logL];
521 //p = select->sl[(il<<logL)+(i & (L-1))];
522 p = select->sl[il+(i & (L-1))];
524 if ((i>>logL) == ((i+1)>>logL)) {
525 p2 = select->sl[il+((i+1) & (L-1))];
528 p2 = selectd2_select(select,i+2,f);
532 p = select->lp[i>>logL];
533 //p += select->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL];
534 p += select->ss[il+((i & (L-1))>>logLLL)];
535 r = i - (i & (LLL-1));
537 q = &(select->buf[p>>3]);
541 //r -= _popCount[*q >> (8-1-rr)];
542 r -= _fast_popcount(*q >> (8-1-rr));
546 //rr = _popCount[*q];
547 rr = _fast_popcount(*q);
548 if (r + rr >= i) break;
553 p = (q - select->buf) << 3;
554 p += __selecttbl[((i-r-1)<<8)+(*q)];
556 if ((i>>logL) == ((i+1)>>logL)) {
559 //rr = _popCount[*q];
560 r = _fast_popcount(*q);
561 if (r + rr >= i) break;
565 p2 = (q - select->buf) << 3;
566 p2 += __selecttbl[((i-r-1)<<8)+(*q)];
569 p2 = selectd2_select(select,i+2,f);
575 //r -= _popCount[(*q ^ 0xff) >> (8-1-rr)];
576 r -= _fast_popcount((*q ^ 0xff) >> (8-1-rr));
580 //rr = _popCount[*q ^ 0xff];
581 rr = _fast_popcount(*q ^ 0xff);
582 if (r + rr >= i) break;
587 p = (q - select->buf) << 3;
588 p += __selecttbl[((i-r-1)<<8)+(*q ^ 0xff)];
590 if ((i>>logL) == ((i+1)>>logL)) {
593 //rr = _popCount[*q ^ 0xff];
594 rr = _fast_popcount(*q ^ 0xff);
595 if (r + rr >= i) break;
599 p2 = (q - select->buf) << 3;
600 p2 += __selecttbl[((i-r-1)<<8)+(*q ^ 0xff)];
603 p2 = selectd2_select(select,i+2,f);
613 int selects3_save(selects3 * s, FILE * fp) {
615 wr += fwrite(&s->n,sizeof(uint),1,fp);
616 wr += fwrite(&s->m,sizeof(uint),1,fp);
617 wr += fwrite(&s->size,sizeof(uint),1,fp);
618 wr += fwrite(&s->d,sizeof(uint),1,fp);
619 wr += fwrite(&s->hi_len,sizeof(uint),1,fp);
620 wr += fwrite(&s->low_len,sizeof(uint),1,fp);
621 wr += fwrite(s->hi,sizeof(uchar),s->hi_len,fp);
622 wr += fwrite(s->low,sizeof(uint),s->low_len,fp);
623 if(wr!=(6+s->hi_len+s->low_len))
625 if(selectd2_save(s->sd0,fp)) return 2;
626 if(selectd2_save(s->sd1,fp)) return 3;
631 int selects3_load(selects3 * s, FILE * fp) {
633 rd += fread(&s->n,sizeof(uint),1,fp);
634 rd += fread(&s->m,sizeof(uint),1,fp);
635 rd += fread(&s->size,sizeof(uint),1,fp);
636 rd += fread(&s->d,sizeof(uint),1,fp);
637 rd += fread(&s->hi_len,sizeof(uint),1,fp);
638 rd += fread(&s->low_len,sizeof(uint),1,fp);
639 s->hi = new uchar[s->hi_len];
640 rd += fread(s->hi,sizeof(uchar),s->hi_len,fp);
641 s->low = new uint[s->low_len];
642 rd += fread(s->low,sizeof(uint),s->low_len,fp);
643 if(rd!=(6+s->hi_len+s->low_len))
645 s->sd0 = new selectd2;
646 if(selectd2_load(s->sd0,fp)) return 2;
647 s->sd1 = new selectd2;
648 if(selectd2_load(s->sd1,fp)) return 3;
649 delete [] s->sd0->buf;
650 delete [] s->sd1->buf;
657 void selects3_free(selects3 * s) {
660 //delete [] s->sd0->buf;
661 selectd2_free(s->sd0);
663 selectd2_free(s->sd1);
668 int selects3_construct(selects3 *select, int n, uint *buf) {
676 for (i=0; i<n; i++) m += __getbit(buf,i);
680 if (m == 0) return 0;
691 buf2 = new uchar[(2*m+8-1)/8+1];
692 for(int k=0;k<(2*m+8-1)/8+1;k++) buf2[k]=0;
693 select->hi_len = (2*m+8-1)/8+1;
694 low = new uint[(d*m+PBS-1)/PBS+1];
695 for(uint k=0;k<(d*m+PBS-1)/PBS+1;k++) low[k]=0;
696 select->low_len = (d*m+PBS-1)/PBS+1;
700 select->size = sizeof(uchar)*((2*m+8-1)/8+1) + sizeof(uint)*((d*m+PBS-1)/PBS+1);
702 for (i=0; i<m*2; i++) __setbit2(buf2,i,0);
705 for (i=0; i<n; i++) {
706 if (__getbit(buf,i)) {
707 __setbit2(buf2,(i>>d)+m,1);
708 __setbits(low,m*d,d,i & ((1<<d)-1));
715 select->size += 2*sizeof(selectd2);
717 selectd2_construct(sd1,m*2,buf2);
720 for (i=0; i<m*2; i++) __setbit2(buf2,i,1-__getbit2(buf2,i));
721 selectd2_construct(sd0,m*2,buf2);
724 for (i=0; i<m*2; i++) __setbit2(buf2,i,1-__getbit2(buf2,i));
728 //selects3 * lasts3=NULL;
732 int selects3_select(selects3 *select, int i) {
737 printf("ERROR: m=%d i=%d\n",select->m,i);
742 if (i == 0) return -1;
745 /*if(select->lasti==(uint)i-1) {
746 while(!__getbit2(select->sd1->buf,++select->lasts));
749 select->lasts = selectd2_select(select->sd1,i,1);
752 //lasts3 = select; */
753 x = selectd2_select1(select->sd1,i) - (i-1);
754 //x = (select->lasts-(i-1)) << d;
756 x += __getbits(select->low,(i-1)*d,d);
760 void pr_byte(FILE* fp, uchar b)
763 for(int i = 0; i < 8; i++){
764 fprintf(stderr, "%i", __getbit2(buff, i));
768 int selects3_selectnext(selects3 *select, int i) {
769 //return selects3_select(select,selects3_rank(select,i)+1);
778 y = selectd2_select0(select->sd0,ii)+1;
788 if (((z << r) & 0x80) == 0) {
793 w = __getbits(q,xoffset,d);
796 bool t2 = (__getbit2(select->hi,((y << 3)+r)));
806 if(__getbit2(select->hi,( (y << 3)+r))) k2++;
814 if(x==select->m) return (uint)-1;
818 unsigned int mask = 0x00ffffff;
819 unsigned int tmp = (select->hi[y] << (24+r)) | mask;
820 unsigned int c_incr = __builtin_clz(tmp);
825 while(select->hi[pp]==0) {
829 while(!__getbit2(select->hi,c)) c++;
834 return __getbits(q,x*d,d)+((c)<<d);
837 int selects3_rank(selects3 *select, int i) {
849 y = selectd2_select0(select->sd0, ii)+1;
850 // selectd2_select2(select->sd0,ii,0,&y1,&y2);
852 //printf("y %d y1 %d %d\n",y,y1,y2-y1);
863 if (((z << r) & 0x80) == 0) break;
864 w = __getbits(q, xoffset, d);