5 typedef unsigned int qword;
8 typedef unsigned long long qword;
11 #define PBS (sizeof(uint)*8)
17 #define logLL 16 // size of word
23 #define LLL (1<<logLLL)
25 //#define logL (logLL-3)
26 #define logL (logLL-1-5)
40 int __setbit(uint *B, int i,int x) {
45 if (x==0) B[j] &= (~(1<<(D-1-l)));
46 else if (x==1) B[j] |= (1<<(D-1-l));
48 printf("error __setbit x=%d\n",x);
55 int __setbit2(uchar *B, int i,int x) {
60 if (x==0) B[j] &= (~(1<<(8-1-l)));
61 else if (x==1) B[j] |= (1<<(8-1-l));
63 printf("error __setbit2 x=%d\n",x);
70 int __setbits(uint *B, int i, int d, int x) {
74 __setbit(B,i+j,(x>>(d-j-1))&1);
80 int __getbit(uint *B, int i) {
87 return (B[j] >> (D-1-l)) & 1;
91 static int __getbit2(uchar *B, int i) {
98 return (B[j] >> (8-1-l)) & 1;
103 uint __getbits_aux(uint *B, int i, int d) {
108 if (d==8 && (j & 7) == 0) {
110 x = (uint) (((unsigned char*) B)[i]);
111 } else if (i+d <= 2*D) {
112 x = (((qword)B[0]) << D) + B[1];
118 fprintf(stderr, "Warning: %d, %d\n", D, d);
119 x = (((qword)B[0])<<D)+B[1];
122 x &= (((qword)1L<<D)-1)<<D;
132 static uint __getbits(uint *B, int i, int d)
137 x = ((ulong *) B)[0];
138 x = (x << 32)|(x >> 32);
139 x = (x << i) >> (2*D - d);
145 #if HAS_NATIVE_POPCOUNT
146 static inline unsigned int popcount(unsigned int n){
147 asm ("popcnt %1, %0" : "=r" (n) : "0" (n));
151 // static inline unsigned int popcount8(unsigned int n) {
152 // return __builtin_popcount(n & 0xff);
155 static inline unsigned int _fast_popcount(uchar x)
157 return __builtin_popcount(x);
162 static const unsigned int _popCount[] = {
163 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,
164 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
165 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
166 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
167 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
168 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
169 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
170 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
171 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
172 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
173 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
174 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
175 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
176 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
177 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
178 4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
181 static inline unsigned int
182 _fast_popcount(int x) {
183 return _popCount[x & 0xff];
189 //static unsigned int __selecttbl[8*256];
190 static int built = 0;
192 void make___selecttbl(void) {
198 for (x = 0; x < 256; x++) {
199 __setbits(buf,0,8,x);
200 // for (r=0; r<8; r++) __selecttbl[(r<<8)+x] = -1;
202 for (i=0; i<8; i++) {
203 if (__getbit(buf,i)) {
204 // __selecttbl[(r<<8)+x] = i;
211 int selectd2_save(selectd2 * s, FILE * fp) {
213 wr += fwrite(&s->n,sizeof(uint),1,fp);
214 wr += fwrite(&s->m,sizeof(uint),1,fp);
215 wr += fwrite(&s->size,sizeof(uint),1,fp);
216 wr += fwrite(&s->ss_len,sizeof(uint),1,fp);
217 wr += fwrite(&s->sl_len,sizeof(uint),1,fp);
218 wr += fwrite(s->buf,sizeof(uchar),(s->n+7)/8+1,fp);
219 uint nl = (s->m-1) / L + 1;
220 wr += fwrite(s->lp,sizeof(uint),nl+1,fp);
221 wr += fwrite(s->p,sizeof(uint),nl+1,fp);
222 wr += fwrite(s->ss,sizeof(ushort),s->ss_len,fp);
223 wr += fwrite(s->sl,sizeof(uint),s->sl_len,fp);
224 if(wr!=s->sl_len+s->ss_len+2*(nl+1)+(s->n+7)/8+1+5)
230 int selectd2_load(selectd2 * s, FILE * fp) {
232 rd += fread(&s->n,sizeof(uint),1,fp);
233 rd += fread(&s->m,sizeof(uint),1,fp);
234 rd += fread(&s->size,sizeof(uint),1,fp);
235 rd += fread(&s->ss_len,sizeof(uint),1,fp);
236 rd += fread(&s->sl_len,sizeof(uint),1,fp);
237 s->buf = new uchar[(s->n+7)/8+1];
238 rd += fread(s->buf,sizeof(uchar),(s->n+7)/8+1,fp);
239 uint nl = (s->m-1) / L + 1;
240 s->lp = new uint[nl+1];
241 rd += fread(s->lp,sizeof(uint),nl+1,fp);
242 s->p = new uint[nl+1];
243 rd += fread(s->p,sizeof(uint),nl+1,fp);
244 s->ss = new ushort[s->ss_len];
245 rd += fread(s->ss,sizeof(ushort),s->ss_len,fp);
246 s->sl = new uint[s->sl_len];
247 rd += fread(s->sl,sizeof(uint),s->sl_len,fp);
248 if(rd!=s->sl_len+s->ss_len+2*(nl+1)+(s->n+7)/8+1+5)
254 void selectd2_free(selectd2 * s) {
263 int selectd2_construct(selectd2 *select, int n, uchar *buf) {
271 //make___selecttbl();
274 printf("ERROR: L=%d LLL=%d\n",L,LLL);
279 for (i=0; i<n; i++) m += __getbit2(buf,i);
282 //printf("n=%d m=%d\n",n,m);
288 for (i=0; i<n; i++) {
289 if (__getbit2(buf,i)) {
296 select->size = 0; //ignoring buf, shared with selects3
297 select->lp = new uint[nl+1];
298 for(int k=0;k<nl+1;k++) select->lp[k]=0;
299 select->size += (nl+1)*sizeof(uint);
300 select->p = new uint[nl+1];
301 for(int k=0;k<nl+1;k++) select->p[k]=0;
302 select->size += (nl+1)*sizeof(uint);
304 for (r = 0; r < 2; r++) {
306 for (il = 0; il < nl; il++) {
309 i = min((il+1)*L-1,m-1);
311 //printf("%d ",p-pp);
314 for (is = 0; is < L; is++) {
315 if (il*L+is >= m) break;
316 select->sl[ml*L+is] = s[il*L+is];
319 select->p[il] = -((ml<<logL)+1);
324 for (is = 0; is < L/LLL; is++) {
325 if (il*L+is*LLL >= m) break;
326 select->ss[ms*(L/LLL)+is] = s[il*L+is*LLL] - pp;
329 select->p[il] = ms << (logL-logLLL);
334 select->sl = new uint[ml*L+1];
335 for(int k=0;k<ml*L+1;k++) select->sl[k]=0;
336 select->size += sizeof(uint)*(ml*L+1);
337 select->sl_len = ml*L+1;
338 select->ss = new ushort[ms*(L/LLL)+1];
339 for(int k=0;k<ms*(L/LLL)+1;k++) select->ss[k]=0;
340 select->ss_len = ms*(L/LLL)+1;
341 select->size += sizeof(ushort)*(ms*(L/LLL)+1);
349 int selectd2_select1(selectd2 *select, int i) {
354 if (i <= 0) return -1;
357 il = select->p[i>>logL];
360 //p = select->sl[(il<<logL)+(i & (L-1))];
361 p = select->sl[il+(i & (L-1))];
364 p = select->lp[i>>logL];
365 p += select->ss[il+((i & (L-1))>>logLLL)];
366 r = i - (i & (LLL-1));
368 q = &(select->buf[p>>3]);
371 r -= _fast_popcount(*q >> (8-1-rr));
374 //rr = _popCount[*q];
375 rr = _fast_popcount(*q);
376 if (r + rr >= i) break;
381 p = (q - select->buf) << 3;
382 p += selecttbl[((i-r-1)<<8)+(*q)];
387 int selectd2_select0(selectd2 *select, int i) {
393 if (i <= 0) return -1;
396 il = select->p[i>>logL];
399 return select->sl[il+(i & (L-1))];
402 p = select->lp[i>>logL];
404 p += select->ss[il+((i & (L-1))>>logLLL)];
405 r = i - (i & (LLL-1));
407 q = &(select->buf[p>>3]);
411 uint masked_q = *q ^ 0xff;
413 r -= _fast_popcount(masked_q >> (7-rr));
416 rr = _fast_popcount(masked_q);
418 p = (q - select->buf) << 3;
419 p += selecttbl[((i-r-1)<<8)+masked_q];
424 masked_q = *q ^ 0xff;
429 int selectd2_select(selectd2 *select, int i,int f) {
430 return f ? selectd2_select1(select, i) :
431 selectd2_select0(select, i);
435 int selectd2_select2(selectd2 *select, int i,int f, int *st, int *en) {
448 printf("ERROR: m=%d i=%d\n",select->m,i);
455 il = select->p[i>>logL];
458 //p = select->sl[(il<<logL)+(i & (L-1))];
459 p = select->sl[il+(i & (L-1))];
461 if ((i>>logL) == ((i+1)>>logL)) {
462 p2 = select->sl[il+((i+1) & (L-1))];
465 p2 = selectd2_select(select,i+2,f);
469 p = select->lp[i>>logL];
470 //p += select->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL];
471 p += select->ss[il+((i & (L-1))>>logLLL)];
472 r = i - (i & (LLL-1));
474 q = &(select->buf[p>>3]);
478 //r -= _popCount[*q >> (8-1-rr)];
479 r -= _fast_popcount(*q >> (8-1-rr));
483 //rr = _popCount[*q];
484 rr = _fast_popcount(*q);
485 if (r + rr >= i) break;
490 p = (q - select->buf) << 3;
491 p += selecttbl[((i-r-1)<<8)+(*q)];
493 if ((i>>logL) == ((i+1)>>logL)) {
496 //rr = _popCount[*q];
497 r = _fast_popcount(*q);
498 if (r + rr >= i) break;
502 p2 = (q - select->buf) << 3;
503 p2 += selecttbl[((i-r-1)<<8)+(*q)];
506 p2 = selectd2_select(select,i+2,f);
512 //r -= _popCount[(*q ^ 0xff) >> (8-1-rr)];
513 r -= _fast_popcount((*q ^ 0xff) >> (8-1-rr));
517 //rr = _popCount[*q ^ 0xff];
518 rr = _fast_popcount(*q ^ 0xff);
519 if (r + rr >= i) break;
524 p = (q - select->buf) << 3;
525 p += selecttbl[((i-r-1)<<8)+(*q ^ 0xff)];
527 if ((i>>logL) == ((i+1)>>logL)) {
530 //rr = _popCount[*q ^ 0xff];
531 rr = _fast_popcount(*q ^ 0xff);
532 if (r + rr >= i) break;
536 p2 = (q - select->buf) << 3;
537 p2 += selecttbl[((i-r-1)<<8)+(*q ^ 0xff)];
540 p2 = selectd2_select(select,i+2,f);
550 int selects3_save(selects3 * s, FILE * fp) {
552 wr += fwrite(&s->n,sizeof(uint),1,fp);
553 wr += fwrite(&s->m,sizeof(uint),1,fp);
554 wr += fwrite(&s->size,sizeof(uint),1,fp);
555 wr += fwrite(&s->d,sizeof(uint),1,fp);
556 wr += fwrite(&s->hi_len,sizeof(uint),1,fp);
557 wr += fwrite(&s->low_len,sizeof(uint),1,fp);
558 wr += fwrite(s->hi,sizeof(uchar),s->hi_len,fp);
559 wr += fwrite(s->low,sizeof(uint),s->low_len,fp);
560 if(wr!=(6+s->hi_len+s->low_len))
562 if(selectd2_save(s->sd0,fp)) return 2;
563 if(selectd2_save(s->sd1,fp)) return 3;
568 int selects3_load(selects3 * s, FILE * fp) {
570 rd += fread(&s->n,sizeof(uint),1,fp);
571 rd += fread(&s->m,sizeof(uint),1,fp);
572 rd += fread(&s->size,sizeof(uint),1,fp);
573 rd += fread(&s->d,sizeof(uint),1,fp);
574 rd += fread(&s->hi_len,sizeof(uint),1,fp);
575 rd += fread(&s->low_len,sizeof(uint),1,fp);
576 s->hi = new uchar[s->hi_len];
577 rd += fread(s->hi,sizeof(uchar),s->hi_len,fp);
578 s->low = new uint[s->low_len];
579 rd += fread(s->low,sizeof(uint),s->low_len,fp);
580 if(rd!=(6+s->hi_len+s->low_len))
582 s->sd0 = new selectd2;
583 if(selectd2_load(s->sd0,fp)) return 2;
584 s->sd1 = new selectd2;
585 if(selectd2_load(s->sd1,fp)) return 3;
586 delete [] s->sd0->buf;
587 delete [] s->sd1->buf;
594 void selects3_free(selects3 * s) {
597 //delete [] s->sd0->buf;
598 selectd2_free(s->sd0);
600 selectd2_free(s->sd1);
605 int selects3_construct(selects3 *select, int n, uint *buf) {
613 for (i=0; i<n; i++) m += __getbit(buf,i);
617 if (m == 0) return 0;
628 buf2 = new uchar[(2*m+8-1)/8+1];
629 for(int k=0;k<(2*m+8-1)/8+1;k++) buf2[k]=0;
630 select->hi_len = (2*m+8-1)/8+1;
631 low = new uint[(d*m+PBS-1)/PBS+1];
632 for(uint k=0;k<(d*m+PBS-1)/PBS+1;k++) low[k]=0;
633 select->low_len = (d*m+PBS-1)/PBS+1;
637 select->size = sizeof(uchar)*((2*m+8-1)/8+1) + sizeof(uint)*((d*m+PBS-1)/PBS+1);
639 for (i=0; i<m*2; i++) __setbit2(buf2,i,0);
642 for (i=0; i<n; i++) {
643 if (__getbit(buf,i)) {
644 __setbit2(buf2,(i>>d)+m,1);
645 __setbits(low,m*d,d,i & ((1<<d)-1));
652 select->size += 2*sizeof(selectd2);
654 selectd2_construct(sd1,m*2,buf2);
657 for (i=0; i<m*2; i++) __setbit2(buf2,i,1-__getbit2(buf2,i));
658 selectd2_construct(sd0,m*2,buf2);
661 for (i=0; i<m*2; i++) __setbit2(buf2,i,1-__getbit2(buf2,i));
665 //selects3 * lasts3=NULL;
669 int selects3_select(selects3 *select, int i) {
674 printf("ERROR: m=%d i=%d\n",select->m,i);
679 if (i == 0) return -1;
682 /*if(select->lasti==(uint)i-1) {
683 while(!__getbit2(select->sd1->buf,++select->lasts));
686 select->lasts = selectd2_select(select->sd1,i,1);
689 //lasts3 = select; */
690 x = selectd2_select1(select->sd1,i) - (i-1);
691 //x = (select->lasts-(i-1)) << d;
693 x += __getbits(select->low,(i-1)*d,d);
697 void pr_byte(FILE* fp, uchar b)
700 for(int i = 0; i < 8; i++){
701 fprintf(stderr, "%i", __getbit2(buff, i));
705 int selects3_selectnext(selects3 *select, int i) {
706 //return selects3_select(select,selects3_rank(select,i)+1);
715 y = selectd2_select0(select->sd0,ii)+1;
725 if (((z << r) & 0x80) == 0) {
730 w = __getbits(q,xoffset,d);
733 if (__getbit2(select->hi,((y << 3)+r))) k2 ++;
744 if(__getbit2(select->hi,( (y << 3)+r))) k2++;
752 if(x==select->m) return (uint)-1;
757 uint mask = ~(0xffu << (8*sizeof(uint) - 8));
758 uint tmp = (select->hi[y] << ((8*sizeof(uint) - 8)+r)) | mask;
759 uint c_incr = __builtin_clz(tmp);
764 uchar * pp = &(select->hi[c >> 3]);
771 c += __builtin_clz(*pp) - 24;
776 return __getbits(q,xoffset,d)+((c)<<d);
779 int selects3_rank(selects3 *select, int i) {
791 y = selectd2_select0(select->sd0, ii)+1;
803 if (((z << r) & 0x80) == 0) return x;
805 w = __getbits(q, xoffset, d);
807 if (w >= j) return x + (w == j);