6 typedef unsigned int qword;
9 typedef unsigned long long qword;
12 #define PBS (sizeof(uint)*8)
18 #define logLL 16 // size of word
24 #define LLL (1<<logLLL)
26 //#define logL (logLL-3)
27 #define logL (logLL-1-5)
41 int __setbit(uint *B, int i,int x) {
46 if (x==0) B[j] &= (~(1<<(D-1-l)));
47 else if (x==1) B[j] |= (1<<(D-1-l));
49 printf("error __setbit x=%d\n",x);
56 int __setbit2(uchar *B, int i,int x) {
61 if (x==0) B[j] &= (~(1<<(8-1-l)));
62 else if (x==1) B[j] |= (1<<(8-1-l));
64 printf("error __setbit2 x=%d\n",x);
71 int __setbits(uint *B, int i, int d, int x) {
75 __setbit(B,i+j,(x>>(d-j-1))&1);
81 int __getbit(uint *B, int i) {
88 return (B[j] >> (D-1-l)) & 1;
92 int __getbit2(uchar *B, int i) {
99 return (B[j] >> (8-1-l)) & 1;
104 uint __getbits(uint *B, int i, int d) {
110 x = (((qword)B[0]) << D) + B[1];
116 x = (((qword)B[0])<<D)+B[1];
119 x &= (((qword)1L<<D)-1)<<D;
131 uint __getbits(uint *B, int i, int d) {
135 for (j=0; j<d; j++) {
137 x += __getbit(B,i+j);
143 static const unsigned int _popCount[] = {
144 0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,
145 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
146 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
147 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
148 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
149 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
150 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
151 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
152 1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,
153 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
154 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
155 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
156 2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,
157 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
158 3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,
159 4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8
161 static inline unsigned int
162 _fast_popcount2(int x)
164 uint m1 = 0x55555555;
165 uint m2 = 0x33333333;
166 uint m4 = 0x0f0f0f0f;
168 x = (x & m2) + ((x >> 2) & m2);
169 x = (x + (x >> 4)) & m4;
171 return (x + (x >> 16)) & 0x3f;
174 static inline unsigned int
175 _fast_popcount3(int x)
177 uint m1 = 0x55555555;
178 uint m2 = 0xc30c30c3;
180 x = (x & m2) + ((x >> 2) & m2) + ((x >> 4) & m2);
182 return (x + (x >> 12) + (x >> 24)) & 0x3f;
185 static inline unsigned int
186 _fast_popcount(int x) {
190 static unsigned int __selecttbl[8*256];
191 static int built = 0;
193 void make___selecttbl(void) {
199 for (x = 0; x < 256; x++) {
200 __setbits(buf,0,8,x);
201 for (r=0; r<8; r++) __selecttbl[(r<<8)+x] = -1;
203 for (i=0; i<8; i++) {
204 if (__getbit(buf,i)) {
205 __selecttbl[(r<<8)+x] = i;
213 unsigned int __popCount(uint x) {
217 r = r - ((r>>1) & 0x77777777) - ((r>>2) & 0x33333333) - ((r>>3) & 0x11111111);
218 r = ((r + (r>>4)) & 0x0f0f0f0f) % 0xff;
221 r = ((r & 0xaaaaaaaa)>>1) + (r & 0x55555555);
222 r = ((r & 0xcccccccc)>>2) + (r & 0x33333333);
223 //r = ((r & 0xf0f0f0f0)>>4) + (r & 0x0f0f0f0f);
224 r = ((r>>4) + r) & 0x0f0f0f0f;
225 //r = ((r & 0xff00ff00)>>8) + (r & 0x00ff00ff);
227 //r = ((r & 0xffff0000)>>16) + (r & 0x0000ffff);
228 r = ((r>>16) + r) & 63;
230 r = _popCount[x & 0xff];
232 r += _popCount[x & 0xff];
234 r += _popCount[x & 0xff];
236 r += _popCount[x & 0xff];
242 unsigned int __popCount8(uint x) {
246 r = ((r & 0xaa)>>1) + (r & 0x55);
247 r = ((r & 0xcc)>>2) + (r & 0x33);
248 r = ((r>>4) + r) & 0x0f;
250 r = _popCount[x & 0xff];
256 int selectd2_save(selectd2 * s, FILE * fp) {
258 wr += fwrite(&s->n,sizeof(uint),1,fp);
259 wr += fwrite(&s->m,sizeof(uint),1,fp);
260 wr += fwrite(&s->size,sizeof(uint),1,fp);
261 wr += fwrite(&s->ss_len,sizeof(uint),1,fp);
262 wr += fwrite(&s->sl_len,sizeof(uint),1,fp);
263 wr += fwrite(s->buf,sizeof(uchar),(s->n+7)/8+1,fp);
264 uint nl = (s->m-1) / L + 1;
265 wr += fwrite(s->lp,sizeof(uint),nl+1,fp);
266 wr += fwrite(s->p,sizeof(uint),nl+1,fp);
267 wr += fwrite(s->ss,sizeof(ushort),s->ss_len,fp);
268 wr += fwrite(s->sl,sizeof(uint),s->sl_len,fp);
269 if(wr!=s->sl_len+s->ss_len+2*(nl+1)+(s->n+7)/8+1+5)
275 int selectd2_load(selectd2 * s, FILE * fp) {
277 rd += fread(&s->n,sizeof(uint),1,fp);
278 rd += fread(&s->m,sizeof(uint),1,fp);
279 rd += fread(&s->size,sizeof(uint),1,fp);
280 rd += fread(&s->ss_len,sizeof(uint),1,fp);
281 rd += fread(&s->sl_len,sizeof(uint),1,fp);
282 s->buf = new uchar[(s->n+7)/8+1];
283 rd += fread(s->buf,sizeof(uchar),(s->n+7)/8+1,fp);
284 uint nl = (s->m-1) / L + 1;
285 s->lp = new uint[nl+1];
286 rd += fread(s->lp,sizeof(uint),nl+1,fp);
287 s->p = new uint[nl+1];
288 rd += fread(s->p,sizeof(uint),nl+1,fp);
289 s->ss = new ushort[s->ss_len];
290 rd += fread(s->ss,sizeof(ushort),s->ss_len,fp);
291 s->sl = new uint[s->sl_len];
292 rd += fread(s->sl,sizeof(uint),s->sl_len,fp);
293 if(rd!=s->sl_len+s->ss_len+2*(nl+1)+(s->n+7)/8+1+5)
299 void selectd2_free(selectd2 * s) {
308 int selectd2_construct(selectd2 *select, int n, uchar *buf) {
319 printf("ERROR: L=%d LLL=%d\n",L,LLL);
324 for (i=0; i<n; i++) m += __getbit2(buf,i);
327 //printf("n=%d m=%d\n",n,m);
333 for (i=0; i<n; i++) {
334 if (__getbit2(buf,i)) {
341 select->size = 0; //ignoring buf, shared with selects3
342 select->lp = new uint[nl+1];
343 for(int k=0;k<nl+1;k++) select->lp[k]=0;
344 select->size += (nl+1)*sizeof(uint);
345 select->p = new uint[nl+1];
346 for(int k=0;k<nl+1;k++) select->p[k]=0;
347 select->size += (nl+1)*sizeof(uint);
349 for (r = 0; r < 2; r++) {
351 for (il = 0; il < nl; il++) {
354 i = min((il+1)*L-1,m-1);
356 //printf("%d ",p-pp);
359 for (is = 0; is < L; is++) {
360 if (il*L+is >= m) break;
361 select->sl[ml*L+is] = s[il*L+is];
364 select->p[il] = -((ml<<logL)+1);
369 for (is = 0; is < L/LLL; is++) {
370 if (il*L+is*LLL >= m) break;
371 select->ss[ms*(L/LLL)+is] = s[il*L+is*LLL] - pp;
374 select->p[il] = ms << (logL-logLLL);
379 select->sl = new uint[ml*L+1];
380 for(int k=0;k<ml*L+1;k++) select->sl[k]=0;
381 select->size += sizeof(uint)*(ml*L+1);
382 select->sl_len = ml*L+1;
383 select->ss = new ushort[ms*(L/LLL)+1];
384 for(int k=0;k<ms*(L/LLL)+1;k++) select->ss[k]=0;
385 select->ss_len = ms*(L/LLL)+1;
386 select->size += sizeof(ushort)*(ms*(L/LLL)+1);
394 int selectd2_select(selectd2 *select, int i,int f) {
400 if (i == 0) return -1;
404 printf("ERROR: m=%d i=%d\n",select->m,i);
411 il = select->p[i>>logL];
414 //p = select->sl[(il<<logL)+(i & (L-1))];
415 p = select->sl[il+(i & (L-1))];
418 p = select->lp[i>>logL];
419 //p += select->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL];
420 p += select->ss[il+((i & (L-1))>>logLLL)];
421 r = i - (i & (LLL-1));
423 q = &(select->buf[p>>3]);
427 //r -= _popCount[*q >> (8-1-rr)];
428 r -= _fast_popcount(*q >> (8-1-rr));
432 //rr = _popCount[*q];
433 rr = _fast_popcount(*q);
434 if (r + rr >= i) break;
439 p = (q - select->buf) << 3;
440 p += __selecttbl[((i-r-1)<<8)+(*q)];
444 //r -= _popCount[(*q ^ 0xff) >> (8-1-rr)];
445 r -= _fast_popcount((*q ^ 0xff) >> (8-1-rr));
449 //rr = _popCount[*q ^ 0xff];
450 rr = _fast_popcount(*q ^ 0xff);
451 if (r + rr >= i) break;
456 p = (q - select->buf) << 3;
457 p += __selecttbl[((i-r-1)<<8)+(*q ^ 0xff)];
464 int selectd2_select2(selectd2 *select, int i,int f, int *st, int *en) {
477 printf("ERROR: m=%d i=%d\n",select->m,i);
484 il = select->p[i>>logL];
487 //p = select->sl[(il<<logL)+(i & (L-1))];
488 p = select->sl[il+(i & (L-1))];
490 if ((i>>logL) == ((i+1)>>logL)) {
491 p2 = select->sl[il+((i+1) & (L-1))];
494 p2 = selectd2_select(select,i+2,f);
498 p = select->lp[i>>logL];
499 //p += select->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL];
500 p += select->ss[il+((i & (L-1))>>logLLL)];
501 r = i - (i & (LLL-1));
503 q = &(select->buf[p>>3]);
507 //r -= _popCount[*q >> (8-1-rr)];
508 r -= _fast_popcount(*q >> (8-1-rr));
512 //rr = _popCount[*q];
513 rr = _fast_popcount(*q);
514 if (r + rr >= i) break;
519 p = (q - select->buf) << 3;
520 p += __selecttbl[((i-r-1)<<8)+(*q)];
522 if ((i>>logL) == ((i+1)>>logL)) {
525 //rr = _popCount[*q];
526 r = _fast_popcount(*q);
527 if (r + rr >= i) break;
531 p2 = (q - select->buf) << 3;
532 p2 += __selecttbl[((i-r-1)<<8)+(*q)];
535 p2 = selectd2_select(select,i+2,f);
541 //r -= _popCount[(*q ^ 0xff) >> (8-1-rr)];
542 r -= _fast_popcount((*q ^ 0xff) >> (8-1-rr));
546 //rr = _popCount[*q ^ 0xff];
547 rr = _fast_popcount(*q ^ 0xff);
548 if (r + rr >= i) break;
553 p = (q - select->buf) << 3;
554 p += __selecttbl[((i-r-1)<<8)+(*q ^ 0xff)];
556 if ((i>>logL) == ((i+1)>>logL)) {
559 //rr = _popCount[*q ^ 0xff];
560 rr = _fast_popcount(*q ^ 0xff);
561 if (r + rr >= i) break;
565 p2 = (q - select->buf) << 3;
566 p2 += __selecttbl[((i-r-1)<<8)+(*q ^ 0xff)];
569 p2 = selectd2_select(select,i+2,f);
579 int selects3_save(selects3 * s, FILE * fp) {
581 wr += fwrite(&s->n,sizeof(uint),1,fp);
582 wr += fwrite(&s->m,sizeof(uint),1,fp);
583 wr += fwrite(&s->size,sizeof(uint),1,fp);
584 wr += fwrite(&s->d,sizeof(uint),1,fp);
585 wr += fwrite(&s->hi_len,sizeof(uint),1,fp);
586 wr += fwrite(&s->low_len,sizeof(uint),1,fp);
587 wr += fwrite(s->hi,sizeof(uchar),s->hi_len,fp);
588 wr += fwrite(s->low,sizeof(uint),s->low_len,fp);
589 if(wr!=(6+s->hi_len+s->low_len))
591 if(selectd2_save(s->sd0,fp)) return 2;
592 if(selectd2_save(s->sd1,fp)) return 3;
597 int selects3_load(selects3 * s, FILE * fp) {
599 rd += fread(&s->n,sizeof(uint),1,fp);
600 rd += fread(&s->m,sizeof(uint),1,fp);
601 rd += fread(&s->size,sizeof(uint),1,fp);
602 rd += fread(&s->d,sizeof(uint),1,fp);
603 rd += fread(&s->hi_len,sizeof(uint),1,fp);
604 rd += fread(&s->low_len,sizeof(uint),1,fp);
605 s->hi = new uchar[s->hi_len];
606 rd += fread(s->hi,sizeof(uchar),s->hi_len,fp);
607 s->low = new uint[s->low_len];
608 rd += fread(s->low,sizeof(uint),s->low_len,fp);
609 if(rd!=(6+s->hi_len+s->low_len))
611 s->sd0 = new selectd2;
612 if(selectd2_load(s->sd0,fp)) return 2;
613 s->sd1 = new selectd2;
614 if(selectd2_load(s->sd1,fp)) return 3;
615 delete [] s->sd0->buf;
616 delete [] s->sd1->buf;
623 void selects3_free(selects3 * s) {
626 //delete [] s->sd0->buf;
627 selectd2_free(s->sd0);
629 selectd2_free(s->sd1);
634 int selects3_construct(selects3 *select, int n, uint *buf) {
642 for (i=0; i<n; i++) m += __getbit(buf,i);
646 if (m == 0) return 0;
657 buf2 = new uchar[(2*m+8-1)/8+1];
658 for(int k=0;k<(2*m+8-1)/8+1;k++) buf2[k]=0;
659 select->hi_len = (2*m+8-1)/8+1;
660 low = new uint[(d*m+PBS-1)/PBS+1];
661 for(uint k=0;k<(d*m+PBS-1)/PBS+1;k++) low[k]=0;
662 select->low_len = (d*m+PBS-1)/PBS+1;
666 select->size = sizeof(uchar)*((2*m+8-1)/8+1) + sizeof(uint)*((d*m+PBS-1)/PBS+1);
668 for (i=0; i<m*2; i++) __setbit2(buf2,i,0);
671 for (i=0; i<n; i++) {
672 if (__getbit(buf,i)) {
673 __setbit2(buf2,(i>>d)+m,1);
674 __setbits(low,m*d,d,i & ((1<<d)-1));
681 select->size += 2*sizeof(selectd2);
683 selectd2_construct(sd1,m*2,buf2);
686 for (i=0; i<m*2; i++) __setbit2(buf2,i,1-__getbit2(buf2,i));
687 selectd2_construct(sd0,m*2,buf2);
690 for (i=0; i<m*2; i++) __setbit2(buf2,i,1-__getbit2(buf2,i));
694 //selects3 * lasts3=NULL;
698 int selects3_select(selects3 *select, int i) {
703 printf("ERROR: m=%d i=%d\n",select->m,i);
708 if (i == 0) return -1;
711 /*if(select->lasti==(uint)i-1) {
712 while(!__getbit2(select->sd1->buf,++select->lasts));
715 select->lasts = selectd2_select(select->sd1,i,1);
718 //lasts3 = select; */
719 x = selectd2_select(select->sd1,i,1) - (i-1);
720 //x = (select->lasts-(i-1)) << d;
722 x += __getbits(select->low,(i-1)*d,d);
727 int selects3_selectnext(selects3 *select, int i) {
728 //return selects3_select(select,selects3_rank(select,i)+1);
736 y = selectd2_select(select->sd0,ii,0)+1;
745 if (((z << r) & 0x80) == 0) {
749 w = __getbits(q,x*d,d);
752 if(__getbit2(select->hi,(8*y+r))) k2++;
760 if(__getbit2(select->hi,(8*y+r))) k2++;
771 for(int kk=0;kk<8-r;kk++) {
772 if(__getbit2(select->hi,c)) {
780 while(select->hi[pp]==0) {
784 while(!__getbit2(select->hi,c)) c++;
787 return __getbits(q,x*d,d)+((c)<<d);
790 int selects3_rank(selects3 *select, int i) {
801 y = selectd2_select(select->sd0,ii,0)+1;
802 // selectd2_select2(select->sd0,ii,0,&y1,&y2);
804 //printf("y %d y1 %d %d\n",y,y1,y2-y1);
814 if (((z << r) & 0x80) == 0) break;
815 w = __getbits(q,x*d,d);