From: Kim Nguyễn Date: Tue, 29 May 2012 06:16:27 +0000 (+0200) Subject: Optimize sdarray.cpp to use g++ builtin instead of doing naive counting. X-Git-Url: http://git.nguyen.vg/gitweb/?p=SXSI%2Flibcds.git;a=commitdiff_plain;h=f485a898fccde5f6beb9d9b31be4afe973ad627b Optimize sdarray.cpp to use g++ builtin instead of doing naive counting. --- diff --git a/src/static_bitsequence/sdarray.cpp b/src/static_bitsequence/sdarray.cpp index fba36bf..43dea90 100644 --- a/src/static_bitsequence/sdarray.cpp +++ b/src/static_bitsequence/sdarray.cpp @@ -129,57 +129,36 @@ uint __getbits_aux(uint *B, int i, int d) { return x; } -uint __getbits(uint *B, int i, int d) +static uint __getbits(uint *B, int i, int d) { ulong x; -// uint y; - // y = __getbits_aux(B, i,d); B += (i >> logD); i &= (D-1); x = ((ulong *) B)[0]; x = (x << 32)|(x >> 32); - x <<= i; - x >>= 2*D - d; -// fprintf(stderr, "slow: %i, fast: %i\n", -// y, (uint) x); + x = (x << i) >> (2*D - d); return x; } #endif -#if 0 -uint __getbits(uint *B, int i, int d) { - uint j,x; - - x = 0; - for (j=0; j>1) & 0x77777777) - ((r>>2) & 0x33333333) - ((r>>3) & 0x11111111); - r = ((r + (r>>4)) & 0x0f0f0f0f) % 0xff; - #elif 1 - r = x; - r = ((r & 0xaaaaaaaa)>>1) + (r & 0x55555555); - r = ((r & 0xcccccccc)>>2) + (r & 0x33333333); - //r = ((r & 0xf0f0f0f0)>>4) + (r & 0x0f0f0f0f); - r = ((r>>4) + r) & 0x0f0f0f0f; - //r = ((r & 0xff00ff00)>>8) + (r & 0x00ff00ff); - r = (r>>8) + r; - //r = ((r & 0xffff0000)>>16) + (r & 0x0000ffff); - r = ((r>>16) + r) & 63; - #else - r = _popCount[x & 0xff]; - x >>= 8; - r += _popCount[x & 0xff]; - x >>= 8; - r += _popCount[x & 0xff]; - x >>= 8; - r += _popCount[x & 0xff]; - #endif - return r; -} - - -unsigned int __popCount8(uint x) { - uint r; - #if 1 - r = x; - r = ((r & 0xaa)>>1) + (r & 0x55); - r = ((r & 0xcc)>>2) + (r & 0x33); - r = ((r>>4) + r) & 0x0f; - #else - r = _popCount[x & 0xff]; - #endif - return r; -} - - int selectd2_save(selectd2 * s, FILE * fp) { uint wr = 0; wr += fwrite(&s->n,sizeof(uint),1,fp); @@ -333,7 +268,7 @@ int selectd2_construct(selectd2 *select, int n, uchar *buf) { int r; uint *s; - make___selecttbl(); + //make___selecttbl(); if (L/LLL == 0) { printf("ERROR: L=%d LLL=%d\n",L,LLL); @@ -444,7 +379,7 @@ int selectd2_select1(selectd2 *select, int i) { q++; } p = (q - select->buf) << 3; - p += __selecttbl[((i-r-1)<<8)+(*q)]; + p += selecttbl[((i-r-1)<<8)+(*q)]; } return p; } @@ -454,39 +389,41 @@ int selectd2_select0(selectd2 *select, int i) { int il; int rr; uchar *q; + if (i <= 0) return -1; i--; il = select->p[i>>logL]; if (il < 0) { il = -il-1; - //p = select->sl[(il<sl[il+(i & (L-1))]; - } - else { + return select->sl[il+(i & (L-1))]; + + } else { p = select->lp[i>>logL]; - //p += select->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL]; + p += select->ss[il+((i & (L-1))>>logLLL)]; r = i - (i & (LLL-1)); q = &(select->buf[p>>3]); - rr = p & (8-1); + rr = p & 7; - r -= _fast_popcount((*q ^ 0xff) >> (8-1-rr)); + uint masked_q = *q ^ 0xff; - while (1) { - //rr = _popCount[*q ^ 0xff]; - rr = _fast_popcount(*q ^ 0xff); - if (r + rr >= i) break; + r -= _fast_popcount(masked_q >> (7-rr)); + + for(;;) { + rr = _fast_popcount(masked_q); + if (rr >= i-r) { + p = (q - select->buf) << 3; + p += selecttbl[((i-r-1)<<8)+masked_q]; + return p; + }; r += rr; - //p += 8; q++; - } - p = (q - select->buf) << 3; - p += __selecttbl[((i-r-1)<<8)+(*q ^ 0xff)]; + masked_q = *q ^ 0xff; + }; } - return p; } int selectd2_select(selectd2 *select, int i,int f) { @@ -551,7 +488,7 @@ int selectd2_select2(selectd2 *select, int i,int f, int *st, int *en) { q++; } p = (q - select->buf) << 3; - p += __selecttbl[((i-r-1)<<8)+(*q)]; + p += selecttbl[((i-r-1)<<8)+(*q)]; if ((i>>logL) == ((i+1)>>logL)) { i++; @@ -563,7 +500,7 @@ int selectd2_select2(selectd2 *select, int i,int f, int *st, int *en) { q++; } p2 = (q - select->buf) << 3; - p2 += __selecttbl[((i-r-1)<<8)+(*q)]; + p2 += selecttbl[((i-r-1)<<8)+(*q)]; } else { p2 = selectd2_select(select,i+2,f); @@ -585,7 +522,7 @@ int selectd2_select2(selectd2 *select, int i,int f, int *st, int *en) { q++; } p = (q - select->buf) << 3; - p += __selecttbl[((i-r-1)<<8)+(*q ^ 0xff)]; + p += selecttbl[((i-r-1)<<8)+(*q ^ 0xff)]; if ((i>>logL) == ((i+1)>>logL)) { i++; @@ -597,7 +534,7 @@ int selectd2_select2(selectd2 *select, int i,int f, int *st, int *en) { q++; } p2 = (q - select->buf) << 3; - p2 += __selecttbl[((i-r-1)<<8)+(*q ^ 0xff)]; + p2 += selecttbl[((i-r-1)<<8)+(*q ^ 0xff)]; } else { p2 = selectd2_select(select,i+2,f); @@ -766,7 +703,7 @@ void pr_byte(FILE* fp, uchar b) } int selects3_selectnext(selects3 *select, int i) { - //return selects3_select(select,selects3_rank(select,i)+1); + //return selects3_select(select,selects3_rank(select,i)+1); int d,x,w,y; int r,j; int z,ii; @@ -792,11 +729,12 @@ int selects3_selectnext(selects3 *select, int i) { w = __getbits(q,xoffset,d); if (w >= j) { - bool t1 = (w == j); - bool t2 = (__getbit2(select->hi,((y << 3)+r))); - if (t2) k2 += (t1); - x += t1; - r += t1; + if (w == j) { + if (__getbit2(select->hi,((y << 3)+r))) k2 ++; + x++; + r++; + xoffset += d; + }; break; }; @@ -814,24 +752,28 @@ int selects3_selectnext(selects3 *select, int i) { if(x==select->m) return (uint)-1; - int c=(y << 3)+r; - unsigned int mask = 0x00ffffff; - unsigned int tmp = (select->hi[y] << (24+r)) | mask; - unsigned int c_incr = __builtin_clz(tmp); + int c = (y << 3)+r; + + uint mask = ~(0xffu << (8*sizeof(uint) - 8)); + uint tmp = (select->hi[y] << ((8*sizeof(uint) - 8)+r)) | mask; + uint c_incr = __builtin_clz(tmp); c += (c_incr & 7); if (c_incr == 8) { c += (8-r); - int pp = c >> 3; - while(select->hi[pp]==0) { + + uchar * pp = &(select->hi[c >> 3]); + + while(*pp == 0) { pp++; c += 8; }; - while(!__getbit2(select->hi,c)) c++; + + c += __builtin_clz(*pp) - 24; }; - c -= (k2); + c -= k2; - return __getbits(q,x*d,d)+((c)<d; q = select->low; - ii = i>>d; + ii = i >> d; y = selectd2_select0(select->sd0, ii)+1; - // selectd2_select2(select->sd0,ii,0,&y1,&y2); - //y1++; y2++; - //printf("y %d y1 %d %d\n",y,y1,y2-y1); x = y - ii; - j = i - (ii<>= 3; z = select->hi[y]; xoffset = x * d; + while (1) { - if (((z << r) & 0x80) == 0) break; + if (((z << r) & 0x80) == 0) return x; + w = __getbits(q, xoffset, d); - if (w >= j) { - x += (w == j); - break; - } + + if (w >= j) return x + (w == j); + x++; r++; xoffset += d; @@ -875,7 +815,5 @@ int selects3_rank(selects3 *select, int i) { z = select->hi[y]; } } - - return x; } diff --git a/src/static_bitsequence/sdarray.h b/src/static_bitsequence/sdarray.h index fe01200..de86c0c 100644 --- a/src/static_bitsequence/sdarray.h +++ b/src/static_bitsequence/sdarray.h @@ -25,10 +25,12 @@ typedef struct { uchar *hi; uint *low; selectd2 *sd0,*sd1; - uint hi_len, low_len; - //uint lasti, lasts; + uint hi_len, low_len; + //uint lasti, lasts; } selects3; +extern unsigned int selecttbl[8*256]; + int selects3_construct(selects3 *select, int n, uint *buf); int selects3_select(selects3 *select, int i); int selects3_rank(selects3 *select, int i); diff --git a/src/static_bitsequence/static_bitsequence.h b/src/static_bitsequence/static_bitsequence.h index d8e3f18..1bcb0b4 100644 --- a/src/static_bitsequence/static_bitsequence.h +++ b/src/static_bitsequence/static_bitsequence.h @@ -36,30 +36,30 @@ /** Base class for static bitsequences, contains many abstract functions, so this can't * be instantiated. It includes base implementations for rank0, select0 and select1 based * on rank0. - * + * * @author Francisco Claude */ class static_bitsequence { - + public: virtual ~static_bitsequence() {}; /** Returns the number of zeros until position i */ virtual uint rank0(uint i); - /** Returns the position of the i-th zero + /** Returns the position of the i-th zero * @return (uint)-1 if i=0, len if i>num_zeros or the position */ virtual uint select0(uint i); /** Returns the number of ones until position i */ virtual uint rank1(uint i); - /** Returns the position of the i-th one + /** Returns the position of the i-th one * @return (uint)-1 if i=0, len if i>num_ones or the position */ virtual uint select1(uint i); - virtual uint select_next1(uint i); - virtual uint select_next0(uint i); + virtual uint select_next1(uint i); + virtual uint select_next0(uint i); /** Returns the i-th bit */ virtual bool access(uint i); @@ -77,17 +77,17 @@ public: virtual uint size()=0; /** Stores the bitmap given a file pointer, return 0 in case of success */ - virtual int save(FILE * fp)=0; - + virtual int save(FILE * fp)=0; + /** Reads a bitmap determining the type */ static static_bitsequence * load(FILE * fp); - + protected: /** Length of the bitstring */ uint len; /** Number of ones in the bitstring */ - uint ones; - + uint ones; + }; #include diff --git a/src/static_bitsequence/static_bitsequence_sdarray.cpp b/src/static_bitsequence/static_bitsequence_sdarray.cpp index 8ce9164..df93b37 100644 --- a/src/static_bitsequence/static_bitsequence_sdarray.cpp +++ b/src/static_bitsequence/static_bitsequence_sdarray.cpp @@ -19,35 +19,20 @@ static_bitsequence_sdarray::static_bitsequence_sdarray(uint * buff, uint len) { delete [] tmp_seq; } +static_bitsequence_sdarray::static_bitsequence_sdarray(uint * buff, uint len, uint ones_) { -static_bitsequence_sdarray::static_bitsequence_sdarray() {make___selecttbl();} - -static_bitsequence_sdarray::~static_bitsequence_sdarray() { + ones = ones_; if(ones) - selects3_free(&sd); + selects3_construct(&sd,len,buff); + this->len = len; } -uint static_bitsequence_sdarray::rank1(uint i) { - if(i>=len) return -1; - if(ones) - return selects3_rank(&sd,i); - else - return 0; -} - +static_bitsequence_sdarray::static_bitsequence_sdarray() {make___selecttbl();} -uint static_bitsequence_sdarray::select1(uint i) { - if(i>ones || i==0) return -1; +static_bitsequence_sdarray::~static_bitsequence_sdarray() { if(ones) - return selects3_select(&sd,i); - else - return (uint)-1; -} - - -uint static_bitsequence_sdarray::select_next1(uint i) { - return selects3_selectnext(&sd,i); + selects3_free(&sd); } @@ -80,3 +65,7 @@ static_bitsequence_sdarray * static_bitsequence_sdarray::load(FILE * fp) { } return ret; } + +uint static_bitsequence_sdarray::select1(uint i) { return this->select(i); } +uint static_bitsequence_sdarray::rank1(uint i) { return this->rank(i); } +uint static_bitsequence_sdarray::select_next1(uint i) { return this->select_next(i); } diff --git a/src/static_bitsequence/static_bitsequence_sdarray.h b/src/static_bitsequence/static_bitsequence_sdarray.h index f8944d4..2e739db 100644 --- a/src/static_bitsequence/static_bitsequence_sdarray.h +++ b/src/static_bitsequence/static_bitsequence_sdarray.h @@ -7,22 +7,41 @@ #include class static_bitsequence_sdarray: public static_bitsequence { - public: + protected: + selects3 sd; + static_bitsequence_sdarray(); + +public: static_bitsequence_sdarray(uint * buff, uint len); - virtual ~static_bitsequence_sdarray(); + static_bitsequence_sdarray(uint * buff, uint len, uint); + ~static_bitsequence_sdarray(); + virtual uint select1(uint i); virtual uint rank1(uint i); virtual uint select_next1(uint i); - virtual uint size(); - virtual int save(FILE * fp); - static static_bitsequence_sdarray * load(FILE * fp); + inline uint select(uint i) { + if(i>ones || i==0) return -1; + if(ones) + return selects3_select(&this->sd,i); + else + return (uint)-1; + } - uint select_next1_unsafe(uint i){ - return selects3_selectnext(&sd,i); - }; - protected: - selects3 sd; - static_bitsequence_sdarray(); + inline uint rank(uint i) { + if(i>=len) return -1; + if(ones) + return selects3_rank(&this->sd, i); + else + return 0; + } + + inline uint select_next(uint i) { + return selects3_selectnext(&this->sd,i); + } + + uint size(); + int save(FILE * fp); + static static_bitsequence_sdarray * load(FILE * fp); }; diff --git a/src/static_sequence/static_sequence_bs.h b/src/static_sequence/static_sequence_bs.h index fe023d0..e19dfd1 100644 --- a/src/static_sequence/static_sequence_bs.h +++ b/src/static_sequence/static_sequence_bs.h @@ -50,12 +50,6 @@ public: /** Reads a bitmap determining the type */ static static_sequence_bs * load(FILE * fp); - - uint select_next_unsafe(uint c, uint i){ - static_bitsequence * bs = bitmaps[c]; - static_bitsequence_sdarray * sd = reinterpret_cast(bs); - return sd->select_next1_unsafe(i); - }; protected: uint sigma;