X-Git-Url: http://git.nguyen.vg/gitweb/?p=SXSI%2Flibbp.git;a=blobdiff_plain;f=bp-darray.c;fp=bp-darray.c;h=c3c06a613239ca119cbb8816e4e42e2a1a7277a8;hp=0000000000000000000000000000000000000000;hb=45ff7a2260f890f6ef6a7b56f654ffa1a057a7e7;hpb=b3b328570c63aea4d9d15854eec9f7434cc9fe0d diff --git a/bp-darray.c b/bp-darray.c new file mode 100644 index 0000000..c3c06a6 --- /dev/null +++ b/bp-darray.c @@ -0,0 +1,593 @@ +#include +#include +#include "bp-darray.h" +#include "bp-utils.h" + +#define PBS (sizeof(pb)*8) +#define D (1< _b ? _a : _b; }) + + +#define min(a,b) \ + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a <= _b ? _a : _b; }) + + + + +static dword getbits(pb *B, int i, int d) +{ + dword j,x; + + x = 0; + for (j=0; j>(k-1-j))); + } + //printf("getpattern(%d) = %d\n",i,x); + return x; +} + +static int selecttbl[8*256]; +static int selecttbl_init = 0; + +static void make_selecttbl(void) +{ + int i,x,r; + pb buf[1]; + if (selecttbl_init) return; + + selecttbl_init = 1; + + for (x = 0; x < 256; x++) { + bp_setbits(buf,0,8,x); + for (r=0; r<8; r++) selecttbl[(r<<8)+x] = -1; + r = 0; + for (i=0; i<8; i++) { + if (bp_getbit(buf,i)) { + selecttbl[(r<<8)+x] = i; + r++; + } + } + } +} + + +darray * bp_darray_construct(int n, pb *buf, int opt) +{ + int i,j,k,m; + int nl; + int p,pp; + int il,is,ml,ms; + int r,m2; + int p1,p2,p3,p4,s1,s2,s3,s4; + darray * da; + + bp_xmalloc(da, 1); + + da->idx_size = 0; + + make_selecttbl(); + + + if (L/LLL == 0) { + printf("ERROR: L=%d LLL=%d\n",L,LLL); + exit(1); + } + + m = 0; + for (i=0; in = n; + da->m = m; + //printf("n=%d m=%d\n",n,m); + + da->buf = buf; + + if (opt & (~OPT_NO_RANK)) { // construct select table + + nl = (m-1) / L + 1; + bp_xmalloc(da->lp, nl+1); + da->idx_size += (nl+1) * sizeof(*da->lp); + bp_xmalloc(da->p, nl+1); + da->idx_size += (nl+1) * sizeof(*da->p); + + for (r = 0; r < 2; r++) { + s1 = s2 = s3 = s4 = 0; + p1 = p2 = p3 = p4 = -1; + + ml = ms = 0; + for (il = 0; il < nl; il++) { + + while (s1 <= il*L) { + if (bp_getbit(buf,p1+1)) s1++; + p1++; + } + pp = p1; + da->lp[il] = pp; + i = min((il+1)*L-1,m-1); + + while (s2 <= i) { + if (bp_getbit(buf,p2+1)) s2++; + p2++; + } + p = p2; + + if (p - pp >= LL) { + if (r == 1) { + for (is = 0; is < L; is++) { + if (il*L+is >= m) break; + + while (s3 <= il*L+is) { + if (bp_getbit(buf,p3+1)) s3++; + p3++; + } + da->sl[ml*L+is] = p3; + } + } + da->p[il] = -(ml+1); + ml++; + } else { + if (r == 1) { + for (is = 0; is < L/LLL; is++) { + if (il*L+is*LLL >= m) break; + while (s4 <= il*L+is*LLL) { + if (bp_getbit(buf,p4+1)) s4++; + p4++; + } + + da->ss[ms*(L/LLL)+is] = p4 - pp; + } + } + da->p[il] = ms; + ms++; + } + } + if (r == 0) { + bp_xmalloc(da->sl,ml*L+1); + da->idx_size += (ml*L+1) * sizeof(*da->sl); + bp_xmalloc(da->ss, ms*(L/LLL)+1); + da->idx_size += (ms*(L/LLL)+1) * sizeof(*da->ss); + } + } + + } else { // no select table + da->lp = NULL; + da->p = da->sl = NULL; + da->ss = NULL; + } + + // construct rank table + + if ((opt & OPT_NO_RANK) == 0) { + bp_xmalloc(da->rl,n/R1+2); + da->idx_size += (n/R1+2) * sizeof(*da->rl); + bp_xmalloc(da->rm,n/RR+2); + + da->idx_size += (n/RR+2) * sizeof(*da->rm); + + bp_xmalloc(da->rs,n/RRR+2); + da->idx_size += (n/RRR+2) * sizeof(*da->rs); + + r = 0; + for (k=0; k<=n; k+=R1) { + da->rl[k/R1] = r; + m2 = 0; + for (i=0; irm[(k+i)/RR] = m2; + m = 0; + for (j=0; jrs[(k+i+j)/RRR] = m; + m2 += m; + m = 0; + } + } + if (m != 0) { + printf("???\n"); + } + } + r += m2; + } + } + + return da; +} + + +void bp_darray_free(darray *da) { + + if (!da) return; + if (da->buf) bp_free(da->buf); + if (da->lp) bp_free(da->lp); + if (da->sl) bp_free(da->sl); + if (da->ss) bp_free(da->ss); + if (da->p) bp_free(da->p); + if (da->rl) bp_free(da->rl); + if (da->rm) bp_free(da->rm); + if (da->rs) bp_free(da->rs); + bp_free(da); + +} + +static int darray_rank0(darray *da, int i) +{ + int r,j; + pb *p; + +#if (RRR == D*2) + r = da->rl[i>>logR] + da->rm[i>>logRR] + da->rs[i>>logRRR]; + p = da->buf + ((i>>logRRR)<<(logRRR-logD)); + j = i & (RRR-1); + if (j < D) r += popcount(*p >> (D-1-j)); + else r += popcount(*p) + popcount(p[1] >> (D-1-(j-D))); +#else + + j = i & (RRR-1); + if (j < RRR/2) { + r = da->rl[i>>logR] + da->rm[i>>logRR] + da->rs[i>>logRRR]; + p = da->buf + ((i>>logRRR)<<(logRRR-logD)); + while (j >= D) { + r += popcount(*p++); + j -= D; + } + r += popcount(*p >> (D-1-j)); + } else { + j = RRR-1 - (i & (RRR-1)); + i += j+1; + r = da->rl[i>>logR] + da->rm[i>>logRR] + da->rs[i>>logRRR]; + p = da->buf + ((i>>logRRR)<<(logRRR-logD)); + while (j >= D) { + r -= popcount(*--p); + j -= D; + } + if (j > 0) r -= popcount(*--p << (D-j)); + } + +#endif + + return r; +} + +int bp_darray_rank(darray *da, int i) +{ + int r,j,i_rr, i_rrr; + pb *p; + i_rr = i >> logRR; + i_rrr = i >> logRRR; + r = da->rl[i>>logR] + da->rm[i_rr]; + + j = (i_rrr) & (RR/RRR-1); + while (j > 0) { + r += da->rs[((i_rr)<<(logRR-logRRR))+j-1]; + j--; + } + + p = da->buf + ((i_rrr)<<(logRRR-logD)); + j = i & (RRR-1); + while (j >= D) { + r += popcount(*p++); + j -= D; + } + r += popcount(*p >> (D-1-j)); + + return r; +} + +int bp_darray_select_bsearch(darray *da, int i, pb (*getpat)(pb *)) +{ + int j; + int l,r,m,n; + int s; + int t,x,rr; + pb *p,w; + + + if (i == 0) return -1; + + if (i > da->m) { + return -1; + } + + i--; + + + + n = da->m; + + t = i; + + l = 0; r = (n-1)>>logR; + // find the smallest index x s.t. rl[x] >= t + while (l < r) { + m = (l+r)/2; + //printf("m=%d rl[m+1]=%d t=%d\n",m,da->rl[m+1],t); + if (da->rl[m+1] > t) { // m+1 is out of range + r = m; // new r = m >= l + } else { + l = m+1; // new l = m+1 <= r + } + } + x = l; + t -= da->rl[x]; + + x <<= logR; + + l = x >> logRR; r = (min(x+R1-1,n))>>logRR; + while (l < r) { + m = (l+r)/2; + //printf("m=%d rm[m+1]=%d t=%d\n",m,da->rm[m+1],t); + if (da->rm[m+1] > t) { // m+1 is out of range + r = m; + } else { + l = m+1; // new l = m+1 <= r + } + } + x = l; + t -= da->rm[x]; + + x <<= logRR; + +#if 0 + l = x >> logRRR; r = (min(x+RR-1,n))>>logRRR; + while (l < r) { + m = (l+r)/2; + //printf("m=%d rs[m+1]=%d t=%d\n",m,da->rs[m+1],t); + if (da->rs[m+1] > t) { // m+1 is out of range + r = m; + } else { + l = m+1; // new l = m+1 <= r + } + } + x = l; + t -= da->rs[x]; +#else + l = x >> logRRR; + while (t > da->rs[l]) { + t -= da->rs[l]; + l++; + } + x = l; +#endif + + x <<= logRRR; + + p = &da->buf[x >> logD]; + while (1) { + m = popcount(getpat(p)); + if (m > t) break; + t -= m; + x += D; + p++; + } + + w = getpat(p); + while (1) { + rr = popcount8(w >> (D-8)); + if (rr > t) break; + t -= rr; + x += 8; + w <<= 8; + } + x += selecttbl[((t-0)<<8)+(w>>(D-8))]; + +#if 0 + if (x != s) { + printf("error x=%d s=%d\n",x,s); + } +#endif + return x; +} + +int bp_darray_pat_rank(darray *da, int i, pb (*getpat)(pb *)) +{ + int r,j; + pb *p; + + r = da->rl[i>>logR] + da->rm[i>>logRR]; + j = (i>>logRRR) & (RR/RRR-1); + while (j > 0) { + r += da->rs[((i>>logRR)<<(logRR-logRRR))+j-1]; + j--; + } + + p = da->buf + ((i>>logRRR)<<(logRRR-logD)); + j = i & (RRR-1); + while (j >= D) { + r += popcount(getpat(p)); + p++; + j -= D; + } + r += popcount(getpat(p) >> (D-1-j)); + + return r; +} + + +int bp_darray_select(darray *da, int i,int f) +{ + int p,r; + int il; + int rr; + pb x; + pb *q; + + if (i == 0) return -1; + + if (i > da->m) { + return -1; + //printf("ERROR: m=%d i=%d\n",da->m,i); + //exit(1); + } + + i--; + + il = da->p[i>>logL]; + if (il < 0) { + il = -il-1; + p = da->sl[(il<lp[i>>logL]; + p += da->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL]; + r = i - (i & (LLL-1)); + + q = &(da->buf[p>>logD]); + + if (f == 1) { + rr = p & (D-1); + r -= popcount(*q >> (D-1-rr)); + p = p - rr; + + while (1) { + rr = popcount(*q); + if (r + rr >= i) break; + r += rr; + p += D; + q++; + } + + x = *q; + while (1) { + //rr = popcount(x >> (D-8)); + //rr = popcount(x >> (D-8)); + rr = popcount8(x >> (D-8)); + if (r + rr >= i) break; + r += rr; + p += 8; + x <<= 8; + } + p += selecttbl[((i-r-1)<<8)+(x>>(D-8))]; + } else { + rr = p & (D-1); + r -= popcount((~(*q)) >> (D-1-rr)); + p = p - rr; + + while (1) { + rr = popcount(~(*q)); + if (r + rr >= i) break; + r += rr; + p += D; + q++; + } + + x = ~(*q); + + while (1) { + rr = popcount8(x >> (D-8)); + if (r + rr >= i) break; + r += rr; + p += 8; + x <<= 8; + } + p += selecttbl[((i-r-1)<<8)+(x>>(D-8))]; + } + } + return p; +} + +int bp_darray_pat_select(darray *da, int i, pb (*getpat)(pb *)) +{ + int p,r; + int il; + int rr; + pb x; + pb *q; + + if (i == 0) return -1; + + if (i > da->m) { + return -1; + //printf("ERROR: m=%d i=%d\n",da->m,i); + //exit(1); + } + + i--; + + il = da->p[i>>logL]; + if (il < 0) { + il = -il-1; + p = da->sl[(il<lp[i>>logL]; + p += da->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL]; + r = i - (i & (LLL-1)); + + q = &(da->buf[p>>logD]); + + rr = p & (D-1); + r -= popcount(getpat(q) >> (D-1-rr)); + p = p - rr; + + while (1) { + rr = popcount(getpat(q)); + if (r + rr >= i) break; + r += rr; + p += D; + q++; + } + + x = getpat(q); + while (1) { + rr = popcount8(x >> (D-8)); + if (r + rr >= i) break; + r += rr; + p += 8; + x <<= 8; + } + p += selecttbl[((i-r-1)<<8)+(x>>(D-8))]; + } + return p; +} + + +darray * bp_darray_pat_construct(int n, pb *buf, int k, pb pat, int opt) +{ + int i; + pb *b; + darray *da; + + bp_xmalloc(b, (n+D-1)/D); + + for (i=0; ibuf = buf; + + bp_free(b); + + return da; +}