X-Git-Url: http://git.nguyen.vg/gitweb/?p=SXSI%2Flibbp.git;a=blobdiff_plain;f=darray.c;fp=darray.c;h=890de4522a6224a7d299da93a69852e214315153;hp=0000000000000000000000000000000000000000;hb=b94f8d72735df125b191bf5a49cba0c037278787;hpb=657b1bc3dc283b45b9cceab5f1825a06496146cd diff --git a/darray.c b/darray.c new file mode 100644 index 0000000..890de45 --- /dev/null +++ b/darray.c @@ -0,0 +1,682 @@ +#include +#include +#include "darray.h" +#include "utils.h" + +//typedef unsigned char byte; +//typedef unsigned short word; +//typedef unsigned int dword; + +//typedef dword pb; +//#define logD 5 + +#define PBS (sizeof(pb)*8) +#define D (1<> logD; + l = i & (D-1); + B[j] &= (~(1<<(D-1-l))); + return 0; +} + +int setbit_one(pb *B, int i) +{ + int j,l; + j = i >> logD; + l = i & (D-1); + B[j] |= (1<<(D-1-l)); + return 1; + +} + + + +int setbits(pb *B, int i, int d, int x) +{ + int j; + + for (j=0; j>(d-j-1))&1); + } + return x; +} + +int getbit(pb *B, int i) +{ + int j,l; + + //j = i / D; + //l = i % D; + j = i >> logD; + l = i & (D-1); + return (B[j] >> (D-1-l)) & 1; +} + +dword getbits(pb *B, int i, int d) +{ + dword j,x; + + x = 0; + for (j=0; j>(k-1-j))); + } + //printf("getpattern(%d) = %d\n",i,x); + return x; +} + + +static const unsigned int popCount[] = { +0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4, +1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, +1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, +2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, +1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, +2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, +2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, +3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, +1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, +2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, +2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, +3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, +2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, +3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, +3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, +4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8 +}; + +static int selecttbl[8*256]; + +void make_selecttbl(void) +{ + int i,x,r; + pb buf[1]; + + for (x = 0; x < 256; x++) { + setbits(buf,0,8,x); + for (r=0; r<8; r++) selecttbl[(r<<8)+x] = -1; + r = 0; + for (i=0; i<8; i++) { + if (getbit(buf,i)) { + selecttbl[(r<<8)+x] = i; + r++; + } + } + } +} + + +int darray_construct(darray *da, int n, pb *buf, int opt) +{ + int i,j,k,m; + int nl; + int p,pp; + int il,is,ml,ms; + int r,m2; + dword *s; + int p1,p2,p3,p4,s1,s2,s3,s4; + + da->idx_size = 0; + + make_selecttbl(); + + + if (L/LLL == 0) { + printf("ERROR: L=%d LLL=%d\n",L,LLL); + exit(1); + } + + m = 0; + for (i=0; in = n; + da->m = m; + //printf("n=%d m=%d\n",n,m); + + da->buf = buf; + + if (opt & (~OPT_NO_RANK)) { // construct select table +#if 0 + mymalloc(s,m,0); + m = 0; + for (i=0; ilp,nl+1,1); da->idx_size += (nl+1)*sizeof(*da->lp); + mymalloc(da->p,nl+1,1); da->idx_size += (nl+1)*sizeof(*da->p); +#if 0 + printf("lp table: %d bytes (%1.2f bpc)\n",(nl+1)*sizeof(*da->lp), (double)(nl+1)*sizeof(*da->lp) * 8/n); + printf("p table: %d bytes (%1.2f bpc)\n",(nl+1)*sizeof(*da->p), (double)(nl+1)*sizeof(*da->p) * 8/n); +#endif + + for (r = 0; r < 2; r++) { + s1 = s2 = s3 = s4 = 0; + p1 = p2 = p3 = p4 = -1; + + ml = ms = 0; + for (il = 0; il < nl; il++) { + //pp = s[il*L]; + while (s1 <= il*L) { + if (getbit(buf,p1+1)) s1++; + p1++; + } + pp = p1; + da->lp[il] = pp; + i = min((il+1)*L-1,m-1); + //p = s[i]; + while (s2 <= i) { + if (getbit(buf,p2+1)) s2++; + p2++; + } + p = p2; + //printf("%d ",p-pp); + if (p - pp >= LL) { + if (r == 1) { + for (is = 0; is < L; is++) { + if (il*L+is >= m) break; + //da->sl[ml*L+is] = s[il*L+is]; + while (s3 <= il*L+is) { + if (getbit(buf,p3+1)) s3++; + p3++; + } + da->sl[ml*L+is] = p3; + } + } + da->p[il] = -(ml+1); + ml++; + } else { + if (r == 1) { + for (is = 0; is < L/LLL; is++) { + if (il*L+is*LLL >= m) break; + while (s4 <= il*L+is*LLL) { + if (getbit(buf,p4+1)) s4++; + p4++; + } + //da->ss[ms*(L/LLL)+is] = s[il*L+is*LLL] - pp; + da->ss[ms*(L/LLL)+is] = p4 - pp; + } + } + da->p[il] = ms; + ms++; + } + } + if (r == 0) { + mymalloc(da->sl,ml*L+1,1); da->idx_size += (ml*L+1)*sizeof(*da->sl); + mymalloc(da->ss,ms*(L/LLL)+1,1); da->idx_size += (ms*(L/LLL)+1)*sizeof(*da->ss); +#if 0 + printf("sl table: %d bytes (%1.2f bpc)\n",(ml*L+1)*sizeof(*da->sl), (double)(ml*L+1)*sizeof(*da->sl) * 8/n); + printf("ss table: %d bytes (%1.2f bpc)\n",(ms*(L/LLL)+1)*sizeof(*da->ss), (double)(ms*(L/LLL)+1)*sizeof(*da->ss) * 8/n); +#endif + } + } + //free(s); + } else { // no select table + da->lp = NULL; + da->p = da->sl = NULL; + da->ss = NULL; + } + + // construct rank table + + if ((opt & OPT_NO_RANK) == 0) { + mymalloc(da->rl,n/R1+2,1); da->idx_size += (n/R1+2)*sizeof(*da->rl); + mymalloc(da->rm,n/RR+2,1); da->idx_size += (n/RR+2)*sizeof(*da->rm); + mymalloc(da->rs,n/RRR+2,1); da->idx_size += (n/RRR+2)*sizeof(*da->rs); +#if 0 + printf("rl table: %d bytes (%1.2f bpc)\n",(n/R1+2)*sizeof(*da->rl), (double)(n/R1+2)*sizeof(*da->rl) * 8/n); + printf("rm table: %d bytes (%1.2f bpc)\n",(n/RR+2)*sizeof(*da->rm), (double)(n/RR+2)*sizeof(*da->rm) * 8/n); + printf("rs table: %d bytes (%1.2f bpc)\n",(n/RRR+2)*sizeof(*da->rs), (double)(n/RRR+2)*sizeof(*da->rs) * 8/n); +#endif + r = 0; + for (k=0; k<=n; k+=R1) { + da->rl[k/R1] = r; + m2 = 0; + for (i=0; irm[(k+i)/RR] = m2; + m = 0; + for (j=0; jrs[(k+i+j)/RRR] = m; + m2 += m; + m = 0; + } + } + if (m != 0) { + printf("???\n"); + } + //m2 += m; + } + r += m2; + } + } + + return 0; +} + +// destroyDarray: frees the memory of darray +// Added by Diego Arroyuelo +void destroyDarray(darray *da) { + if (!da) return; // nothing to free + + if (da->buf) free(da->buf); + if (da->lp) free(da->lp); + if (da->sl) free(da->sl); + if (da->ss) free(da->ss); + if (da->p) free(da->p); + if (da->rl) free(da->rl); + if (da->rm) free(da->rm); + if (da->rs) free(da->rs); +} + +int darray_rank0(darray *da, int i) +{ + int r,j; + pb *p; + +#if (RRR == D*2) + r = da->rl[i>>logR] + da->rm[i>>logRR] + da->rs[i>>logRRR]; + p = da->buf + ((i>>logRRR)<<(logRRR-logD)); + j = i & (RRR-1); + if (j < D) r += popcount(*p >> (D-1-j)); + else r += popcount(*p) + popcount(p[1] >> (D-1-(j-D))); +#else + + j = i & (RRR-1); + if (j < RRR/2) { + r = da->rl[i>>logR] + da->rm[i>>logRR] + da->rs[i>>logRRR]; + p = da->buf + ((i>>logRRR)<<(logRRR-logD)); + while (j >= D) { + r += popcount(*p++); + j -= D; + } + r += popcount(*p >> (D-1-j)); + } else { + j = RRR-1 - (i & (RRR-1)); + i += j+1; + r = da->rl[i>>logR] + da->rm[i>>logRR] + da->rs[i>>logRRR]; + p = da->buf + ((i>>logRRR)<<(logRRR-logD)); + while (j >= D) { + r -= popcount(*--p); + j -= D; + } + if (j > 0) r -= popcount(*--p << (D-j)); + } + +#endif + + return r; +} + +int darray_rank(darray *da, int i) +{ + int r,j,i_rr, i_rrr; + pb *p; + i_rr = i >> logRR; + i_rrr = i >> logRRR; + r = da->rl[i>>logR] + da->rm[i_rr]; + + j = (i_rrr) & (RR/RRR-1); + while (j > 0) { + r += da->rs[((i_rr)<<(logRR-logRRR))+j-1]; + j--; + } + + p = da->buf + ((i_rrr)<<(logRRR-logD)); + j = i & (RRR-1); + while (j >= D) { + r += popcount(*p++); + j -= D; + } + r += popcount(*p >> (D-1-j)); + + return r; +} + +int darray_select_bsearch(darray *da, int i, pb (*getpat)(pb *)) +{ + int j; + int l,r,m,n; + int s; + int t,x,rr; + pb *p,w; + + // for debug + //s = darray_select(da,i,1); + // + //printf("select(%d)=%d\n",i,s); + + + + if (i == 0) return -1; + + if (i > da->m) { + return -1; + } + + i--; + + + + n = da->m; + + t = i; + + l = 0; r = (n-1)>>logR; + // find the smallest index x s.t. rl[x] >= t + while (l < r) { + m = (l+r)/2; + //printf("m=%d rl[m+1]=%d t=%d\n",m,da->rl[m+1],t); + if (da->rl[m+1] > t) { // m+1 is out of range + r = m; // new r = m >= l + } else { + l = m+1; // new l = m+1 <= r + } + } + x = l; + t -= da->rl[x]; + + x <<= logR; + + l = x >> logRR; r = (min(x+R1-1,n))>>logRR; + while (l < r) { + m = (l+r)/2; + //printf("m=%d rm[m+1]=%d t=%d\n",m,da->rm[m+1],t); + if (da->rm[m+1] > t) { // m+1 is out of range + r = m; + } else { + l = m+1; // new l = m+1 <= r + } + } + x = l; + t -= da->rm[x]; + + x <<= logRR; + +#if 0 + l = x >> logRRR; r = (min(x+RR-1,n))>>logRRR; + while (l < r) { + m = (l+r)/2; + //printf("m=%d rs[m+1]=%d t=%d\n",m,da->rs[m+1],t); + if (da->rs[m+1] > t) { // m+1 is out of range + r = m; + } else { + l = m+1; // new l = m+1 <= r + } + } + x = l; + t -= da->rs[x]; +#else + l = x >> logRRR; + while (t > da->rs[l]) { + t -= da->rs[l]; + l++; + } + x = l; +#endif + + x <<= logRRR; + + p = &da->buf[x >> logD]; + while (1) { + m = popcount(getpat(p)); + if (m > t) break; + t -= m; + x += D; + p++; + } + + w = getpat(p); + while (1) { + rr = popCount[w >> (D-8)]; + if (rr > t) break; + t -= rr; + x += 8; + w <<= 8; + } + x += selecttbl[((t-0)<<8)+(w>>(D-8))]; + +#if 0 + if (x != s) { + printf("error x=%d s=%d\n",x,s); + } +#endif + return x; +} + +int darray_pat_rank(darray *da, int i, pb (*getpat)(pb *)) +{ + int r,j; + pb *p; + + r = da->rl[i>>logR] + da->rm[i>>logRR]; + j = (i>>logRRR) & (RR/RRR-1); + while (j > 0) { + r += da->rs[((i>>logRR)<<(logRR-logRRR))+j-1]; + j--; + } + + p = da->buf + ((i>>logRRR)<<(logRRR-logD)); + j = i & (RRR-1); + while (j >= D) { + r += popcount(getpat(p)); + p++; + j -= D; + } + r += popcount(getpat(p) >> (D-1-j)); + + return r; +} + + +int darray_select(darray *da, int i,int f) +{ + int p,r; + int il; + int rr; + pb x; + pb *q; + + if (i == 0) return -1; + + if (i > da->m) { + return -1; + //printf("ERROR: m=%d i=%d\n",da->m,i); + //exit(1); + } + + i--; + + il = da->p[i>>logL]; + if (il < 0) { + il = -il-1; + p = da->sl[(il<lp[i>>logL]; + p += da->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL]; + r = i - (i & (LLL-1)); + + q = &(da->buf[p>>logD]); + + if (f == 1) { + rr = p & (D-1); + r -= popcount(*q >> (D-1-rr)); + p = p - rr; + + while (1) { + rr = popcount(*q); + if (r + rr >= i) break; + r += rr; + p += D; + q++; + } + + x = *q; + while (1) { + //rr = popcount(x >> (D-8)); + //rr = popcount(x >> (D-8)); + rr = popcount8(x >> (D-8)); + if (r + rr >= i) break; + r += rr; + p += 8; + x <<= 8; + } + p += selecttbl[((i-r-1)<<8)+(x>>(D-8))]; + } else { + rr = p & (D-1); + r -= popcount((~(*q)) >> (D-1-rr)); + p = p - rr; + + while (1) { + rr = popcount(~(*q)); + if (r + rr >= i) break; + r += rr; + p += D; + q++; + } + + x = ~(*q); + + while (1) { + //rr = popcount(x >> (D-8)); + //rr = popCount[x >> (D-8)]; + rr = popcount8(x >> (D-8)); + if (r + rr >= i) break; + r += rr; + p += 8; + x <<= 8; + } + p += selecttbl[((i-r-1)<<8)+(x>>(D-8))]; + } + } + return p; +} + +int darray_pat_select(darray *da, int i, pb (*getpat)(pb *)) +{ + int p,r; + int il; + int rr; + pb x; + pb *q; + + if (i == 0) return -1; + + if (i > da->m) { + return -1; + //printf("ERROR: m=%d i=%d\n",da->m,i); + //exit(1); + } + + i--; + + il = da->p[i>>logL]; + if (il < 0) { + il = -il-1; + p = da->sl[(il<lp[i>>logL]; + p += da->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL]; + r = i - (i & (LLL-1)); + + q = &(da->buf[p>>logD]); + + rr = p & (D-1); + r -= popcount(getpat(q) >> (D-1-rr)); + p = p - rr; + + while (1) { + rr = popcount(getpat(q)); + if (r + rr >= i) break; + r += rr; + p += D; + q++; + } + + x = getpat(q); + while (1) { + //rr = popcount(x >> (D-8)); + //rr = popCount[x >> (D-8)]; + rr = popcount8(x >> (D-8)); + if (r + rr >= i) break; + r += rr; + p += 8; + x <<= 8; + } + p += selecttbl[((i-r-1)<<8)+(x>>(D-8))]; + } + return p; +} + +int darray_pat_construct(darray *da, int n, pb *buf, int k, pb pat, int opt) +{ + int i; + pb *b; + mymalloc(b,(n+D-1)/D,0); + + for (i=0; ibuf = buf; + + free(b); + + return 0; +}