From: kim Date: Mon, 13 Feb 2012 21:48:19 +0000 (+0000) Subject: Big renaming. Uses the bp namespace everywhere X-Git-Url: http://git.nguyen.vg/gitweb/?p=SXSI%2Flibbp.git;a=commitdiff_plain;h=45ff7a2260f890f6ef6a7b56f654ffa1a057a7e7 Big renaming. Uses the bp namespace everywhere * Prepend bp- to filenames * Prepend bp_ to function names * Make non exported function static * Make critical function static inline git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/bp@1214 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/Makefile b/Makefile index ec8d5b2..542525a 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ CXXFLAGS= $(INC_FLAGS) $(OPT_FLAGS) CC=gcc -OBJECTS_BP=bp.o darray.o bpcore.o +OBJECTS_BP=bp.o bp-utils.o bp-darray.o bp-core.o LIB_BP=libbp.a all: depend $(LIB_BP) diff --git a/bp-core.c b/bp-core.c new file mode 100644 index 0000000..7b35966 --- /dev/null +++ b/bp-core.c @@ -0,0 +1,1036 @@ +#include "bp.h" +#include "bp-utils.h" + + +#define NOTFOUND -2 +#define CONTINUE -3 +#define END -4 +#define FOUND -5 + +#define SBid(i) ((i)>>logSB) +#define SBfirst(i) ((i) & (~(SB-1))) +#define SBlast(i) ((i) | (SB-1)) + +#define MBid(i) ((i)>>logMB) +#define MBfirst(i) ((i) & (~(MB-1))) +#define MBlast(i) ((i) | (MB-1)) +#define max(a,b) \ + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a > _b ? _a : _b; }) + + +#define min(a,b) \ + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a <= _b ? _a : _b; }) + + +pb getpat_preorder(pb *b) +{ + return *b; +} + +pb getpat_postorder(pb *b) +{ + return ~(*b); +} + +pb getpat_leaf(pb *b) +{ + pb w1,w2,w; + w1 = b[0]; + w2 = (w1 << 1) + (b[1] >> (D-1)); + w = w1 & (~w2); + return w; +} + +pb getpat_inorder(pb *b) +{ + pb w1,w2,w; + w1 = b[0]; + w2 = (w1 << 1) + (b[1] >> (D-1)); + w = (~w1) & w2; + return w; +} + +pb getpat_dfuds_leaf(pb *b) +{ + pb w1,w2,w; + w1 = b[0]; + w2 = (w1 << 1) + (b[1] >> (D-1)); + w = (~w1) & (~w2); + return w; +} + + + +/////////////////////////////////////////// +// bp_depth(bp *b, int s) +// returns the depth of s +// The root node has depth 1 +/////////////////////////////////////////// +int bp_depth(bp *b, int s) +{ + int d; + if (s < 0) return 0; + d = 2 * bp_darray_rank(b->da,s) - (s+1); + return d; +} + +int fast_depth(bp *b, int s) +{ + int d; + darray *da; + int r,j; + + s++; + if ((s & (RRR-1)) != 0) { + //printf("fast_depth:warning s=%d\n",s); + return bp_depth(b,s); + } + da = b->da; + //d = 2 * (da->rl[s>>logR] + da->rm[s>>logRR] + da->rs[s>>logRRR]) - s; + + r = da->rl[s>>logR] + da->rm[s>>logRR]; + j = (s>>logRRR) & (RR/RRR-1); + while (j > 0) { + r += da->rs[((s>>logRR)<<(logRR-logRRR))+j-1]; + j--; + } + d = 2 * r - s; + + return d; +} + +int search_SB_r(bp *b, int i, int rel) +{ + int j,r,n,il; + pb *p,x,w; + + n = b->n; + il = min((SBid(i) + 1) << logSB,n); + p = &b->B[i>>logD]; + while (i0) { + w = (x >> (D-ETW)) & ((1<= -ETW && rel <= ETW) { + r = fwdtbl[((rel+ETW)<= n) return NOTFOUND; + return i+r; + } + } + r = min(j,ETW); + rel -= 2*popcount(w)-r; + x <<= r; + i += r; + j -= r; + } + } + return CONTINUE; +} + +int search_MB_r(bp *b, int i, int td) +{ + int il,d; + int m,M,n; + pb *B; + + B = b->B; + n = b->n; + + il = min((MBid(i) + 1) << logMB,n); + for ( ; i < il; i+=SB) { +#if (SB % RRR != 0) + d = bp_depth(b,i-1); +#else + d = fast_depth(b,i-1); +#endif + m = d + b->sm[SBid(i)] - SB; + M = d + b->sM[SBid(i)] - 1; + if (m <= td && td <= M) { + return search_SB_r(b,i,td-d); + } + } + if (i >= n) return NOTFOUND; + return CONTINUE; +} + +/////////////////////////////////////////// +// bp_fwd_excess(bp *b,int s, int rel) +// find the leftmost value depth(s)+rel to the right of s (exclusive) +/////////////////////////////////////////// +int bp_fwd_excess(bp *b,int s, int rel) +{ + int i,n; + int d,td; + int m,M; + int m_ofs; + pb *B; + n = b->n; B = b->B; + + i = s+1; + + d = search_SB_r(b,i,rel); + if (d >= NOTFOUND) return d; + + i = min((SBid(i) + 1) << logSB, n); + td = bp_depth(b,s) + rel; + d = search_MB_r(b,i,td); + if (d >= NOTFOUND) return d; + + m_ofs = b->m_ofs; + + i = MBid(s) + m_ofs; + + while (i > 0) { + if ((i&1) == 0) { + i++; + m = b->mm[i]; + M = b->mM[i]; + + if (m <= td && td <= M) { + while (i < m_ofs) { + i <<= 1; + m = b->mm[i]; + M = b->mM[i]; + if (!(m <= td && td <= M)) i++; + } + i -= m_ofs; + i <<= logMB; + + return search_MB_r(b,i,td); + }; + + } + i >>= 1; + } + return NOTFOUND; +} + + +int degree_SB_slow(bp *b, int i, int t, int rel, int *ans, int ith) +{ + int j,r,n,il; + pb *p,x,w,w2; + int d, deg, v; + + n = t; + il = min((SBid(i) + 1) << logSB,n); + d = deg = 0; + + while (i < il) { + if (bp_getbit(b->B,i)==OP) { + d++; + } else { + d--; + } + if (d < rel) { // reached the end + if (ith > 0) { + return NOTFOUND; + } else { + *ans = deg; + return END; + } + } + if (d == rel) { // found the same depth + deg++; + if (deg == ith) { + *ans = i; + return FOUND; + } + } + i++; + } + *ans = deg; + return CONTINUE; +} + +int degree_SB(bp *b, int i, int t, int rel, int *ans, int ith) +{ + int j,r,n,il; + pb *p,x,w,w2; + int d, deg, v; + int degtmp,itmp; + int ith2,d2; + + n = t; + il = min((SBid(i) + 1) << logSB,n); + d = deg = 0; + + p = &b->B[i>>logD]; + while (i < il) { + x = *p++; + j = i & (D-1); + x <<= j; + j = min(D-j,il-i); + while (j>0) { + w = (x >> (D-ETW)) & ((1< 0) { +#if 0 + for (r = 0; r < ETW; r++) { + if (w & 0x80) { + d++; + } else { + d--; + if (d < rel) break; + if (d == rel) { + ith--; + if (ith == 0) { + *ans = i + r; + return FOUND; + } + } + } + w <<= 1; + } + return NOTFOUND; +#else + r = childtbl2[rel-d+ETW][ith-1][w]; + if (r >= 0) { + *ans = i + r; + return FOUND; + } + return NOTFOUND; +#endif + } + r = ETW-1-minmaxtbl_i[0][w | w2]; + w2 = (1< 0) { + if (ith <= r) { + *ans = i + childtbl[((ith-1)<B; + n = t; + + il = min((MBid(i) + 1) << logMB,n); + deg = 0; + for ( ; i+SB-1 < il; i+=SB) { +#if (SB % RRR != 0) + d = bp_depth(b,i-1); +#else + d = fast_depth(b,i-1); +#endif + m = d + b->sm[SBid(i)] - SB; + if (m < td) { + r = degree_SB(b,i,n,td-d,°tmp,ith); + if (ith > 0) { + if (r == NOTFOUND) return NOTFOUND; + *ans = degtmp; + return FOUND; + } else { + *ans = deg + degtmp; + return END; + } + } + if (m == td) { + if (ith > 0) { + if (ith <= b->sd[SBid(i)]) break; + ith -= b->sd[SBid(i)]; + } + deg += b->sd[SBid(i)]; + } + } + if (i < il) { +#if (SB % RRR != 0) + d = bp_depth(b,i-1); +#else + d = fast_depth(b,i-1); +#endif + r = degree_SB(b,i,n,td-d,°tmp,ith); + if (ith > 0) { + if (r == NOTFOUND) return NOTFOUND; + if (r == FOUND) { + *ans = degtmp; + return FOUND; + } + } else { + deg += degtmp; + } + } + *ans = deg; + if (i >= n) return END; + return CONTINUE; +} + +static int partition_range(int s,int t) +{ + int i,j,h; + + printf("partition [%d,%d] => ",s,t); + h = 1; + while (s <= t) { + if (s & h) { + if (s+h-1 <= t) { + printf("[%d,%d] ",s,s+h-1); + s += h; + } + } else { + if (s+h > t) break; + } + h <<= 1; + } + while (h > 0) { + if (s+h-1 <= t) { + printf("[%d,%d] ",s,s+h-1); + s += h; + } + h >>= 1; + } + printf("\n"); +} + + + + +/////////////////////////////////////////// +// bp_fast_degree(bp *b,int s, int t, int ith) +// returns the number of children of s, to the left of t +// returns the position of (ith)-th child of s if (ith > 0) +/////////////////////////////////////////// +int bp_fast_degree(bp *b,int s, int t, int ith) +{ + int i,j,n; + int d,td; + int m,M; + int m_ofs; + pb *B; + int deg,degtmp; + int sm,tm,ss,h; + + n = t; + B = b->B; + + deg = 0; + + i = s+1; + if (i != SBfirst(i)) { + d = degree_SB(b,i,n,0,°tmp,ith); + if (ith > 0) { + if (d == NOTFOUND) return -1; + if (d == FOUND) return degtmp; + ith -= degtmp; + } + if (d == END) return degtmp; + deg += degtmp; + } + + td = bp_depth(b,s); + + i = SBid(i+SB-1) << logSB; + + if (i != MBfirst(i)) { + d = degree_MB(b,i,n,td,°tmp,ith); + if (ith > 0) { + if (d == NOTFOUND) return -1; + if (d == FOUND) return degtmp; + ith -= degtmp; + deg += degtmp; + } else { + deg += degtmp; + if (d == END) { + return deg; + } + } + } + +#if 0 + // sequential search + + sm = MBid(i+MB-1); + tm = MBid((n-1)+1)-1; // the rightmost MB fully contained in [0,n-1] + + m_ofs = b->m_ofs; + sm += m_ofs; tm += m_ofs; + for (i=sm; i<=tm; i++) { + if (b->mm[i] < td) { + break; + } + if (b->mm[i] == td) { + if (ith > 0) { + if (ith <= b->md[i]) break; + ith -= b->md[i]; + } + deg += b->md[i]; + } + } + ss = i - m_ofs; +#else + sm = MBid(i+MB-1); + tm = MBid((n-1)+1)-1; // the rightmost MB fully contained in [0,n-1] + + m_ofs = b->m_ofs; + sm += m_ofs; tm += m_ofs; + ss = sm; + + //partition_range(sm,tm); + + //printf("partition [%d,%d] => ",sm,tm); + h = 1; + while (sm <= tm) { + if (sm & h) { + if (sm+h-1 <= tm) { + //printf("[%d,%d] ",sm,sm+h-1); + j = sm / h; + if (b->mm[j] < td) { + h >>= 1; + break; + } + if (b->mm[j] == td) { + if (ith > 0) { + if (ith <= b->md[j]) { + h >>= 1; + break; + } + ith -= b->md[j]; + } + deg += b->md[j]; + } + sm += h; + } + } else { + if (sm+h > tm) break; + } + h <<= 1; + } + while (h > 0) { + if (sm+h-1 <= tm) { + //printf("[%d,%d] ",sm,sm+h-1); + j = sm / h; + if (ith > 0) { + if (b->mm[j] >= td) { + if (b->mm[j] == td) { + if (ith > b->md[j]) { + ith -= b->md[j]; + sm += h; + } else { + deg += b->md[j]; + } + } else { + sm += h; + } + } + } else { + if (b->mm[j] >= td) { + if (b->mm[j] == td) { + deg += b->md[j]; + } + sm += h; + } + } + } + h >>= 1; + } + //printf("\n"); + ss = sm; + + ss -= m_ofs; + +#endif + + ss <<= logMB; + + d = degree_MB(b,ss,n,td,°tmp,ith); + if (ith > 0) { + if (d == NOTFOUND) return -1; + if (d == FOUND) return degtmp; + } + deg += degtmp; + if (d == END) return deg; + return deg; + + // unexpected (bug) + printf("degree: ???\n"); + return -99; + +} + +int search_SB_l(bp *b, int i, int rel) +{ + int j,r,il; + pb *p,x,w; + + il = SBfirst(i); + + p = &b->B[i>>logD]; + while (i>=il) { + x = *p--; + j = (i & (D-1))+1; + x >>= D-j; + while (j>0) { + w = x & ((1<= -ETW && rel <= ETW) { + r = bwdtbl[((rel+ETW)<>= r; + i -= r; + j -= r; + } + + } + if (i < 0) return NOTFOUND; + return CONTINUE; +} + +int search_MB_l(bp *b, int i, int td) +{ + int il,d; + int m,M; + pb *B; + +#if 0 + if (i % SB != SB-1) { + printf("search_MB_l:error!!! i=%d SB=%d\n",i,i%SB); + } +#endif + B = b->B; + + il = MBfirst(i); + for ( ; i >= il; i-=SB) { +#if (SB % RRR != 0) + d = bp_depth(b,i-SB); +#else + d = fast_depth(b,i-SB); +#endif + m = d + b->sm[SBid(i)] - SB; + M = d + b->sM[SBid(i)] - 1; + if (m <= td && td <= M) { +#if (SB % RRR != 0) + d = bp_depth(b,i); +#else + d = fast_depth(b,i); +#endif + if (d == td) return i; + return search_SB_l(b,i,td-d); + } + } + return CONTINUE; +} + +/////////////////////////////////////////// +// bwd_excess(bp *b,int s, int rel) +// find the rightmost value depth(s)+rel to the left of s (exclusive) +/////////////////////////////////////////// +int bp_bwd_excess(bp *b,int s, int rel) +{ + int i,n; + int d,td; + int m,M; + int m_ofs; + pb *B; + n = b->n; B = b->B; + + i = s; + d = search_SB_l(b,i,rel); + if (d >= NOTFOUND) return d; + + i = SBfirst(i) -1; + + td = bp_depth(b,s) + rel; + + d = search_MB_l(b,i,td); + if (d >= NOTFOUND) return d; + + m_ofs = b->m_ofs; + i = (s>>logMB) + m_ofs; + while (i > 0) { + if ((i&1) == 1) { + i--; + m = b->mm[i]; + M = b->mM[i]; + if (m <= td && td <= M) break; + } + i >>= 1; + } + if (i == 0) { + if (td == 0) return -1; + else return NOTFOUND; + } + + while (i < m_ofs) { + i = i*2 + 1; + m = b->mm[i]; + M = b->mM[i]; + if (!(m <= td && td <= M)) i--; + } + i -= m_ofs; + i = ((i+1)<= NOTFOUND) return d; + + // unexpected (bug) + printf("bwd_excess: ???\n"); + return -99; + +} + +int rmq_SB(bp *b, int s, int t, int opt, int *dm) +{ + int i,d; + int is,ds; + pb *p,x,w,w2; + int j,v,r; + int lr; + int op; + + lr = 0; if (opt & OPT_RIGHT) lr = 1; + op = opt & (OPT_RIGHT | OPT_MAX); + + is = s; ds = d = 0; + i = s+1; + +#if SB >= ETW + p = &b->B[i>>logD]; + while (i <= t) { + x = *p++; + j = i & (D-1); + x <<= j; + j = min(D-j,t-i+1); + while (j>0) { + w = (x >> (D-ETW)) & ((1< ds) { + ds = d + v; + is = i + minmaxtbl_i[op][w & (~w2)]; + } + } else { + v = minmaxtbl_v[op][w | w2]; + if (d + v < ds +lr) { + ds = d + v; + is = i + minmaxtbl_i[op][w | w2]; + } + } + + r = min(j,ETW); + d += 2*popcount(w)-r; + x <<= r; + i += r; + j -= r; + } + } +#else + while (i <= t) { + if (bp_getbit(b->B,i)==OP) { + d++; + if (op & OPT_MAX) { + if (d + lr > ds) { + ds = d; is = i; + } + } + } else { + d--; + if (!(op & OPT_MAX)) { + if (d < ds + lr) { + ds = d; is = i; + } + } + } + i++; + } +#endif + *dm = ds; + return is; +} + +int rmq_MB(bp *b, int s, int t, int opt, int *dm) +{ + int i,d,m; + int mi,md; + int lr; + + lr = 0; if (opt & OPT_RIGHT) lr = 1; + + md = *dm; mi = -1; + for (i = s; i <= t; i++) { +#if (SB % RRR != 0) + d = bp_depth(b,(i<sM[i] - 1; + if (m + lr > md) { + md = m; mi = i; + } + } else { + m = d + b->sm[i] - SB; + if (m < md + lr) { + md = m; mi = i; + } + } + } + *dm = md; + return mi; +} + + + + +/////////////////////////////////////////// +// bp_rmq(bp *b, int s, int t, int opt) +// returns the index of leftmost/rightmost minimum/maximum value +// in the range [s,t] (inclusive) +// returns the leftmost minimum if opt == 0 +// the rightmost one if (opt & OPT_RIGHT) +// the maximum if (opt & OPT_MAX) +/////////////////////////////////////////// +int bp_rmq(bp *b, int s, int t, int opt) +{ + int ss, ts; // SB index of s and t + int sm, tm; // MB index of s and t + int ds; // current min value + int is; // current min index + int ys; // type of current min index + // 0: is is the index of min + // 1: is is the SB index + // 2: is is the MB index + int m_ofs; + int i,j,il,d,n; + int dm; + int lr; + + lr = 0; if (opt & OPT_RIGHT) lr = 1; + + n = b->n; + if (s < 0 || t >= n || s > t) { + printf("rmq: error s=%d t=%d n=%d\n",s,t,n); + return -1; + } + if (s == t) return s; + + + //////////////////////////////////////////////////////////// + // search the SB of s + //////////////////////////////////////////////////////////// + + il = min(SBlast(s),t); + is = rmq_SB(b,s,il,opt,&dm); + if (il == t) { // scan reached the end of the range + return is; + } + ds = bp_depth(b,s) + dm; ys = 0; + + //////////////////////////////////////////////////////////// + // search the MB of s + //////////////////////////////////////////////////////////// + + ss = SBid(s) + 1; + il = min(SBid(MBlast(s)),SBid(t)-1); + dm = ds; + j = rmq_MB(b,ss,il,opt,&dm); + //if (dm < ds + lr) { + if (j >= 0) { + ds = dm; is = j; ys = 1; + } + + //////////////////////////////////////////////////////////// + // search the tree + //////////////////////////////////////////////////////////// + + sm = MBid(s) + 1; + tm = MBid(t) - 1; + +#if 0 + // sequential search + m_ofs = b->m_ofs; + sm += m_ofs; tm += m_ofs; + for (i=sm; i<=tm; i++) { + if (opt & OPT_MAX) { + if (b->mM[i] + lr > ds) { + ds = b->mM[i]; is = i - m_ofs; ys = 2; + } + } else { + if (b->mm[i] < ds + lr) { + ds = b->mm[i]; is = i - m_ofs; ys = 2; + } + } + } + +#else + if (sm <= tm) { + int h; + h = blog(sm ^ tm); + + m_ofs = b->m_ofs; + sm += m_ofs; tm += m_ofs; + + if (opt & OPT_MAX) { + if (b->mM[sm] + lr > ds) { + ds = b->mM[sm]; is = sm; ys = 2; + } + for (i=0; i<=h-2; i++) { + j = sm>>i; + if ((j&1) == 0) { + if (b->mM[j+1] + lr > ds) { + ds = b->mM[j+1]; is = j+1; ys = 2; + } + } + } + for (i=h-2; i>=0; i--) { + j = tm>>i; + if ((j&1)==1) { + if (b->mM[j-1] + lr > ds) { + ds = b->mM[j-1]; is = j-1; ys = 2; + } + } + } + if (b->mM[tm] + lr > ds) { + ds = b->mM[tm]; is = tm; ys = 2; + } + if (ys == 2) { + while (is < m_ofs) { + is <<= 1; + if (b->mM[is+1] + lr > b->mM[is]) is++; + } + is -= m_ofs; + } + } else { // MIN + if (b->mm[sm] < ds + lr) { + ds = b->mm[sm]; is = sm; ys = 2; + } + for (i=0; i<=h-2; i++) { + j = sm>>i; + if ((j&1) == 0) { + if (b->mm[j+1] < ds + lr) { + ds = b->mm[j+1]; is = j+1; ys = 2; + } + } + } + for (i=h-2; i>=0; i--) { + j = tm>>i; + if ((j&1)==1) { + if (b->mm[j-1] < ds + lr) { + ds = b->mm[j-1]; is = j-1; ys = 2; + } + } + } + if (b->mm[tm] < ds + lr) { + ds = b->mm[tm]; is = tm; ys = 2; + } + if (ys == 2) { + while (is < m_ofs) { + is <<= 1; + if (b->mm[is+1] < b->mm[is] + lr) is++; + } + is -= m_ofs; + } + } + } + +#endif + + //////////////////////////////////////////////////////////// + // search the MB of t + //////////////////////////////////////////////////////////// + + + ts = max(SBid(MBfirst(t)),SBid(s)+1); + il = SBid(SBfirst(t)-1); + dm = ds; + j = rmq_MB(b,ts,il,opt,&dm); + //if (dm < ds + lr) { + if (j >= 0) { + ds = dm; is = j; ys = 1; + } + + //////////////////////////////////////////////////////////// + // search the SB of t + //////////////////////////////////////////////////////////// + + i = SBfirst(t); + j = rmq_SB(b,i,t,opt,&dm); + d = bp_depth(b,i) + dm; + if (opt & OPT_MAX) { + if (d + lr > ds) { + ds = d; is = j; ys = 0; + } + } else { + if (d < ds + lr) { + ds = d; is = j; ys = 0; + } + } + + //////////////////////////////////////////////////////////// + // search the rest + //////////////////////////////////////////////////////////// + + if (ys == 2) { + ss = SBid(is << logMB); + il = SBid(MBlast(is << logMB)); + if (opt & OPT_MAX) { + dm = -n-1; + } else { + dm = n+1; + } + j = rmq_MB(b,ss,il,opt,&dm); + ds = dm; is = j; ys = 1; + } + + if (ys == 1) { + ss = is << logSB; + il = SBlast(is << logSB); + is = rmq_SB(b,ss,il,opt,&dm); + //ds = bp_depth(b,ss) + dm; ys = 0; + } + + return is; +} + diff --git a/bp-darray.c b/bp-darray.c new file mode 100644 index 0000000..c3c06a6 --- /dev/null +++ b/bp-darray.c @@ -0,0 +1,593 @@ +#include +#include +#include "bp-darray.h" +#include "bp-utils.h" + +#define PBS (sizeof(pb)*8) +#define D (1< _b ? _a : _b; }) + + +#define min(a,b) \ + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a <= _b ? _a : _b; }) + + + + +static dword getbits(pb *B, int i, int d) +{ + dword j,x; + + x = 0; + for (j=0; j>(k-1-j))); + } + //printf("getpattern(%d) = %d\n",i,x); + return x; +} + +static int selecttbl[8*256]; +static int selecttbl_init = 0; + +static void make_selecttbl(void) +{ + int i,x,r; + pb buf[1]; + if (selecttbl_init) return; + + selecttbl_init = 1; + + for (x = 0; x < 256; x++) { + bp_setbits(buf,0,8,x); + for (r=0; r<8; r++) selecttbl[(r<<8)+x] = -1; + r = 0; + for (i=0; i<8; i++) { + if (bp_getbit(buf,i)) { + selecttbl[(r<<8)+x] = i; + r++; + } + } + } +} + + +darray * bp_darray_construct(int n, pb *buf, int opt) +{ + int i,j,k,m; + int nl; + int p,pp; + int il,is,ml,ms; + int r,m2; + int p1,p2,p3,p4,s1,s2,s3,s4; + darray * da; + + bp_xmalloc(da, 1); + + da->idx_size = 0; + + make_selecttbl(); + + + if (L/LLL == 0) { + printf("ERROR: L=%d LLL=%d\n",L,LLL); + exit(1); + } + + m = 0; + for (i=0; in = n; + da->m = m; + //printf("n=%d m=%d\n",n,m); + + da->buf = buf; + + if (opt & (~OPT_NO_RANK)) { // construct select table + + nl = (m-1) / L + 1; + bp_xmalloc(da->lp, nl+1); + da->idx_size += (nl+1) * sizeof(*da->lp); + bp_xmalloc(da->p, nl+1); + da->idx_size += (nl+1) * sizeof(*da->p); + + for (r = 0; r < 2; r++) { + s1 = s2 = s3 = s4 = 0; + p1 = p2 = p3 = p4 = -1; + + ml = ms = 0; + for (il = 0; il < nl; il++) { + + while (s1 <= il*L) { + if (bp_getbit(buf,p1+1)) s1++; + p1++; + } + pp = p1; + da->lp[il] = pp; + i = min((il+1)*L-1,m-1); + + while (s2 <= i) { + if (bp_getbit(buf,p2+1)) s2++; + p2++; + } + p = p2; + + if (p - pp >= LL) { + if (r == 1) { + for (is = 0; is < L; is++) { + if (il*L+is >= m) break; + + while (s3 <= il*L+is) { + if (bp_getbit(buf,p3+1)) s3++; + p3++; + } + da->sl[ml*L+is] = p3; + } + } + da->p[il] = -(ml+1); + ml++; + } else { + if (r == 1) { + for (is = 0; is < L/LLL; is++) { + if (il*L+is*LLL >= m) break; + while (s4 <= il*L+is*LLL) { + if (bp_getbit(buf,p4+1)) s4++; + p4++; + } + + da->ss[ms*(L/LLL)+is] = p4 - pp; + } + } + da->p[il] = ms; + ms++; + } + } + if (r == 0) { + bp_xmalloc(da->sl,ml*L+1); + da->idx_size += (ml*L+1) * sizeof(*da->sl); + bp_xmalloc(da->ss, ms*(L/LLL)+1); + da->idx_size += (ms*(L/LLL)+1) * sizeof(*da->ss); + } + } + + } else { // no select table + da->lp = NULL; + da->p = da->sl = NULL; + da->ss = NULL; + } + + // construct rank table + + if ((opt & OPT_NO_RANK) == 0) { + bp_xmalloc(da->rl,n/R1+2); + da->idx_size += (n/R1+2) * sizeof(*da->rl); + bp_xmalloc(da->rm,n/RR+2); + + da->idx_size += (n/RR+2) * sizeof(*da->rm); + + bp_xmalloc(da->rs,n/RRR+2); + da->idx_size += (n/RRR+2) * sizeof(*da->rs); + + r = 0; + for (k=0; k<=n; k+=R1) { + da->rl[k/R1] = r; + m2 = 0; + for (i=0; irm[(k+i)/RR] = m2; + m = 0; + for (j=0; jrs[(k+i+j)/RRR] = m; + m2 += m; + m = 0; + } + } + if (m != 0) { + printf("???\n"); + } + } + r += m2; + } + } + + return da; +} + + +void bp_darray_free(darray *da) { + + if (!da) return; + if (da->buf) bp_free(da->buf); + if (da->lp) bp_free(da->lp); + if (da->sl) bp_free(da->sl); + if (da->ss) bp_free(da->ss); + if (da->p) bp_free(da->p); + if (da->rl) bp_free(da->rl); + if (da->rm) bp_free(da->rm); + if (da->rs) bp_free(da->rs); + bp_free(da); + +} + +static int darray_rank0(darray *da, int i) +{ + int r,j; + pb *p; + +#if (RRR == D*2) + r = da->rl[i>>logR] + da->rm[i>>logRR] + da->rs[i>>logRRR]; + p = da->buf + ((i>>logRRR)<<(logRRR-logD)); + j = i & (RRR-1); + if (j < D) r += popcount(*p >> (D-1-j)); + else r += popcount(*p) + popcount(p[1] >> (D-1-(j-D))); +#else + + j = i & (RRR-1); + if (j < RRR/2) { + r = da->rl[i>>logR] + da->rm[i>>logRR] + da->rs[i>>logRRR]; + p = da->buf + ((i>>logRRR)<<(logRRR-logD)); + while (j >= D) { + r += popcount(*p++); + j -= D; + } + r += popcount(*p >> (D-1-j)); + } else { + j = RRR-1 - (i & (RRR-1)); + i += j+1; + r = da->rl[i>>logR] + da->rm[i>>logRR] + da->rs[i>>logRRR]; + p = da->buf + ((i>>logRRR)<<(logRRR-logD)); + while (j >= D) { + r -= popcount(*--p); + j -= D; + } + if (j > 0) r -= popcount(*--p << (D-j)); + } + +#endif + + return r; +} + +int bp_darray_rank(darray *da, int i) +{ + int r,j,i_rr, i_rrr; + pb *p; + i_rr = i >> logRR; + i_rrr = i >> logRRR; + r = da->rl[i>>logR] + da->rm[i_rr]; + + j = (i_rrr) & (RR/RRR-1); + while (j > 0) { + r += da->rs[((i_rr)<<(logRR-logRRR))+j-1]; + j--; + } + + p = da->buf + ((i_rrr)<<(logRRR-logD)); + j = i & (RRR-1); + while (j >= D) { + r += popcount(*p++); + j -= D; + } + r += popcount(*p >> (D-1-j)); + + return r; +} + +int bp_darray_select_bsearch(darray *da, int i, pb (*getpat)(pb *)) +{ + int j; + int l,r,m,n; + int s; + int t,x,rr; + pb *p,w; + + + if (i == 0) return -1; + + if (i > da->m) { + return -1; + } + + i--; + + + + n = da->m; + + t = i; + + l = 0; r = (n-1)>>logR; + // find the smallest index x s.t. rl[x] >= t + while (l < r) { + m = (l+r)/2; + //printf("m=%d rl[m+1]=%d t=%d\n",m,da->rl[m+1],t); + if (da->rl[m+1] > t) { // m+1 is out of range + r = m; // new r = m >= l + } else { + l = m+1; // new l = m+1 <= r + } + } + x = l; + t -= da->rl[x]; + + x <<= logR; + + l = x >> logRR; r = (min(x+R1-1,n))>>logRR; + while (l < r) { + m = (l+r)/2; + //printf("m=%d rm[m+1]=%d t=%d\n",m,da->rm[m+1],t); + if (da->rm[m+1] > t) { // m+1 is out of range + r = m; + } else { + l = m+1; // new l = m+1 <= r + } + } + x = l; + t -= da->rm[x]; + + x <<= logRR; + +#if 0 + l = x >> logRRR; r = (min(x+RR-1,n))>>logRRR; + while (l < r) { + m = (l+r)/2; + //printf("m=%d rs[m+1]=%d t=%d\n",m,da->rs[m+1],t); + if (da->rs[m+1] > t) { // m+1 is out of range + r = m; + } else { + l = m+1; // new l = m+1 <= r + } + } + x = l; + t -= da->rs[x]; +#else + l = x >> logRRR; + while (t > da->rs[l]) { + t -= da->rs[l]; + l++; + } + x = l; +#endif + + x <<= logRRR; + + p = &da->buf[x >> logD]; + while (1) { + m = popcount(getpat(p)); + if (m > t) break; + t -= m; + x += D; + p++; + } + + w = getpat(p); + while (1) { + rr = popcount8(w >> (D-8)); + if (rr > t) break; + t -= rr; + x += 8; + w <<= 8; + } + x += selecttbl[((t-0)<<8)+(w>>(D-8))]; + +#if 0 + if (x != s) { + printf("error x=%d s=%d\n",x,s); + } +#endif + return x; +} + +int bp_darray_pat_rank(darray *da, int i, pb (*getpat)(pb *)) +{ + int r,j; + pb *p; + + r = da->rl[i>>logR] + da->rm[i>>logRR]; + j = (i>>logRRR) & (RR/RRR-1); + while (j > 0) { + r += da->rs[((i>>logRR)<<(logRR-logRRR))+j-1]; + j--; + } + + p = da->buf + ((i>>logRRR)<<(logRRR-logD)); + j = i & (RRR-1); + while (j >= D) { + r += popcount(getpat(p)); + p++; + j -= D; + } + r += popcount(getpat(p) >> (D-1-j)); + + return r; +} + + +int bp_darray_select(darray *da, int i,int f) +{ + int p,r; + int il; + int rr; + pb x; + pb *q; + + if (i == 0) return -1; + + if (i > da->m) { + return -1; + //printf("ERROR: m=%d i=%d\n",da->m,i); + //exit(1); + } + + i--; + + il = da->p[i>>logL]; + if (il < 0) { + il = -il-1; + p = da->sl[(il<lp[i>>logL]; + p += da->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL]; + r = i - (i & (LLL-1)); + + q = &(da->buf[p>>logD]); + + if (f == 1) { + rr = p & (D-1); + r -= popcount(*q >> (D-1-rr)); + p = p - rr; + + while (1) { + rr = popcount(*q); + if (r + rr >= i) break; + r += rr; + p += D; + q++; + } + + x = *q; + while (1) { + //rr = popcount(x >> (D-8)); + //rr = popcount(x >> (D-8)); + rr = popcount8(x >> (D-8)); + if (r + rr >= i) break; + r += rr; + p += 8; + x <<= 8; + } + p += selecttbl[((i-r-1)<<8)+(x>>(D-8))]; + } else { + rr = p & (D-1); + r -= popcount((~(*q)) >> (D-1-rr)); + p = p - rr; + + while (1) { + rr = popcount(~(*q)); + if (r + rr >= i) break; + r += rr; + p += D; + q++; + } + + x = ~(*q); + + while (1) { + rr = popcount8(x >> (D-8)); + if (r + rr >= i) break; + r += rr; + p += 8; + x <<= 8; + } + p += selecttbl[((i-r-1)<<8)+(x>>(D-8))]; + } + } + return p; +} + +int bp_darray_pat_select(darray *da, int i, pb (*getpat)(pb *)) +{ + int p,r; + int il; + int rr; + pb x; + pb *q; + + if (i == 0) return -1; + + if (i > da->m) { + return -1; + //printf("ERROR: m=%d i=%d\n",da->m,i); + //exit(1); + } + + i--; + + il = da->p[i>>logL]; + if (il < 0) { + il = -il-1; + p = da->sl[(il<lp[i>>logL]; + p += da->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL]; + r = i - (i & (LLL-1)); + + q = &(da->buf[p>>logD]); + + rr = p & (D-1); + r -= popcount(getpat(q) >> (D-1-rr)); + p = p - rr; + + while (1) { + rr = popcount(getpat(q)); + if (r + rr >= i) break; + r += rr; + p += D; + q++; + } + + x = getpat(q); + while (1) { + rr = popcount8(x >> (D-8)); + if (r + rr >= i) break; + r += rr; + p += 8; + x <<= 8; + } + p += selecttbl[((i-r-1)<<8)+(x>>(D-8))]; + } + return p; +} + + +darray * bp_darray_pat_construct(int n, pb *buf, int k, pb pat, int opt) +{ + int i; + pb *b; + darray *da; + + bp_xmalloc(b, (n+D-1)/D); + + for (i=0; ibuf = buf; + + bp_free(b); + + return da; +} diff --git a/bp-darray.h b/bp-darray.h new file mode 100644 index 0000000..23d44a4 --- /dev/null +++ b/bp-darray.h @@ -0,0 +1,53 @@ +#ifndef BP_DARRAY_H_ +#define BP_DARRAY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +typedef unsigned char byte; +typedef unsigned short word; +typedef unsigned int dword; + +#define OPT_NO_RANK (1<<30) + + +typedef dword pb; + + +typedef struct { + int n,m; + int size; + pb *buf; + dword *lp; + dword *sl; + word *ss; + dword *p; + + dword *rl; + word *rm; + byte *rs; + + int idx_size; +} darray; + + +darray * bp_darray_construct(int n, pb *buf,int opt); +void bp_darray_free(darray *da); + +int bp_darray_select(darray *da, int i,int f); +int bp_darray_rank(darray *da, int i); +darray * bp_darray_pat_construct(int n, pb *buf, int k, pb pat, int opt); +int bp_darray_pat_select(darray *da, int i, pb (*getpat)(pb *)); +int bp_darray_pat_rank(darray *da, int i, pb (*getpat)(pb *)); + +int bp_darray_select_bsearch(darray *da, int i, pb (*getpat)(pb *)); + + + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/bp-utils.c b/bp-utils.c new file mode 100644 index 0000000..b1d68ce --- /dev/null +++ b/bp-utils.c @@ -0,0 +1,53 @@ +#include +#include "bp-utils.h" + +static size_t allocated = 0; + +void * bp_malloc(size_t n) +{ + void * res = malloc(n); + if (!res) { + fprintf(stderr, __FILE__ ": failure to allocate %lu bytes\n", n); + exit(1); + }; + allocated += n; + return res; +} + +void bp_free(void * p) +{ + if (p) free(p); +} +size_t bp_get_alloc_stats(void) +{ + return allocated; +} +void bp_reset_alloc_states(void) +{ + allocated = 0; +} + +int bp_setbit(unsigned int *B, int i,int x) +{ + int j,l; + + j = i / D; + l = i % D; + if (x==0) B[j] &= (~(1<<(D-1-l))); + else if (x==1) B[j] |= (1<<(D-1-l)); + else { + printf("error setbit x=%d\n",x); + exit(1); + } + return x; +} + +int bp_setbits(unsigned int *B, int i, int d, int x) +{ + int j; + + for (j=0; j>(d-j-1))&1); + } + return x; +} diff --git a/bp-utils.h b/bp-utils.h new file mode 100644 index 0000000..b548688 --- /dev/null +++ b/bp-utils.h @@ -0,0 +1,90 @@ +#ifndef BP_UTILS_H_ +#define BP_UTILS_H_ + +#ifdef __cplusplus + +extern "C" { +#endif + +#define logD 5 +#define D (1< + + + +#ifdef HAS_NATIVE_POPCOUNT +static inline unsigned int popcount(unsigned int n){ + asm ("popcnt %1, %0" : "=r" (n) : "0" (n)); + return n; +} + +static inline unsigned int popcount8(unsigned int n) { + return popcount(n & 0xff); +} + +#else + +static unsigned int popcount8(unsigned int x) +{ + unsigned int r; + r = x; + r = ((r & 0xaa)>>1) + (r & 0x55); + r = ((r & 0xcc)>>2) + (r & 0x33); + r = ((r>>4) + r) & 0x0f; + return r; +} + +static inline unsigned int popcount(unsigned int x) +{ + unsigned int m1 = 0x55555555; + unsigned int m2 = 0xc30c30c3; + x -= (x >> 1) & m1; + x = (x & m2) + ((x >> 2) & m2) + ((x >> 4) & m2); + x += x >> 6; + return (x + (x >> 12) + (x >> 24)) & 0x3f; +} + +#endif + + +void * bp_malloc(size_t); + +#define bp_xmalloc(p, n) p = (__typeof__(p)) bp_malloc((n)*sizeof(*p)) + + + +void bp_free(void *); + +size_t bp_alloc_stats(void); + +void bp_reset_alloc_states(void); + +int bp_setbit(unsigned int *B, int i,int x); + +int bp_setbits(unsigned int *B, int i, int d, int x); + +static inline int bp_getbit(unsigned int *B, int i) +{ + int j = i >> logD; + int l = i & (D-1); + return (B[j] >> (D-1-l)) & 1; +} + +#ifdef __cplusplus + } +#endif + +#endif diff --git a/bp.c b/bp.c index b85508b..9d108bb 100644 --- a/bp.c +++ b/bp.c @@ -1,5 +1,9 @@ #include "bp.h" +//#define CHECK +#define RANDOM + + #define max(a,b) \ ({ __typeof__ (a) _a = (a); \ __typeof__ (b) _b = (b); \ @@ -11,26 +15,17 @@ __typeof__ (b) _b = (b); \ _a <= _b ? _a : _b; }) -//#define CHECK -#define RANDOM -int msize=0; -#define mymalloc(p,n,f) { \ - p = (__typeof__(p)) malloc((n)*sizeof(*p)); \ -if ((p)==NULL) {printf("not enough memory (%d bytes) in line %d\n",msize,__LINE__); \ - exit(1);}; \ -msize += (f)*(n)*sizeof(*p); \ -;} -int postorder_select_bsearch(bp *b,int s); +static int postorder_select_bsearch(bp *b,int s); -int naive_depth(bp *b, int s) +static int naive_depth(bp *b, int s) { int i,d; if (s < 0) return 0; d = 0; for (i=0; i<=s; i++) { - if (getbit(b->B,i)==OP) { + if (bp_getbit(b->B,i)==OP) { d++; } else { d--; @@ -39,12 +34,12 @@ int naive_depth(bp *b, int s) return d; } -void printbp(bp *b, int s, int t) +void bp_print(bp *b, int s, int t) { int i,c,d; d = 0; for (i=s; i<=t; i++) { - if (getbit(b->B,i)==OP) { + if (bp_getbit(b->B,i)==OP) { c = '('; d++; } else { @@ -61,9 +56,9 @@ void make_naivetbl(pb *B,int n) int i,v; int *stack,s; - mymalloc(matchtbl,n,0); - mymalloc(parenttbl,n,0); - mymalloc(stack,n,0); + bp_xmalloc(matchtbl,n); + bp_xmalloc(parenttbl,n); + bp_xmalloc(stack,n); for (i=0; i 0) { parenttbl[i] = stack[v-1]; @@ -89,7 +84,7 @@ void make_naivetbl(pb *B,int n) free(stack); } -int popCount[1<=0; i--) { - if (getbit(buf,i)==OP) { + if (bp_getbit(buf,i)==OP) { r--; } else { r++; @@ -143,7 +139,7 @@ void make_matchtbl(void) r = 0; for (i=0; i M) { M = r; @@ -198,7 +189,7 @@ void make_matchtbl(void) minmaxtbl_v[OPT_MAX | OPT_RIGHT][x] = -ETW-1; minmaxtbl_v[OPT_MIN | OPT_RIGHT][x] = ETW+1; for (i=0; i= M) { M = r; @@ -228,7 +219,7 @@ void make_matchtbl(void) ith = 0; r = 0; for (i = 0; i < ETW; i++) { - if (getbit(buf,i)==OP) { + if (bp_getbit(buf,i)==OP) { r++; } else { r--; @@ -242,11 +233,12 @@ void make_matchtbl(void) } } -} +}; -int bp_construct(bp *b,int n, pb *B, int opt) +bp * bp_construct(int n, pb *B, int opt) { + bp *b; int i,j,d; int m,M,ds; int ns,nm; @@ -256,16 +248,7 @@ int bp_construct(bp *b,int n, pb *B, int opt) int *md; int m_ofs; int r; // # of minimum values - -#if 0 - if (SB % D != 0) { - printf("warning: SB=%d should be a multiple of D=%d\n",SB,D); - // not necessarily? - } - if (SB % RRR != 0) { - printf("warning: SB=%d should be a multiple of RRR=%d\n",SB,RRR); - } -#endif + bp_xmalloc(b, 1); b->B = B; b->n = n; @@ -281,33 +264,36 @@ int bp_construct(bp *b,int n, pb *B, int opt) b->da_inorder = NULL; b->da_postorder = NULL; b->da_dfuds_leaf = NULL; - mymalloc(b->da,1,0); - darray_construct(b->da,n,B, opt & OPT_FAST_PREORDER_SELECT); + b->da = bp_darray_construct(n,B, opt & OPT_FAST_PREORDER_SELECT); b->idx_size += b->da->idx_size; - //Kim: comment this and the following, they polute the printing of the xpath library - //printf("preorder rank/select table: %d bytes (%1.2f bpc)\n",b->da->idx_size,(double)b->da->idx_size*8/n); + //TODO check. make_matchtbl(); ns = (n+SB-1)/SB; - mymalloc(sm, ns, 0); b->idx_size += ns * sizeof(*sm); - mymalloc(sM, ns, 0); b->idx_size += ns * sizeof(*sM); + + bp_xmalloc(sm, ns); + b->idx_size += ns * sizeof(*sm); + + bp_xmalloc(sM, ns); + b->idx_size += ns * sizeof(*sM); + b->sm = sm; b->sM = sM; + if (opt & OPT_DEGREE) { - mymalloc(sd, ns, 0); b->idx_size += ns * sizeof(*sd); + bp_xmalloc(sd, ns); + b->idx_size += ns * sizeof(*sd); b->sd = sd; - //printf("SB degree table: %d bytes (%1.2f bpc)\n",ns * sizeof(*sd), (double)ns * sizeof(*sd) * 8/n); } - //printf("SB table: %d bytes (%1.2f bpc)\n",ns * sizeof(*sm) * 2, (double)ns * sizeof(*sm)*2 * 8/n); for (i=0; i M) M = d; } if (i % SB == SB-1 || i==n-1) { - ds = depth(b,(i/SB)*SB-1); + ds = bp_depth(b,(i/SB)*SB-1); if (m - ds + SB < 0 || m - ds + SB > 255) { printf("error m=%d ds=%d\n",m,ds); } @@ -329,30 +315,27 @@ int bp_construct(bp *b,int n, pb *B, int opt) } } -#if 0 - printf("sd: "); - for (i=0;im_ofs = m_ofs; - mymalloc(mm, nm + m_ofs, 0); b->idx_size += (nm+m_ofs) * sizeof(*mm); - mymalloc(mM, nm + m_ofs, 0); b->idx_size += (nm+m_ofs) * sizeof(*mM); + bp_xmalloc(mm, nm + m_ofs); + b->idx_size += (nm+m_ofs) * sizeof(*mm); + + bp_xmalloc(mM, nm + m_ofs); + b->idx_size += (nm+m_ofs) * sizeof(*mM); b->mm = mm; b->mM = mM; + if (opt & OPT_DEGREE) { - mymalloc(md, nm + m_ofs, 0); b->idx_size += (nm+m_ofs) * sizeof(*md); + bp_xmalloc(md, nm + m_ofs); + b->idx_size += (nm+m_ofs) * sizeof(*md); b->md = md; - //printf("MB degree table: %d bytes (%1.2f bpc)\n",(nm+m_ofs) * sizeof(*md), (double)(nm+m_ofs) * sizeof(*md) * 8/n); } - //printf("MB table: %d bytes (%1.2f bpc)\n",(nm+m_ofs) * sizeof(*mm) * 2, (double)(nm+m_ofs) * sizeof(*mm)*2 * 8/n); for (i=0; i 0; j--) { m = 0; if (j*2 < nm + m_ofs) m = mm[j*2]; @@ -396,76 +379,62 @@ int bp_construct(bp *b,int n, pb *B, int opt) } -#if 0 - printf("md: "); - for (i=0;ida_leaf,1,0); - darray_pat_construct(b->da_leaf, n, B, 2, 0x2, opt & OPT_FAST_LEAF_SELECT); - //printf("leaf rank/select table: %d bytes (%1.2f bpc)\n",b->da_leaf->idx_size,(double)b->da_leaf->idx_size*8/n); - b->idx_size += b->da_leaf->idx_size; + b->da_leaf = bp_darray_pat_construct(n, B, 2, 0x2, opt & OPT_FAST_LEAF_SELECT); + b->idx_size += b->da_leaf->idx_size; } else { b->da_leaf = NULL; } if (opt & OPT_INORDER) { - mymalloc(b->da_inorder,1,0); - darray_pat_construct(b->da_inorder, n, B, 2, 0x1, opt & OPT_FAST_INORDER_SELECT); - //printf("inorder rank/select table: %d bytes (%1.2f bpc)\n",b->da_inorder->idx_size,(double)b->da_inorder->idx_size*8/n); + + b->da_inorder = bp_darray_pat_construct(n, B, 2, 0x1, opt & OPT_FAST_INORDER_SELECT); b->idx_size += b->da_inorder->idx_size; + } else { b->da_inorder = NULL; } if (opt & OPT_FAST_POSTORDER_SELECT) { - mymalloc(b->da_postorder,1,0); - darray_pat_construct(b->da_postorder, n, B, 1, 0x0, (opt & OPT_FAST_POSTORDER_SELECT) | OPT_NO_RANK); - //printf("postorder rank/select table: %d bytes (%1.2f bpc)\n",b->da_postorder->idx_size,(double)b->da_postorder->idx_size*8/n); + + b->da_postorder = bp_darray_pat_construct(n, B, 1, 0x0, (opt & OPT_FAST_POSTORDER_SELECT) | OPT_NO_RANK); b->idx_size += b->da_postorder->idx_size; + } else { b->da_postorder = NULL; } if (opt & OPT_DFUDS_LEAF) { - mymalloc(b->da_dfuds_leaf,1,0); - darray_pat_construct(b->da_dfuds_leaf, n, B, 2, 0x0, opt & OPT_FAST_DFUDS_LEAF_SELECT); - //printf("dfuds leaf rank/select table: %d bytes (%1.2f bpc)\n",b->da_dfuds_leaf->idx_size,(double)b->da_dfuds_leaf->idx_size*8/n); + b->da_dfuds_leaf = bp_darray_pat_construct( n, B, 2, 0x0, opt & OPT_FAST_DFUDS_LEAF_SELECT); b->idx_size += b->da_dfuds_leaf->idx_size; + } else { b->da_dfuds_leaf = NULL; } - return 0; + return b; } -// destroyTree: frees the memory of tree. -void destroyTree(bp *b) { +void bp_delete(bp *b) { if (!b) return; // nothing to free - destroyDarray(b->da); // destroys da data structure - if (b->da) free(b->da); - + bp_darray_free(b->da); // destroys da data structure if (b->sm) free(b->sm); if (b->sM) free(b->sM); if (b->sd) free(b->sd); if (b->mm) free(b->mm); if (b->mM) free(b->mM); if (b->md) free(b->md); - - destroyDarray(b->da_leaf); - if (b->da_leaf) free(b->da_leaf); - - destroyDarray(b->da_inorder); - if (b->da_inorder) free(b->da_inorder); - - destroyDarray(b->da_postorder); - if (b->da_postorder) free(b->da_postorder); - - destroyDarray(b->da_dfuds_leaf); - if (b->da_dfuds_leaf) free(b->da_dfuds_leaf); + + bp_darray_free(b->da_leaf); + + bp_darray_free(b->da_inorder); + + bp_darray_free(b->da_postorder); + + bp_darray_free(b->da_dfuds_leaf); + + bp_free(b); } @@ -491,7 +460,7 @@ void saveTree(bp *b, FILE *fp) { // loadTree: load parentheses data structure from file // By Diego Arroyuelo -void loadTree(bp *b, FILE *fp) { +bp * loadTree(FILE *fp) { pb *B; int n, opt; @@ -501,8 +470,8 @@ void loadTree(bp *b, FILE *fp) { exit(1); } - mymalloc(B,(n+D-1)/D,0); - + bp_xmalloc(B, (n+D-1)/D); + if (fread(B, sizeof(pb), (n+D-1)/D, fp) != ((n+D-1)/D)) { printf("Error: cannot read parentheses sequence from file\n"); exit(1); @@ -512,21 +481,20 @@ void loadTree(bp *b, FILE *fp) { printf("Error: cannot read opt in parentheses from file\n"); exit(1); } - - bp_construct(b, n, B, opt); - -} + return bp_construct(n, B, opt); + +} -int naive_fwd_excess(bp *b,int s, int rel) +static int naive_fwd_excess(bp *b,int s, int rel) { int i,v,n; pb *B; n = b->n; B = b->B; v = 0; for (i=s+1; iB; v = 0; for (i=s; i>=0; i--) { - if (getbit(B,i)==OP) { + if (bp_getbit(B,i)==OP) { v--; } else { v++; @@ -553,13 +521,13 @@ int naive_bwd_excess(bp *b,int s, int rel) return -2; } -int naive_search_SB_l(bp *b, int i, int rel) +static int naive_search_SB_l(bp *b, int i, int rel) { int il,v; il = (i / SB) * SB; for (; i>=il; i--) { - if (getbit(b->B,i)==OP) { + if (bp_getbit(b->B,i)==OP) { rel++; } else { rel--; @@ -570,15 +538,15 @@ int naive_search_SB_l(bp *b, int i, int rel) return -3; } -int naive_rmq(bp *b, int s, int t,int opt) +int bp_naive_rmq(bp *b, int s, int t,int opt) { int d,i,dm,im; if (opt & OPT_RIGHT) { - d = dm = depth(b,t); im = t; + d = dm = bp_depth(b,t); im = t; i = t-1; while (i >= s) { - if (getbit(b->B,i+1)==CP) { + if (bp_getbit(b->B,i+1)==CP) { d++; if (opt & OPT_MAX) { if (d > dm) { @@ -596,10 +564,10 @@ int naive_rmq(bp *b, int s, int t,int opt) i--; } } else { - d = dm = depth(b,s); im = s; + d = dm = bp_depth(b,s); im = s; i = s+1; while (i <= t) { - if (getbit(b->B,i)==OP) { + if (bp_getbit(b->B,i)==OP) { d++; if (opt & OPT_MAX) { if (d > dm) { @@ -620,141 +588,52 @@ int naive_rmq(bp *b, int s, int t,int opt) return im; } -int root_node(bp *b) -{ - return 0; -} -int rank_open(bp *b, int s) +int bp_rank_open(bp *b, int s) { - return darray_rank(b->da,s); + return bp_darray_rank(b->da, s); } -int rank_close(bp *b, int s) +int bp_rank_close(bp *b, int s) { - return s+1 - darray_rank(b->da,s); + return s+1 - bp_darray_rank(b->da, s); } -int select_open(bp *b, int s) +int bp_select_open(bp *b, int s) { if (b->opt & OPT_FAST_PREORDER_SELECT) { - return darray_select(b->da,s,1); + return bp_darray_select(b->da,s,1); } else { - return darray_select_bsearch(b->da,s,getpat_preorder); + return bp_darray_select_bsearch(b->da, s, getpat_preorder); } } -int select_close(bp *b, int s) +int bp_select_close(bp *b, int s) { if (b->opt & OPT_FAST_POSTORDER_SELECT) { - return darray_pat_select(b->da_postorder,s,getpat_postorder); + return bp_darray_pat_select(b->da_postorder, s, getpat_postorder); } else { return postorder_select_bsearch(b,s); } } -/////////////////////////////////////////// -// find_close(bp *b,int s) -// returns the matching close parenthesis of s -/////////////////////////////////////////// -int find_close(bp *b,int s) -{ - return fwd_excess(b,s,-1); -} - -/////////////////////////////////////////// -// find_open(bp *b,int s) -// returns the matching open parenthesis of s -/////////////////////////////////////////// -int find_open(bp *b,int s) -{ - int r; - r = bwd_excess(b,s,0); - if (r >= -1) return r+1; - return -1; -} - -/////////////////////////////////////////// -// parent(bp *b,int s) -// returns the parent of s -// -1 if s is the root -/////////////////////////////////////////// -int parent(bp *b,int s) -{ - int r; - r = bwd_excess(b,s,-2); - if (r >= -1) return r+1; - return -1; -} - -int enclose(bp *b,int s) -{ - return parent(b,s); -} - -/////////////////////////////////////////// -// level_ancestor(bp *b,int s,int d) -// returns the ancestor of s with relative depth d (d < 0) -// -1 if no such node -/////////////////////////////////////////// -int level_ancestor(bp *b,int s,int d) -{ - int r; - r = bwd_excess(b,s,d-1); - if (r >= -1) return r+1; - return -1; -} /////////////////////////////////////////// -// lca(bp *b, int s, int t) -// returns the lowest common ancestor of s and t -/////////////////////////////////////////// -int lca(bp *b, int s, int t) -{ - return parent(b,rmq(b,s,t,0)+1); -} - - -/////////////////////////////////////////// -// preorder_rank(bp *b,int s) -// returns the preorder (>= 1) of node s (s >= 0) -/////////////////////////////////////////// -int preorder_rank(bp *b,int s) -{ - return darray_rank(b->da,s); -} - -/////////////////////////////////////////// -// preorder_select(bp *b,int s) -// returns the node with preorder s (s >= 1) -// -1 if no such node -/////////////////////////////////////////// -int preorder_select(bp *b,int s) -{ - // no error handling - if (b->opt & OPT_FAST_PREORDER_SELECT) { - return darray_select(b->da,s,1); - } else { - return darray_select_bsearch(b->da,s,getpat_preorder); - } -} - -/////////////////////////////////////////// -// postorder_rank(bp *b,int s) +// bp_postorder_rank(bp *b,int s) // returns the postorder (>= 1) of node s (s >= 0) // -1 if s-th bit is not OP /////////////////////////////////////////// -int postorder_rank(bp *b,int s) +int bp_postorder_rank(bp *b,int s) { int t; - if (inspect(b,s) == CP) return -1; - t = find_close(b,s); + if (bp_inspect(b,s) == CP) return -1; + t = bp_find_close(b,s); // return t+1 - darray_rank(b->da,t); - return rank_close(b,t); + return bp_rank_close(b,t); } -int postorder_select_bsearch(bp *b,int s) +static int postorder_select_bsearch(bp *b,int s) { int l,r,m; @@ -768,7 +647,7 @@ int postorder_select_bsearch(bp *b,int s) while (l < r) { m = (l+r)/2; //printf("m=%d rank=%d s=%d\n",m,m+1 - darray_rank(b->da,m),s); - if (m+1 - darray_rank(b->da,m) >= s) { + if (m+1 - bp_darray_rank(b->da,m) >= s) { r = m; } else { l = m+1; @@ -778,27 +657,27 @@ int postorder_select_bsearch(bp *b,int s) } /////////////////////////////////////////// -// postorder_select(bp *b,int s) +// bp_postorder_select(bp *b,int s) // returns the position of CP of the node with postorder s (>= 1) /////////////////////////////////////////// -int postorder_select(bp *b,int s) +int bp_postorder_select(bp *b,int s) { #if 0 if (b->opt & OPT_FAST_POSTORDER_SELECT) { - return darray_pat_select(b->da_postorder,s,getpat_postorder); + return bp_darray_pat_select(b->da_postorder,s,getpat_postorder); } else { return postorder_select_bsearch(b->da,s); } #else - return select_close(b,s); + return bp_select_close(b,s); #endif } /////////////////////////////////////////// -// leaf_rank(bp *b,int s) +// bp_leaf_rank(bp *b,int s) // returns the number of leaves to the left of s /////////////////////////////////////////// -int leaf_rank(bp *b,int s) +int bp_leaf_rank(bp *b,int s) { if ((b->opt & OPT_LEAF) == 0) { printf("leaf_rank: error!!! not supported\n"); @@ -807,14 +686,14 @@ int leaf_rank(bp *b,int s) if (s >= b->n-1) { s = b->n-2; } - return darray_pat_rank(b->da_leaf,s,getpat_leaf); + return bp_darray_pat_rank(b->da_leaf,s,getpat_leaf); } /////////////////////////////////////////// // leaf_select(bp *b,int s) // returns the position of s-th leaf /////////////////////////////////////////// -int leaf_select(bp *b,int s) +int bp_leaf_select(bp *b,int s) { if ((b->opt & OPT_LEAF) == 0) { printf("leaf_select: error!!! not supported\n"); @@ -822,18 +701,18 @@ int leaf_select(bp *b,int s) } if (s > b->da_leaf->m) return -1; if (b->opt & OPT_FAST_LEAF_SELECT) { - return darray_pat_select(b->da_leaf,s,getpat_leaf); + return bp_darray_pat_select(b->da_leaf,s,getpat_leaf); } else { - return darray_select_bsearch(b->da_leaf,s,getpat_leaf); + return bp_darray_select_bsearch(b->da_leaf,s,getpat_leaf); } } /////////////////////////////////////////// -// inorder_rank(bp *b,int s) +// bp_inorder_rank(bp *b,int s) // returns the number of ")(" (s >= 0) /////////////////////////////////////////// -int inorder_rank(bp *b,int s) +int bp_inorder_rank(bp *b,int s) { if ((b->opt & OPT_INORDER) == 0) { printf("inorder_rank: error!!! not supported\n"); @@ -842,320 +721,244 @@ int inorder_rank(bp *b,int s) if (s >= b->n-1) { s = b->n-2; } - return darray_pat_rank(b->da_inorder,s,getpat_inorder); + return bp_darray_pat_rank(b->da_inorder,s,getpat_inorder); } /////////////////////////////////////////// -// inorder_select(bp *b,int s) +// bp_inorder_select(bp *b,int s) // returns the s-th position of ")(" (s >= 1) /////////////////////////////////////////// -int inorder_select(bp *b,int s) +int bp_inorder_select(bp *b,int s) { if ((b->opt & OPT_INORDER) == 0) { printf("inorder_select: error!!! not supported\n"); return -1; } if (b->opt & OPT_FAST_INORDER_SELECT) { - return darray_pat_select(b->da_inorder,s,getpat_inorder); + return bp_darray_pat_select(b->da_inorder,s,getpat_inorder); } else { - return darray_select_bsearch(b->da_inorder,s,getpat_inorder); + return bp_darray_select_bsearch(b->da_inorder,s,getpat_inorder); } } /////////////////////////////////////////// -// leftmost_leaf(bp *b, int s) +// bp_leftmost_leaf(bp *b, int s) /////////////////////////////////////////// -int leftmost_leaf(bp *b, int s) +int bp_leftmost_leaf(bp *b, int s) { if ((b->opt & OPT_LEAF) == 0) { printf("leftmost_leaf: error!!! not supported\n"); return -1; } - return leaf_select(b,leaf_rank(b,s)+1); + return bp_leaf_select(b, bp_leaf_rank(b,s)+1); } /////////////////////////////////////////// // rightmost_leaf(bp *b, int s) /////////////////////////////////////////// -int rightmost_leaf(bp *b, int s) +int bp_rightmost_leaf(bp *b, int s) { int t; if ((b->opt & OPT_LEAF) == 0) { printf("leftmost_leaf: error!!! not supported\n"); return -1; } - t = find_close(b,s); - return leaf_select(b,leaf_rank(b,t)); -} - - - -/////////////////////////////////////////// -// inspect(bp *b, int s) -// returns OP (==1) or CP (==0) at s-th bit (0 <= s < n) -/////////////////////////////////////////// -int inspect(bp *b, int s) -{ - if (s < 0 || s >= b->n) { - printf("inspect: error s=%d is out of [%d,%d]\n",s,0,b->n-1); - } - return getbit(b->B,s); + t = bp_find_close(b,s); + return bp_leaf_select(b, bp_leaf_rank(b,t)); } -int isleaf(bp *b, int s) -{ - if (inspect(b,s) != OP) { - printf("isleaf: error!!! B[%d] = OP\n",s); - } - if (inspect(b,s+1) == CP) return 1; - else return 0; -} - -/////////////////////////////////////////// -// subtree_size(bp *b, int s) -// returns the number of nodes in the subtree of s /////////////////////////////////////////// -int subtree_size(bp *b, int s) -{ - return (find_close(b,s) - s + 1) / 2; -} - -/////////////////////////////////////////// -// first_child(bp *b, int s) -// returns the first child -// -1 if s is a leaf -/////////////////////////////////////////// -int first_child(bp *b, int s) -{ - if (inspect(b,s+1) == CP) return -1; - return s+1; -} - -/////////////////////////////////////////// -// next_sibling(bp *b,int s) -// returns the next sibling of parent(s) -// -1 if s is the last child -////////////////////////////////////////// -int next_sibling(bp *b, int s) -{ - int t; - t = find_close(b,s)+1; - if (t >= b->n) { - printf("next_sibling: error s=%d t=%d\n",s,t); - } - if (inspect(b,t) == CP) return -1; - return t; -} - -/////////////////////////////////////////// -// prev_sibling(bp *b,int s) +// bp_prev_sibling(bp *b,int s) // returns the previous sibling of parent(s) // -1 if s is the first child ////////////////////////////////////////// -int prev_sibling(bp *b, int s) +int bp_prev_sibling(bp *b, int s) { int t; - if (s < 0) { - printf("prev_sibling: error s=%d\n",s); - } if (s == 0) return -1; - if (inspect(b,s-1) == OP) return -1; - t = find_open(b,s-1); + if (bp_inspect(b,s-1) == OP) return -1; + t = bp_find_open(b,s-1); return t; } /////////////////////////////////////////// -// deepest_node(bp *b,int s) +// bp_deepest_node(bp *b,int s) // returns the first node with the largest depth in the subtree of s /////////////////////////////////////////// -int deepest_node(bp *b,int s) +int bp_deepest_node(bp *b,int s) { int t,m; - t = find_close(b,s); - m = rmq(b,s,t, OPT_MAX); + t = bp_find_close(b,s); + m = bp_rmq(b,s,t, OPT_MAX); return m; } /////////////////////////////////////////// -// subtree_height(bp *b,int s) +// bp_subtree_height(bp *b,int s) // returns the height of the subtree of s // 0 if s is a leaf /////////////////////////////////////////// -int subtree_height(bp *b,int s) +int bp_subtree_height(bp *b,int s) { int t; - t = deepest_node(b,s); - return depth(b,t) - depth(b,s); + t = bp_deepest_node(b,s); + return bp_depth(b,t) - bp_depth(b,s); } -int naive_degree(bp *b, int s) +int bp_naive_degree(bp *b, int s) { int t,d; d = 0; - t = first_child(b,s); + t = bp_first_child(b,s); while (t >= 0) { d++; - t = next_sibling(b,t); + t = bp_next_sibling(b,t); } return d; } /////////////////////////////////////////// -// degree(bp *b, int s) +// bp_degree(bp *b, int s) // returns the number of children of s // 0 if s is a leaf /////////////////////////////////////////// -int degree(bp *b, int s) +int bp_degree(bp *b, int s) { if (b->opt & OPT_DEGREE) { - return fast_degree(b,s,b->n,0); + return bp_fast_degree(b,s,b->n,0); } else { - return naive_degree(b,s); + return bp_naive_degree(b,s); } } -int naive_child(bp *b, int s, int d) +int bp_naive_child(bp *b, int s, int d) { int t,i; - t = first_child(b,s); + t = bp_first_child(b,s); for (i = 1; i < d; i++) { if (t == -1) break; - t = next_sibling(b,t); + t = bp_next_sibling(b,t); } return t; } /////////////////////////////////////////// -// child(bp *b, int s, int d) +// bp_child(bp *b, int s, int d) // returns the d-th child of s (1 <= d <= degree(s)) // -1 if no such node /////////////////////////////////////////// -int child(bp *b, int s, int d) +int bp_child(bp *b, int s, int d) { int r; if (b->opt & OPT_DEGREE) { //return find_open(b,fast_degree(b,s,b->n,d)); - if (d==1) return first_child(b,s); - r = fast_degree(b,s,b->n,d-1)+1; - if (inspect(b,r) == CP) return -1; + if (d==1) return bp_first_child(b,s); + r = bp_fast_degree(b,s,b->n,d-1)+1; + if (bp_inspect(b,r) == CP) return -1; return r; } else { - return naive_child(b,s,d); + return bp_naive_child(b,s,d); } } -int naive_child_rank(bp *b, int t) +int bp_naive_child_rank(bp *b, int t) { int v,d; d = 0; while (t != -1) { d++; - t = prev_sibling(b,t); + t = bp_prev_sibling(b,t); } return d; } /////////////////////////////////////////// -// child_rank(bp *b, int t) +// bp_child_rank(bp *b, int t) // returns d if t is the d-th child of the parent of t (d >= 1) // 1 if t is the root /////////////////////////////////////////// -int child_rank(bp *b, int t) +int bp_child_rank(bp *b, int t) { int r; - if (t == root_node(b)) return 1; + if (t == bp_root_node(b)) return 1; if (b->opt & OPT_DEGREE) { - r = parent(b,t); - return fast_degree(b,r,t,0)+1; + r = bp_parent(b,t); + return bp_fast_degree(b,r,t,0)+1; } else { - return naive_child_rank(b,t); + return bp_naive_child_rank(b,t); } } - -/////////////////////////////////////////// -// is_ancestor(bp *b, int s, int t) -// returns 1 if s is an ancestor of t -// 0 otherwise -/////////////////////////////////////////// -int is_ancestor(bp *b, int s, int t) -{ - int v; - v = find_close(b,s); - if (s <= t && t <= v) return 1; - return 0; -} - /////////////////////////////////////////// // distance(bp *b, int s, int t) // returns the length of the shortest path from s to t in the tree /////////////////////////////////////////// -int distance(bp *b, int s, int t) +int bp_distance(bp *b, int s, int t) { int v,d; - v = lca(b,s,t); - d = depth(b,v); - return (depth(b,s) - d) + (depth(b,t) - d); + v = bp_lca(b,s,t); + d = bp_depth(b,v); + return (bp_depth(b,s) - d) + (bp_depth(b,t) - d); } /////////////////////////////////////////// // level_next(bp *b, int d) /////////////////////////////////////////// -int level_next(bp *b,int s) +int bp_level_next(bp *b,int s) { int t; - t = fwd_excess(b,s,0); + t = bp_fwd_excess(b,s,0); return t; } /////////////////////////////////////////// // level_prev(bp *b, int d) /////////////////////////////////////////// -int level_prev(bp *b,int s) +int bp_level_prev(bp *b,int s) { int t; - t = bwd_excess(b,s,0); + t = bp_bwd_excess(b,s,0); return t; } /////////////////////////////////////////// -// level_leftmost(bp *b, int d) +// bp_level_leftmost(bp *b, int d) /////////////////////////////////////////// -int level_leftmost(bp *b, int d) +int bp_level_leftmost(bp *b, int d) { int t; if (d < 1) return -1; if (d == 1) return 0; - t = fwd_excess(b,0,d); + t = bp_fwd_excess(b,0,d); return t; } /////////////////////////////////////////// -// level_rigthmost(bp *b, int d) +// bp_level_rigthmost(bp *b, int d) /////////////////////////////////////////// int level_rigthmost(bp *b, int d) { int t; if (d < 1) return -1; if (d == 1) return 0; - t = bwd_excess(b,0,d-1); - return find_open(b,t); + t = bp_bwd_excess(b,0,d-1); + return bp_find_open(b,t); } /////////////////////////////////////////// // leaf_size(bp *b, int s) /////////////////////////////////////////// -int leaf_size(bp *b, int s) +int bp_leaf_size(bp *b, int s) { int t; if ((b->opt & OPT_LEAF) == 0) { printf("leaf_size: error!!! not supported\n"); return -1; } - t = find_close(b,s); - return leaf_rank(b,t) - leaf_rank(b,s); + t = bp_find_close(b,s); + return bp_leaf_rank(b,t) - bp_leaf_rank(b,s); } diff --git a/bp.h b/bp.h index 9979345..00aecf7 100644 --- a/bp.h +++ b/bp.h @@ -1,6 +1,7 @@ #ifndef BP_H_ #define BP_H_ + #ifdef __cplusplus extern "C" { #endif @@ -8,7 +9,9 @@ extern "C" { #include #include -#include "darray.h" +#include "bp-darray.h" +#include "bp-utils.h" + #define OP 1 #define CP 0 @@ -61,75 +64,233 @@ typedef struct { int opt; } bp; -int bp_construct(bp *b,int n, pb *B, int opt); -void printbp(bp *b, int s, int t); - - -int rank_open(bp *b, int s); -int rank_close(bp *b, int s); -int select_open(bp *b, int s); -int select_close(bp *b, int s); - - -int root_node(bp *b); -int find_close(bp *b,int s); -int find_open(bp *b,int s); -int enclose(bp *b,int s); -int parent(bp *b,int s); -int level_ancestor(bp *b,int s,int d); -int depth(bp *b, int s); -int preorder_rank(bp *b,int s); -int postorder_rank(bp *b,int s); -int inspect(bp *b, int s); -int isleaf(bp *b, int s); -int rmq(bp *b, int s, int t, int opt); -int subtree_size(bp *b, int s); -int first_child(bp *b, int s); -int next_sibling(bp *b, int s); -int prev_sibling(bp *b, int s); -int deepest_node(bp *b,int s); -int subtree_height(bp *b,int s); -int is_ancestor(bp *b, int s, int t); -int distance(bp *b, int s, int t); -int level_lefthmost(bp *b, int d); -int level_rigthmost(bp *b, int d); -int degree(bp *b,int s); +pb getpat_preorder(pb *b); +pb getpat_leaf(pb *b); +pb getpat_inorder(pb *b); +pb getpat_postorder(pb *b); + +bp * bp_construct(int n, pb *B, int opt); +void bp_print(bp *b, int s, int t); + +int bp_fwd_excess(bp *b,int s, int rel); +int bp_bwd_excess(bp *b,int s, int rel); +int bp_rmq(bp *b, int s, int t, int opt); +int bp_depth(bp *b, int s); + +int bp_rank_open(bp *b, int s); +int bp_rank_close(bp *b, int s); +int bp_select_open(bp *b, int s); +int bp_select_close(bp *b, int s); + + +static inline int bp_root_node(bp *b) +{ + return 0; +} + + +/////////////////////////////////////////// +// find_close(bp *b,int s) +// returns the matching close parenthesis of s +/////////////////////////////////////////// +static inline int bp_find_close(bp *b,int s) +{ + return bp_fwd_excess(b, s, -1); +} + +/////////////////////////////////////////// +// bp_find_open(bp *b,int s) +// returns the matching open parenthesis of s +/////////////////////////////////////////// +static inline int bp_find_open(bp *b,int s) +{ + int r = bp_bwd_excess(b, s, 0); + return (r >= -1) ? r+1 : -1; +} + + +/////////////////////////////////////////// +// bp_parent(bp *b,int s) +// returns the parent of s +// -1 if s is the root +/////////////////////////////////////////// +static inline int bp_parent(bp *b, int s) +{ + int r = bp_bwd_excess(b,s,-2); + return (r >= -1) ? r+1 : -1; +} + +/////////////////////////////////////////// +// bp_parent_close(bp *b,int s) +// returns the closing parenthesis of the parent +// of s +// -1 if s is the root +/////////////////////////////////////////// + +static inline int bp_parent_close(bp *b, int s) +{ + return bp_fwd_excess(b,s,-2); +} + + +static inline int bp_enclose(bp *b, int s) +{ + return bp_parent(b, s);; +} + + +/////////////////////////////////////////// +// bp_level_ancestor(bp *b,int s,int d) +// returns the ancestor of s with relative depth d (d < 0) +// -1 if no such node +/////////////////////////////////////////// +static inline int bp_level_ancestor(bp *b,int s,int d) +{ + int r = bp_bwd_excess(b,s,d-1); + return (r >= -1) ? r+1 :-1; +} + +/////////////////////////////////////////// +// bp_lca(bp *b, int s, int t) +// returns the lowest common ancestor of s and t +/////////////////////////////////////////// +static inline int bp_lca(bp *b, int s, int t) +{ + return bp_parent(b, bp_rmq(b,s,t,0)+1); +} + +/////////////////////////////////////////// +// preorder_rank(bp *b,int s) +// returns the preorder (>= 1) of node s (s >= 0) +/////////////////////////////////////////// + +static inline int bp_preorder_rank(bp *b,int s) +{ + return bp_darray_rank(b->da,s); +} + +/////////////////////////////////////////// +// preorder_select(bp *b,int s) +// returns the node with preorder s (s >= 1) +// -1 if no such node +/////////////////////////////////////////// +static inline int bp_preorder_select(bp *b,int s) +{ + // no error handling + if (b->opt & OPT_FAST_PREORDER_SELECT) { + return bp_darray_select(b->da,s,1); + } else { + return bp_darray_select_bsearch(b->da,s, getpat_preorder); + } +} + + +int bp_postorder_rank(bp *b,int s); + +/////////////////////////////////////////// +// inspect(bp *b, int s) +// returns OP (==1) or CP (==0) at s-th bit (0 <= s < n) +/////////////////////////////////////////// +static inline int bp_inspect(bp *b, int s) +{ + return bp_getbit(b->B,s); +} + +static inline int bp_isleaf(bp *b, int s) +{ + return (bp_inspect(b, s+1) == CP); +} + +/////////////////////////////////////////// +// bp_subtree_size(bp *b, int s) +// returns the number of nodes in the subtree of s +/////////////////////////////////////////// +static inline int bp_subtree_size(bp *b, int s) +{ + return (bp_find_close(b, s) - s + 1) / 2; +} + +/////////////////////////////////////////// +// first_child(bp *b, int s) +// returns the first child +// -1 if s is a leaf +/////////////////////////////////////////// + +static inline int bp_first_child(bp *b, int s) +{ + return (bp_inspect(b, s+1) == CP) ? -1 : s+1; +} + + +/////////////////////////////////////////// +// next_sibling(bp *b,int s) +// returns the next sibling of parent(s) +// -1 if s is the last child +////////////////////////////////////////// +static inline int bp_next_sibling(bp *b, int s) +{ + int t; + t = bp_find_close(b,s) + 1; + return (bp_inspect(b, t) == CP) ? -1 : t; + +} + +int bp_prev_sibling(bp *b, int s); +int bp_deepest_node(bp *b,int s); +int bp_subtree_height(bp *b,int s); +/////////////////////////////////////////// +// bp_is_ancestor(bp *b, int s, int t) +// returns 1 if s is an ancestor of t +// 0 otherwise +/////////////////////////////////////////// +static inline int bp_is_ancestor(bp *b, int s, int t) +{ + int v; + if (s == 0) return 1; + v = bp_find_close(b, s); + return (s <= t && t <= v); +} + +int bp_distance(bp *b, int s, int t); +int bp_level_lefthmost(bp *b, int d); +int bp_level_rigthmost(bp *b, int d); +int bp_degree(bp *b,int s); // not efficient -int naive_degree(bp *b, int s); -int naive_child(bp *b, int s, int d); -int naive_child_rank(bp *b, int t); -int naive_rmq(bp *b, int s, int t,int opt); -int postorder_select(bp *b,int s); +int bp_naive_degree(bp *b, int s); +int bp_naive_child(bp *b, int s, int d); +int bp_naive_child_rank(bp *b, int t); +int bp_naive_rmq(bp *b, int s, int t,int opt); +int bp_postorder_select(bp *b,int s); // using preorder select index int preorder_select(bp *b,int s); // using leaf index -int leaf_rank(bp *b,int s); -int leaf_size(bp *b, int s); -int leftmost_leaf(bp *b, int s); -int rightmost_leaf(bp *b, int s); +int bp_leaf_rank(bp *b,int s); +int bp_leaf_size(bp *b, int s); +int bp_leftmost_leaf(bp *b, int s); +int bp_rightmost_leaf(bp *b, int s); // using leaf select index -int leaf_select(bp *b,int s); +int bp_leaf_select(bp *b,int s); // using inorder index -int inorder_rank(bp *b,int s); +int bp_inorder_rank(bp *b,int s); // using inorder select index -int inorder_select(bp *b,int s); +int bp_inorder_select(bp *b,int s); // using degree index -int fast_degree(bp *b,int s, int t, int ith); -int child_rank(bp *b, int t); -int child(bp *b, int s, int d); +int bp_fast_degree(bp *b,int s, int t, int ith); +int bp_child_rank(bp *b, int t); +int bp_child(bp *b, int s, int d); // new functions for persistence purposes, added by Diego Arroyuelo void saveTree(bp *b, FILE *fp); -void loadTree(bp *b, FILE *fp); -void destroyTree(bp *b); +bp * loadTree(FILE *fp); +void bp_delete(bp *b); static inline int blog(int x) @@ -143,19 +304,13 @@ static inline int blog(int x) return l; } -pb getpat_preorder(pb *b); -pb getpat_leaf(pb *b); -pb getpat_inorder(pb *b); -pb getpat_postorder(pb *b); -int fwd_excess(bp *b,int s, int rel); -int bwd_excess(bp *b,int s, int rel); extern int *matchtbl,*parenttbl; void make_naivetbl(pb *B,int n); -extern int popCount[1<(y)?(x):(y)) -#endif - -#define NOTFOUND -2 -#define CONTINUE -3 -#define END -4 -#define FOUND -5 - -#define SBid(i) ((i)>>logSB) -#define SBfirst(i) ((i) & (~(SB-1))) -#define SBlast(i) ((i) | (SB-1)) - -#define MBid(i) ((i)>>logMB) -#define MBfirst(i) ((i) & (~(MB-1))) -#define MBlast(i) ((i) | (MB-1)) - -pb getpat_preorder(pb *b) -{ - return *b; -} - -pb getpat_postorder(pb *b) -{ - return ~(*b); -} - -pb getpat_leaf(pb *b) -{ - pb w1,w2,w; - w1 = b[0]; - w2 = (w1 << 1) + (b[1] >> (D-1)); - w = w1 & (~w2); - return w; -} - -pb getpat_inorder(pb *b) -{ - pb w1,w2,w; - w1 = b[0]; - w2 = (w1 << 1) + (b[1] >> (D-1)); - w = (~w1) & w2; - return w; -} - -pb getpat_dfuds_leaf(pb *b) -{ - pb w1,w2,w; - w1 = b[0]; - w2 = (w1 << 1) + (b[1] >> (D-1)); - w = (~w1) & (~w2); - return w; -} - - - -/////////////////////////////////////////// -// depth(bp *b, int s) -// returns the depth of s -// The root node has depth 1 -/////////////////////////////////////////// -int depth(bp *b, int s) -{ - int d; - if (s < 0) return 0; - d = 2 * darray_rank(b->da,s) - (s+1); -#if 0 - if (d != naive_depth(b,s)) { - d = naive_depth(b,s); - darray_rank(b->da,s); - } - //printf("depth(%d)=%d\n",s,d); -#endif - return d; -} - -int fast_depth(bp *b, int s) -{ - int d; - darray *da; - int r,j; - - s++; - if ((s & (RRR-1)) != 0) { - //printf("fast_depth:warning s=%d\n",s); - return depth(b,s); - } - da = b->da; - //d = 2 * (da->rl[s>>logR] + da->rm[s>>logRR] + da->rs[s>>logRRR]) - s; - - r = da->rl[s>>logR] + da->rm[s>>logRR]; - j = (s>>logRRR) & (RR/RRR-1); - while (j > 0) { - r += da->rs[((s>>logRR)<<(logRR-logRRR))+j-1]; - j--; - } - d = 2 * r - s; - - return d; -} - -int search_SB_r(bp *b, int i, int rel) -{ - int j,r,n,il; - pb *p,x,w; - - n = b->n; - il = min((SBid(i) + 1) << logSB,n); - p = &b->B[i>>logD]; - while (i0) { - w = (x >> (D-ETW)) & ((1<= -ETW && rel <= ETW) { - r = fwdtbl[((rel+ETW)<= n) return NOTFOUND; - return i+r; - } - } - r = min(j,ETW); - rel -= 2*popcount(w)-r; - x <<= r; - i += r; - j -= r; - } - } - return CONTINUE; -} - -int search_MB_r(bp *b, int i, int td) -{ - int il,d; - int m,M,n; - pb *B; - - B = b->B; - n = b->n; - - il = min((MBid(i) + 1) << logMB,n); - for ( ; i < il; i+=SB) { -#if (SB % RRR != 0) - d = depth(b,i-1); -#else - d = fast_depth(b,i-1); -#endif - m = d + b->sm[SBid(i)] - SB; - M = d + b->sM[SBid(i)] - 1; - if (m <= td && td <= M) { - return search_SB_r(b,i,td-d); - } - } - if (i >= n) return NOTFOUND; - return CONTINUE; -} - -/////////////////////////////////////////// -// fwd_excess(bp *b,int s, int rel) -// find the leftmost value depth(s)+rel to the right of s (exclusive) -/////////////////////////////////////////// -int fwd_excess(bp *b,int s, int rel) -{ - int i,n; - int d,td; - int m,M; - int m_ofs; - pb *B; - n = b->n; B = b->B; - - i = s+1; - - d = search_SB_r(b,i,rel); - if (d >= NOTFOUND) return d; - - i = min((SBid(i) + 1) << logSB, n); - td = depth(b,s) + rel; - d = search_MB_r(b,i,td); - if (d >= NOTFOUND) return d; - - m_ofs = b->m_ofs; - - i = MBid(s) + m_ofs; - - while (i > 0) { - if ((i&1) == 0) { - i++; - m = b->mm[i]; - M = b->mM[i]; - - if (m <= td && td <= M) { - while (i < m_ofs) { - i <<= 1; - m = b->mm[i]; - M = b->mM[i]; - if (!(m <= td && td <= M)) i++; - } - i -= m_ofs; - i <<= logMB; - - return search_MB_r(b,i,td); - }; - - } - i >>= 1; - } - return NOTFOUND; -} - - -int degree_SB_slow(bp *b, int i, int t, int rel, int *ans, int ith) -{ - int j,r,n,il; - pb *p,x,w,w2; - int d, deg, v; - - n = t; - il = min((SBid(i) + 1) << logSB,n); - d = deg = 0; - - while (i < il) { - if (getbit(b->B,i)==OP) { - d++; - } else { - d--; - } - if (d < rel) { // reached the end - if (ith > 0) { - return NOTFOUND; - } else { - *ans = deg; - return END; - } - } - if (d == rel) { // found the same depth - deg++; - if (deg == ith) { - *ans = i; - return FOUND; - } - } - i++; - } - *ans = deg; - return CONTINUE; -} - -int degree_SB(bp *b, int i, int t, int rel, int *ans, int ith) -{ - int j,r,n,il; - pb *p,x,w,w2; - int d, deg, v; - int degtmp,itmp; - int ith2,d2; - - n = t; - il = min((SBid(i) + 1) << logSB,n); - d = deg = 0; - - p = &b->B[i>>logD]; - while (i < il) { - x = *p++; - j = i & (D-1); - x <<= j; - j = min(D-j,il-i); - while (j>0) { - w = (x >> (D-ETW)) & ((1< 0) { -#if 0 - for (r = 0; r < ETW; r++) { - if (w & 0x80) { - d++; - } else { - d--; - if (d < rel) break; - if (d == rel) { - ith--; - if (ith == 0) { - *ans = i + r; - return FOUND; - } - } - } - w <<= 1; - } - return NOTFOUND; -#else - r = childtbl2[rel-d+ETW][ith-1][w]; - if (r >= 0) { - *ans = i + r; - return FOUND; - } - return NOTFOUND; -#endif - } - r = ETW-1-minmaxtbl_i[0][w | w2]; - w2 = (1< 0) { - if (ith <= r) { - *ans = i + childtbl[((ith-1)<B; - n = t; - - il = min((MBid(i) + 1) << logMB,n); - deg = 0; - for ( ; i+SB-1 < il; i+=SB) { -#if (SB % RRR != 0) - d = depth(b,i-1); -#else - d = fast_depth(b,i-1); -#endif - m = d + b->sm[SBid(i)] - SB; - if (m < td) { - r = degree_SB(b,i,n,td-d,°tmp,ith); - if (ith > 0) { - if (r == NOTFOUND) return NOTFOUND; - *ans = degtmp; - return FOUND; - } else { - *ans = deg + degtmp; - return END; - } - } - if (m == td) { - if (ith > 0) { - if (ith <= b->sd[SBid(i)]) break; - ith -= b->sd[SBid(i)]; - } - deg += b->sd[SBid(i)]; - } - } - if (i < il) { -#if (SB % RRR != 0) - d = depth(b,i-1); -#else - d = fast_depth(b,i-1); -#endif - r = degree_SB(b,i,n,td-d,°tmp,ith); - if (ith > 0) { - if (r == NOTFOUND) return NOTFOUND; - if (r == FOUND) { - *ans = degtmp; - return FOUND; - } - } else { - deg += degtmp; - } - } - *ans = deg; - if (i >= n) return END; - return CONTINUE; -} - -static int partition_range(int s,int t) -{ - int i,j,h; - - printf("partition [%d,%d] => ",s,t); - h = 1; - while (s <= t) { - if (s & h) { - if (s+h-1 <= t) { - printf("[%d,%d] ",s,s+h-1); - s += h; - } - } else { - if (s+h > t) break; - } - h <<= 1; - } - while (h > 0) { - if (s+h-1 <= t) { - printf("[%d,%d] ",s,s+h-1); - s += h; - } - h >>= 1; - } - printf("\n"); -} - - - - -/////////////////////////////////////////// -// fast_degree(bp *b,int s, int t, int ith) -// returns the number of children of s, to the left of t -// returns the position of (ith)-th child of s if (ith > 0) -/////////////////////////////////////////// -int fast_degree(bp *b,int s, int t, int ith) -{ - int i,j,n; - int d,td; - int m,M; - int m_ofs; - pb *B; - int deg,degtmp; - int sm,tm,ss,h; - - n = t; - B = b->B; - - deg = 0; - - i = s+1; - if (i != SBfirst(i)) { - d = degree_SB(b,i,n,0,°tmp,ith); - if (ith > 0) { - if (d == NOTFOUND) return -1; - if (d == FOUND) return degtmp; - ith -= degtmp; - } - if (d == END) return degtmp; - deg += degtmp; - } - - td = depth(b,s); - - i = SBid(i+SB-1) << logSB; - - if (i != MBfirst(i)) { - d = degree_MB(b,i,n,td,°tmp,ith); - if (ith > 0) { - if (d == NOTFOUND) return -1; - if (d == FOUND) return degtmp; - ith -= degtmp; - deg += degtmp; - } else { - deg += degtmp; - if (d == END) { - return deg; - } - } - } - -#if 0 - // sequential search - - sm = MBid(i+MB-1); - tm = MBid((n-1)+1)-1; // the rightmost MB fully contained in [0,n-1] - - m_ofs = b->m_ofs; - sm += m_ofs; tm += m_ofs; - for (i=sm; i<=tm; i++) { - if (b->mm[i] < td) { - break; - } - if (b->mm[i] == td) { - if (ith > 0) { - if (ith <= b->md[i]) break; - ith -= b->md[i]; - } - deg += b->md[i]; - } - } - ss = i - m_ofs; -#else - sm = MBid(i+MB-1); - tm = MBid((n-1)+1)-1; // the rightmost MB fully contained in [0,n-1] - - m_ofs = b->m_ofs; - sm += m_ofs; tm += m_ofs; - ss = sm; - - //partition_range(sm,tm); - - //printf("partition [%d,%d] => ",sm,tm); - h = 1; - while (sm <= tm) { - if (sm & h) { - if (sm+h-1 <= tm) { - //printf("[%d,%d] ",sm,sm+h-1); - j = sm / h; - if (b->mm[j] < td) { - h >>= 1; - break; - } - if (b->mm[j] == td) { - if (ith > 0) { - if (ith <= b->md[j]) { - h >>= 1; - break; - } - ith -= b->md[j]; - } - deg += b->md[j]; - } - sm += h; - } - } else { - if (sm+h > tm) break; - } - h <<= 1; - } - while (h > 0) { - if (sm+h-1 <= tm) { - //printf("[%d,%d] ",sm,sm+h-1); - j = sm / h; - if (ith > 0) { - if (b->mm[j] >= td) { - if (b->mm[j] == td) { - if (ith > b->md[j]) { - ith -= b->md[j]; - sm += h; - } else { - deg += b->md[j]; - } - } else { - sm += h; - } - } - } else { - if (b->mm[j] >= td) { - if (b->mm[j] == td) { - deg += b->md[j]; - } - sm += h; - } - } - } - h >>= 1; - } - //printf("\n"); - ss = sm; - - ss -= m_ofs; - -#endif - - ss <<= logMB; - - d = degree_MB(b,ss,n,td,°tmp,ith); - if (ith > 0) { - if (d == NOTFOUND) return -1; - if (d == FOUND) return degtmp; - } - deg += degtmp; - if (d == END) return deg; - return deg; - - // unexpected (bug) - printf("degree: ???\n"); - return -99; - -} - -int search_SB_l(bp *b, int i, int rel) -{ - int j,r,il; - pb *p,x,w; - - il = SBfirst(i); - - p = &b->B[i>>logD]; - while (i>=il) { - x = *p--; - j = (i & (D-1))+1; - x >>= D-j; - while (j>0) { - w = x & ((1<= -ETW && rel <= ETW) { - r = bwdtbl[((rel+ETW)<>= r; - i -= r; - j -= r; - } - - } - if (i < 0) return NOTFOUND; - return CONTINUE; -} - -int search_MB_l(bp *b, int i, int td) -{ - int il,d; - int m,M; - pb *B; - -#if 0 - if (i % SB != SB-1) { - printf("search_MB_l:error!!! i=%d SB=%d\n",i,i%SB); - } -#endif - B = b->B; - - il = MBfirst(i); - for ( ; i >= il; i-=SB) { -#if (SB % RRR != 0) - d = depth(b,i-SB); -#else - d = fast_depth(b,i-SB); -#endif - m = d + b->sm[SBid(i)] - SB; - M = d + b->sM[SBid(i)] - 1; - if (m <= td && td <= M) { -#if (SB % RRR != 0) - d = depth(b,i); -#else - d = fast_depth(b,i); -#endif - if (d == td) return i; - return search_SB_l(b,i,td-d); - } - } - return CONTINUE; -} - -/////////////////////////////////////////// -// bwd_excess(bp *b,int s, int rel) -// find the rightmost value depth(s)+rel to the left of s (exclusive) -/////////////////////////////////////////// -int bwd_excess(bp *b,int s, int rel) -{ - int i,n; - int d,td; - int m,M; - int m_ofs; - pb *B; - n = b->n; B = b->B; - - i = s; - d = search_SB_l(b,i,rel); - if (d >= NOTFOUND) return d; - - i = SBfirst(i) -1; - - td = depth(b,s) + rel; - - d = search_MB_l(b,i,td); - if (d >= NOTFOUND) return d; - - m_ofs = b->m_ofs; - i = (s>>logMB) + m_ofs; - while (i > 0) { - if ((i&1) == 1) { - i--; - m = b->mm[i]; - M = b->mM[i]; - if (m <= td && td <= M) break; - } - i >>= 1; - } - if (i == 0) { - if (td == 0) return -1; - else return NOTFOUND; - } - - while (i < m_ofs) { - i = i*2 + 1; - m = b->mm[i]; - M = b->mM[i]; - if (!(m <= td && td <= M)) i--; - } - i -= m_ofs; - i = ((i+1)<= NOTFOUND) return d; - - // unexpected (bug) - printf("bwd_excess: ???\n"); - return -99; - -} - -int rmq_SB(bp *b, int s, int t, int opt, int *dm) -{ - int i,d; - int is,ds; - pb *p,x,w,w2; - int j,v,r; - int lr; - int op; - - lr = 0; if (opt & OPT_RIGHT) lr = 1; - op = opt & (OPT_RIGHT | OPT_MAX); - - is = s; ds = d = 0; - i = s+1; - -#if SB >= ETW - p = &b->B[i>>logD]; - while (i <= t) { - x = *p++; - j = i & (D-1); - x <<= j; - j = min(D-j,t-i+1); - while (j>0) { - w = (x >> (D-ETW)) & ((1< ds) { - ds = d + v; - is = i + minmaxtbl_i[op][w & (~w2)]; - } - } else { - v = minmaxtbl_v[op][w | w2]; - if (d + v < ds +lr) { - ds = d + v; - is = i + minmaxtbl_i[op][w | w2]; - } - } - - r = min(j,ETW); - d += 2*popcount(w)-r; - x <<= r; - i += r; - j -= r; - } - } -#else - while (i <= t) { - if (getbit(b->B,i)==OP) { - d++; - if (op & OPT_MAX) { - if (d + lr > ds) { - ds = d; is = i; - } - } - } else { - d--; - if (!(op & OPT_MAX)) { - if (d < ds + lr) { - ds = d; is = i; - } - } - } - i++; - } -#endif - *dm = ds; - return is; -} - -int rmq_MB(bp *b, int s, int t, int opt, int *dm) -{ - int i,d,m; - int mi,md; - int lr; - - lr = 0; if (opt & OPT_RIGHT) lr = 1; - - md = *dm; mi = -1; - for (i = s; i <= t; i++) { -#if (SB % RRR != 0) - d = depth(b,(i<sM[i] - 1; - if (m + lr > md) { - md = m; mi = i; - } - } else { - m = d + b->sm[i] - SB; - if (m < md + lr) { - md = m; mi = i; - } - } - } - *dm = md; - return mi; -} - - - - -/////////////////////////////////////////// -// rmq(bp *b, int s, int t, int opt) -// returns the index of leftmost/rightmost minimum/maximum value -// in the range [s,t] (inclusive) -// returns the leftmost minimum if opt == 0 -// the rightmost one if (opt & OPT_RIGHT) -// the maximum if (opt & OPT_MAX) -/////////////////////////////////////////// -int rmq(bp *b, int s, int t, int opt) -{ - int ss, ts; // SB index of s and t - int sm, tm; // MB index of s and t - int ds; // current min value - int is; // current min index - int ys; // type of current min index - // 0: is is the index of min - // 1: is is the SB index - // 2: is is the MB index - int m_ofs; - int i,j,il,d,n; - int dm; - int lr; - - lr = 0; if (opt & OPT_RIGHT) lr = 1; - - n = b->n; - if (s < 0 || t >= n || s > t) { - printf("rmq: error s=%d t=%d n=%d\n",s,t,n); - return -1; - } - if (s == t) return s; - - - //////////////////////////////////////////////////////////// - // search the SB of s - //////////////////////////////////////////////////////////// - - il = min(SBlast(s),t); - is = rmq_SB(b,s,il,opt,&dm); - if (il == t) { // scan reached the end of the range - return is; - } - ds = depth(b,s) + dm; ys = 0; - - //////////////////////////////////////////////////////////// - // search the MB of s - //////////////////////////////////////////////////////////// - - ss = SBid(s) + 1; - il = min(SBid(MBlast(s)),SBid(t)-1); - dm = ds; - j = rmq_MB(b,ss,il,opt,&dm); - //if (dm < ds + lr) { - if (j >= 0) { - ds = dm; is = j; ys = 1; - } - - //////////////////////////////////////////////////////////// - // search the tree - //////////////////////////////////////////////////////////// - - sm = MBid(s) + 1; - tm = MBid(t) - 1; - -#if 0 - // sequential search - m_ofs = b->m_ofs; - sm += m_ofs; tm += m_ofs; - for (i=sm; i<=tm; i++) { - if (opt & OPT_MAX) { - if (b->mM[i] + lr > ds) { - ds = b->mM[i]; is = i - m_ofs; ys = 2; - } - } else { - if (b->mm[i] < ds + lr) { - ds = b->mm[i]; is = i - m_ofs; ys = 2; - } - } - } - -#else - if (sm <= tm) { - int h; - h = blog(sm ^ tm); - - m_ofs = b->m_ofs; - sm += m_ofs; tm += m_ofs; - - if (opt & OPT_MAX) { - if (b->mM[sm] + lr > ds) { - ds = b->mM[sm]; is = sm; ys = 2; - } - for (i=0; i<=h-2; i++) { - j = sm>>i; - if ((j&1) == 0) { - if (b->mM[j+1] + lr > ds) { - ds = b->mM[j+1]; is = j+1; ys = 2; - } - } - } - for (i=h-2; i>=0; i--) { - j = tm>>i; - if ((j&1)==1) { - if (b->mM[j-1] + lr > ds) { - ds = b->mM[j-1]; is = j-1; ys = 2; - } - } - } - if (b->mM[tm] + lr > ds) { - ds = b->mM[tm]; is = tm; ys = 2; - } - if (ys == 2) { - while (is < m_ofs) { - is <<= 1; - if (b->mM[is+1] + lr > b->mM[is]) is++; - } - is -= m_ofs; - } - } else { // MIN - if (b->mm[sm] < ds + lr) { - ds = b->mm[sm]; is = sm; ys = 2; - } - for (i=0; i<=h-2; i++) { - j = sm>>i; - if ((j&1) == 0) { - if (b->mm[j+1] < ds + lr) { - ds = b->mm[j+1]; is = j+1; ys = 2; - } - } - } - for (i=h-2; i>=0; i--) { - j = tm>>i; - if ((j&1)==1) { - if (b->mm[j-1] < ds + lr) { - ds = b->mm[j-1]; is = j-1; ys = 2; - } - } - } - if (b->mm[tm] < ds + lr) { - ds = b->mm[tm]; is = tm; ys = 2; - } - if (ys == 2) { - while (is < m_ofs) { - is <<= 1; - if (b->mm[is+1] < b->mm[is] + lr) is++; - } - is -= m_ofs; - } - } - } - -#endif - - //////////////////////////////////////////////////////////// - // search the MB of t - //////////////////////////////////////////////////////////// - - - ts = max(SBid(MBfirst(t)),SBid(s)+1); - il = SBid(SBfirst(t)-1); - dm = ds; - j = rmq_MB(b,ts,il,opt,&dm); - //if (dm < ds + lr) { - if (j >= 0) { - ds = dm; is = j; ys = 1; - } - - //////////////////////////////////////////////////////////// - // search the SB of t - //////////////////////////////////////////////////////////// - - i = SBfirst(t); - j = rmq_SB(b,i,t,opt,&dm); - d = depth(b,i) + dm; - if (opt & OPT_MAX) { - if (d + lr > ds) { - ds = d; is = j; ys = 0; - } - } else { - if (d < ds + lr) { - ds = d; is = j; ys = 0; - } - } - - //////////////////////////////////////////////////////////// - // search the rest - //////////////////////////////////////////////////////////// - - if (ys == 2) { - ss = SBid(is << logMB); - il = SBid(MBlast(is << logMB)); - if (opt & OPT_MAX) { - dm = -n-1; - } else { - dm = n+1; - } - j = rmq_MB(b,ss,il,opt,&dm); - ds = dm; is = j; ys = 1; - } - - if (ys == 1) { - ss = is << logSB; - il = SBlast(is << logSB); - is = rmq_SB(b,ss,il,opt,&dm); - //ds = depth(b,ss) + dm; ys = 0; - } - - return is; -} - diff --git a/darray.c b/darray.c deleted file mode 100644 index 890de45..0000000 --- a/darray.c +++ /dev/null @@ -1,682 +0,0 @@ -#include -#include -#include "darray.h" -#include "utils.h" - -//typedef unsigned char byte; -//typedef unsigned short word; -//typedef unsigned int dword; - -//typedef dword pb; -//#define logD 5 - -#define PBS (sizeof(pb)*8) -#define D (1<> logD; - l = i & (D-1); - B[j] &= (~(1<<(D-1-l))); - return 0; -} - -int setbit_one(pb *B, int i) -{ - int j,l; - j = i >> logD; - l = i & (D-1); - B[j] |= (1<<(D-1-l)); - return 1; - -} - - - -int setbits(pb *B, int i, int d, int x) -{ - int j; - - for (j=0; j>(d-j-1))&1); - } - return x; -} - -int getbit(pb *B, int i) -{ - int j,l; - - //j = i / D; - //l = i % D; - j = i >> logD; - l = i & (D-1); - return (B[j] >> (D-1-l)) & 1; -} - -dword getbits(pb *B, int i, int d) -{ - dword j,x; - - x = 0; - for (j=0; j>(k-1-j))); - } - //printf("getpattern(%d) = %d\n",i,x); - return x; -} - - -static const unsigned int popCount[] = { -0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4, -1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, -1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, -2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, -1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, -2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, -2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, -3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, -1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5, -2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, -2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, -3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, -2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6, -3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, -3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7, -4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8 -}; - -static int selecttbl[8*256]; - -void make_selecttbl(void) -{ - int i,x,r; - pb buf[1]; - - for (x = 0; x < 256; x++) { - setbits(buf,0,8,x); - for (r=0; r<8; r++) selecttbl[(r<<8)+x] = -1; - r = 0; - for (i=0; i<8; i++) { - if (getbit(buf,i)) { - selecttbl[(r<<8)+x] = i; - r++; - } - } - } -} - - -int darray_construct(darray *da, int n, pb *buf, int opt) -{ - int i,j,k,m; - int nl; - int p,pp; - int il,is,ml,ms; - int r,m2; - dword *s; - int p1,p2,p3,p4,s1,s2,s3,s4; - - da->idx_size = 0; - - make_selecttbl(); - - - if (L/LLL == 0) { - printf("ERROR: L=%d LLL=%d\n",L,LLL); - exit(1); - } - - m = 0; - for (i=0; in = n; - da->m = m; - //printf("n=%d m=%d\n",n,m); - - da->buf = buf; - - if (opt & (~OPT_NO_RANK)) { // construct select table -#if 0 - mymalloc(s,m,0); - m = 0; - for (i=0; ilp,nl+1,1); da->idx_size += (nl+1)*sizeof(*da->lp); - mymalloc(da->p,nl+1,1); da->idx_size += (nl+1)*sizeof(*da->p); -#if 0 - printf("lp table: %d bytes (%1.2f bpc)\n",(nl+1)*sizeof(*da->lp), (double)(nl+1)*sizeof(*da->lp) * 8/n); - printf("p table: %d bytes (%1.2f bpc)\n",(nl+1)*sizeof(*da->p), (double)(nl+1)*sizeof(*da->p) * 8/n); -#endif - - for (r = 0; r < 2; r++) { - s1 = s2 = s3 = s4 = 0; - p1 = p2 = p3 = p4 = -1; - - ml = ms = 0; - for (il = 0; il < nl; il++) { - //pp = s[il*L]; - while (s1 <= il*L) { - if (getbit(buf,p1+1)) s1++; - p1++; - } - pp = p1; - da->lp[il] = pp; - i = min((il+1)*L-1,m-1); - //p = s[i]; - while (s2 <= i) { - if (getbit(buf,p2+1)) s2++; - p2++; - } - p = p2; - //printf("%d ",p-pp); - if (p - pp >= LL) { - if (r == 1) { - for (is = 0; is < L; is++) { - if (il*L+is >= m) break; - //da->sl[ml*L+is] = s[il*L+is]; - while (s3 <= il*L+is) { - if (getbit(buf,p3+1)) s3++; - p3++; - } - da->sl[ml*L+is] = p3; - } - } - da->p[il] = -(ml+1); - ml++; - } else { - if (r == 1) { - for (is = 0; is < L/LLL; is++) { - if (il*L+is*LLL >= m) break; - while (s4 <= il*L+is*LLL) { - if (getbit(buf,p4+1)) s4++; - p4++; - } - //da->ss[ms*(L/LLL)+is] = s[il*L+is*LLL] - pp; - da->ss[ms*(L/LLL)+is] = p4 - pp; - } - } - da->p[il] = ms; - ms++; - } - } - if (r == 0) { - mymalloc(da->sl,ml*L+1,1); da->idx_size += (ml*L+1)*sizeof(*da->sl); - mymalloc(da->ss,ms*(L/LLL)+1,1); da->idx_size += (ms*(L/LLL)+1)*sizeof(*da->ss); -#if 0 - printf("sl table: %d bytes (%1.2f bpc)\n",(ml*L+1)*sizeof(*da->sl), (double)(ml*L+1)*sizeof(*da->sl) * 8/n); - printf("ss table: %d bytes (%1.2f bpc)\n",(ms*(L/LLL)+1)*sizeof(*da->ss), (double)(ms*(L/LLL)+1)*sizeof(*da->ss) * 8/n); -#endif - } - } - //free(s); - } else { // no select table - da->lp = NULL; - da->p = da->sl = NULL; - da->ss = NULL; - } - - // construct rank table - - if ((opt & OPT_NO_RANK) == 0) { - mymalloc(da->rl,n/R1+2,1); da->idx_size += (n/R1+2)*sizeof(*da->rl); - mymalloc(da->rm,n/RR+2,1); da->idx_size += (n/RR+2)*sizeof(*da->rm); - mymalloc(da->rs,n/RRR+2,1); da->idx_size += (n/RRR+2)*sizeof(*da->rs); -#if 0 - printf("rl table: %d bytes (%1.2f bpc)\n",(n/R1+2)*sizeof(*da->rl), (double)(n/R1+2)*sizeof(*da->rl) * 8/n); - printf("rm table: %d bytes (%1.2f bpc)\n",(n/RR+2)*sizeof(*da->rm), (double)(n/RR+2)*sizeof(*da->rm) * 8/n); - printf("rs table: %d bytes (%1.2f bpc)\n",(n/RRR+2)*sizeof(*da->rs), (double)(n/RRR+2)*sizeof(*da->rs) * 8/n); -#endif - r = 0; - for (k=0; k<=n; k+=R1) { - da->rl[k/R1] = r; - m2 = 0; - for (i=0; irm[(k+i)/RR] = m2; - m = 0; - for (j=0; jrs[(k+i+j)/RRR] = m; - m2 += m; - m = 0; - } - } - if (m != 0) { - printf("???\n"); - } - //m2 += m; - } - r += m2; - } - } - - return 0; -} - -// destroyDarray: frees the memory of darray -// Added by Diego Arroyuelo -void destroyDarray(darray *da) { - if (!da) return; // nothing to free - - if (da->buf) free(da->buf); - if (da->lp) free(da->lp); - if (da->sl) free(da->sl); - if (da->ss) free(da->ss); - if (da->p) free(da->p); - if (da->rl) free(da->rl); - if (da->rm) free(da->rm); - if (da->rs) free(da->rs); -} - -int darray_rank0(darray *da, int i) -{ - int r,j; - pb *p; - -#if (RRR == D*2) - r = da->rl[i>>logR] + da->rm[i>>logRR] + da->rs[i>>logRRR]; - p = da->buf + ((i>>logRRR)<<(logRRR-logD)); - j = i & (RRR-1); - if (j < D) r += popcount(*p >> (D-1-j)); - else r += popcount(*p) + popcount(p[1] >> (D-1-(j-D))); -#else - - j = i & (RRR-1); - if (j < RRR/2) { - r = da->rl[i>>logR] + da->rm[i>>logRR] + da->rs[i>>logRRR]; - p = da->buf + ((i>>logRRR)<<(logRRR-logD)); - while (j >= D) { - r += popcount(*p++); - j -= D; - } - r += popcount(*p >> (D-1-j)); - } else { - j = RRR-1 - (i & (RRR-1)); - i += j+1; - r = da->rl[i>>logR] + da->rm[i>>logRR] + da->rs[i>>logRRR]; - p = da->buf + ((i>>logRRR)<<(logRRR-logD)); - while (j >= D) { - r -= popcount(*--p); - j -= D; - } - if (j > 0) r -= popcount(*--p << (D-j)); - } - -#endif - - return r; -} - -int darray_rank(darray *da, int i) -{ - int r,j,i_rr, i_rrr; - pb *p; - i_rr = i >> logRR; - i_rrr = i >> logRRR; - r = da->rl[i>>logR] + da->rm[i_rr]; - - j = (i_rrr) & (RR/RRR-1); - while (j > 0) { - r += da->rs[((i_rr)<<(logRR-logRRR))+j-1]; - j--; - } - - p = da->buf + ((i_rrr)<<(logRRR-logD)); - j = i & (RRR-1); - while (j >= D) { - r += popcount(*p++); - j -= D; - } - r += popcount(*p >> (D-1-j)); - - return r; -} - -int darray_select_bsearch(darray *da, int i, pb (*getpat)(pb *)) -{ - int j; - int l,r,m,n; - int s; - int t,x,rr; - pb *p,w; - - // for debug - //s = darray_select(da,i,1); - // - //printf("select(%d)=%d\n",i,s); - - - - if (i == 0) return -1; - - if (i > da->m) { - return -1; - } - - i--; - - - - n = da->m; - - t = i; - - l = 0; r = (n-1)>>logR; - // find the smallest index x s.t. rl[x] >= t - while (l < r) { - m = (l+r)/2; - //printf("m=%d rl[m+1]=%d t=%d\n",m,da->rl[m+1],t); - if (da->rl[m+1] > t) { // m+1 is out of range - r = m; // new r = m >= l - } else { - l = m+1; // new l = m+1 <= r - } - } - x = l; - t -= da->rl[x]; - - x <<= logR; - - l = x >> logRR; r = (min(x+R1-1,n))>>logRR; - while (l < r) { - m = (l+r)/2; - //printf("m=%d rm[m+1]=%d t=%d\n",m,da->rm[m+1],t); - if (da->rm[m+1] > t) { // m+1 is out of range - r = m; - } else { - l = m+1; // new l = m+1 <= r - } - } - x = l; - t -= da->rm[x]; - - x <<= logRR; - -#if 0 - l = x >> logRRR; r = (min(x+RR-1,n))>>logRRR; - while (l < r) { - m = (l+r)/2; - //printf("m=%d rs[m+1]=%d t=%d\n",m,da->rs[m+1],t); - if (da->rs[m+1] > t) { // m+1 is out of range - r = m; - } else { - l = m+1; // new l = m+1 <= r - } - } - x = l; - t -= da->rs[x]; -#else - l = x >> logRRR; - while (t > da->rs[l]) { - t -= da->rs[l]; - l++; - } - x = l; -#endif - - x <<= logRRR; - - p = &da->buf[x >> logD]; - while (1) { - m = popcount(getpat(p)); - if (m > t) break; - t -= m; - x += D; - p++; - } - - w = getpat(p); - while (1) { - rr = popCount[w >> (D-8)]; - if (rr > t) break; - t -= rr; - x += 8; - w <<= 8; - } - x += selecttbl[((t-0)<<8)+(w>>(D-8))]; - -#if 0 - if (x != s) { - printf("error x=%d s=%d\n",x,s); - } -#endif - return x; -} - -int darray_pat_rank(darray *da, int i, pb (*getpat)(pb *)) -{ - int r,j; - pb *p; - - r = da->rl[i>>logR] + da->rm[i>>logRR]; - j = (i>>logRRR) & (RR/RRR-1); - while (j > 0) { - r += da->rs[((i>>logRR)<<(logRR-logRRR))+j-1]; - j--; - } - - p = da->buf + ((i>>logRRR)<<(logRRR-logD)); - j = i & (RRR-1); - while (j >= D) { - r += popcount(getpat(p)); - p++; - j -= D; - } - r += popcount(getpat(p) >> (D-1-j)); - - return r; -} - - -int darray_select(darray *da, int i,int f) -{ - int p,r; - int il; - int rr; - pb x; - pb *q; - - if (i == 0) return -1; - - if (i > da->m) { - return -1; - //printf("ERROR: m=%d i=%d\n",da->m,i); - //exit(1); - } - - i--; - - il = da->p[i>>logL]; - if (il < 0) { - il = -il-1; - p = da->sl[(il<lp[i>>logL]; - p += da->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL]; - r = i - (i & (LLL-1)); - - q = &(da->buf[p>>logD]); - - if (f == 1) { - rr = p & (D-1); - r -= popcount(*q >> (D-1-rr)); - p = p - rr; - - while (1) { - rr = popcount(*q); - if (r + rr >= i) break; - r += rr; - p += D; - q++; - } - - x = *q; - while (1) { - //rr = popcount(x >> (D-8)); - //rr = popcount(x >> (D-8)); - rr = popcount8(x >> (D-8)); - if (r + rr >= i) break; - r += rr; - p += 8; - x <<= 8; - } - p += selecttbl[((i-r-1)<<8)+(x>>(D-8))]; - } else { - rr = p & (D-1); - r -= popcount((~(*q)) >> (D-1-rr)); - p = p - rr; - - while (1) { - rr = popcount(~(*q)); - if (r + rr >= i) break; - r += rr; - p += D; - q++; - } - - x = ~(*q); - - while (1) { - //rr = popcount(x >> (D-8)); - //rr = popCount[x >> (D-8)]; - rr = popcount8(x >> (D-8)); - if (r + rr >= i) break; - r += rr; - p += 8; - x <<= 8; - } - p += selecttbl[((i-r-1)<<8)+(x>>(D-8))]; - } - } - return p; -} - -int darray_pat_select(darray *da, int i, pb (*getpat)(pb *)) -{ - int p,r; - int il; - int rr; - pb x; - pb *q; - - if (i == 0) return -1; - - if (i > da->m) { - return -1; - //printf("ERROR: m=%d i=%d\n",da->m,i); - //exit(1); - } - - i--; - - il = da->p[i>>logL]; - if (il < 0) { - il = -il-1; - p = da->sl[(il<lp[i>>logL]; - p += da->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL]; - r = i - (i & (LLL-1)); - - q = &(da->buf[p>>logD]); - - rr = p & (D-1); - r -= popcount(getpat(q) >> (D-1-rr)); - p = p - rr; - - while (1) { - rr = popcount(getpat(q)); - if (r + rr >= i) break; - r += rr; - p += D; - q++; - } - - x = getpat(q); - while (1) { - //rr = popcount(x >> (D-8)); - //rr = popCount[x >> (D-8)]; - rr = popcount8(x >> (D-8)); - if (r + rr >= i) break; - r += rr; - p += 8; - x <<= 8; - } - p += selecttbl[((i-r-1)<<8)+(x>>(D-8))]; - } - return p; -} - -int darray_pat_construct(darray *da, int n, pb *buf, int k, pb pat, int opt) -{ - int i; - pb *b; - mymalloc(b,(n+D-1)/D,0); - - for (i=0; ibuf = buf; - - free(b); - - return 0; -} diff --git a/darray.h b/darray.h deleted file mode 100644 index 4c5aaa3..0000000 --- a/darray.h +++ /dev/null @@ -1,67 +0,0 @@ -#ifndef DARRAY_H_ -#define DARRAY_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -typedef unsigned char byte; -typedef unsigned short word; -typedef unsigned int dword; - -#define OPT_NO_RANK (1<<30) - - -typedef dword pb; - -#define logD 5 -#define D (1<>1) + (r & 0x55); - r = ((r & 0xcc)>>2) + (r & 0x33); - r = ((r>>4) + r) & 0x0f; - return r; -} - -static inline unsigned int -popcount(pb x) -{ - uint m1 = 0x55555555; - uint m2 = 0xc30c30c3; - x -= (x >> 1) & m1; - x = (x & m2) + ((x >> 2) & m2) + ((x >> 4) & m2); - x += x >> 6; - return (x + (x >> 12) + (x >> 24)) & 0x3f; -} - -#endif - -#ifdef __cplusplus - } -#endif - - -#endif