From bbeeb03980ddf51c0f32fd3c42321dc9ef708c27 Mon Sep 17 00:00:00 2001 From: fclaude Date: Sat, 21 Mar 2009 17:09:30 +0000 Subject: [PATCH] New rank/select data structure based on sadakane's code git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/XMLTree@260 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- libcds/src/Makefile | 6 +- libcds/src/static_bitsequence/sdarray.cpp | 705 ++++++++++++++++++ libcds/src/static_bitsequence/sdarray.h | 48 ++ .../static_bitsequence/static_bitsequence.cpp | 14 +- .../static_bitsequence/static_bitsequence.h | 2 + .../static_bitsequence_builder.h | 1 + .../static_bitsequence_builder_sdarray.cpp | 7 + .../static_bitsequence_builder_sdarray.h | 36 + .../static_bitsequence_rrr02_light.cpp | 2 - .../static_bitsequence_sdarray.cpp | 71 ++ .../static_bitsequence_sdarray.h | 26 + .../src/static_sequence/static_sequence.cpp | 1 + libcds/src/static_sequence/static_sequence.h | 2 + .../static_sequence/static_sequence_bs.cpp | 105 +++ .../src/static_sequence/static_sequence_bs.h | 64 ++ .../src/static_sequence/wt_node_internal.cpp | 8 +- libcds/src/static_sequence/wt_node_leaf.cpp | 2 +- libcds/tests/Makefile | 10 +- libcds/tests/static_sequence_bs_test.cpp | 76 ++ 19 files changed, 1168 insertions(+), 18 deletions(-) create mode 100644 libcds/src/static_bitsequence/sdarray.cpp create mode 100644 libcds/src/static_bitsequence/sdarray.h create mode 100644 libcds/src/static_bitsequence/static_bitsequence_builder_sdarray.cpp create mode 100644 libcds/src/static_bitsequence/static_bitsequence_builder_sdarray.h create mode 100644 libcds/src/static_bitsequence/static_bitsequence_sdarray.cpp create mode 100644 libcds/src/static_bitsequence/static_bitsequence_sdarray.h create mode 100644 libcds/src/static_sequence/static_sequence_bs.cpp create mode 100644 libcds/src/static_sequence/static_sequence_bs.h create mode 100644 libcds/tests/static_sequence_bs_test.cpp diff --git a/libcds/src/Makefile b/libcds/src/Makefile index 0f7549b..d7d6ea5 100644 --- a/libcds/src/Makefile +++ b/libcds/src/Makefile @@ -1,7 +1,7 @@ CPP=g++ #CPPFLAGS=-g3 -Wall -CPPFLAGS=-O9 -w -DNDEBUG +CPPFLAGS=-O9 -Wall -DNDEBUG INCL=-I../includes/ @@ -12,10 +12,10 @@ STATIC_PERMUTATION_DIR=static_permutation STATIC_PERMUTATION_OBJECTS=$(STATIC_PERMUTATION_DIR)/perm.o $(STATIC_PERMUTATION_DIR)/static_permutation.o $(STATIC_PERMUTATION_DIR)/static_permutation_mrrr.o $(STATIC_PERMUTATION_DIR)/static_permutation_builder_mrrr.o STATIC_BITSEQUENCE_DIR=static_bitsequence -STATIC_BITSEQUENCE_OBJECTS=$(STATIC_BITSEQUENCE_DIR)/static_bitsequence.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_naive.o $(STATIC_BITSEQUENCE_DIR)/table_offset.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_rrr02.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_brw32.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_rrr02.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_brw32.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_rrr02_light.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_rrr02_light.o +STATIC_BITSEQUENCE_OBJECTS=$(STATIC_BITSEQUENCE_DIR)/static_bitsequence.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_naive.o $(STATIC_BITSEQUENCE_DIR)/table_offset.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_rrr02.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_brw32.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_rrr02.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_brw32.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_rrr02_light.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_rrr02_light.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_sdarray.o $(STATIC_BITSEQUENCE_DIR)/sdarray.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_sdarray.o STATIC_SEQUENCE_DIR=static_sequence -STATIC_SEQUENCE_OBJECTS=$(STATIC_SEQUENCE_DIR)/static_sequence.o $(STATIC_SEQUENCE_DIR)/static_sequence_wvtree.o $(STATIC_SEQUENCE_DIR)/wt_coder_binary.o $(STATIC_SEQUENCE_DIR)/wt_coder_huff.o $(STATIC_SEQUENCE_DIR)/wt_node_internal.o $(STATIC_SEQUENCE_DIR)/wt_node_leaf.o $(STATIC_SEQUENCE_DIR)/wt_coder.o $(STATIC_SEQUENCE_DIR)/wt_node.o $(STATIC_SEQUENCE_DIR)/static_sequence_gmr_chunk.o $(STATIC_SEQUENCE_DIR)/static_sequence_builder_gmr_chunk.o $(STATIC_SEQUENCE_DIR)/static_sequence_gmr.o $(STATIC_SEQUENCE_DIR)/static_sequence_builder_wvtree.o $(STATIC_SEQUENCE_DIR)/static_sequence_builder_gmr.o $(STATIC_SEQUENCE_DIR)/static_sequence_wvtree_noptrs.o $(STATIC_SEQUENCE_DIR)/static_sequence_builder_wvtree_noptrs.o +STATIC_SEQUENCE_OBJECTS=$(STATIC_SEQUENCE_DIR)/static_sequence.o $(STATIC_SEQUENCE_DIR)/static_sequence_wvtree.o $(STATIC_SEQUENCE_DIR)/wt_coder_binary.o $(STATIC_SEQUENCE_DIR)/wt_coder_huff.o $(STATIC_SEQUENCE_DIR)/wt_node_internal.o $(STATIC_SEQUENCE_DIR)/wt_node_leaf.o $(STATIC_SEQUENCE_DIR)/wt_coder.o $(STATIC_SEQUENCE_DIR)/wt_node.o $(STATIC_SEQUENCE_DIR)/static_sequence_gmr_chunk.o $(STATIC_SEQUENCE_DIR)/static_sequence_builder_gmr_chunk.o $(STATIC_SEQUENCE_DIR)/static_sequence_gmr.o $(STATIC_SEQUENCE_DIR)/static_sequence_builder_wvtree.o $(STATIC_SEQUENCE_DIR)/static_sequence_builder_gmr.o $(STATIC_SEQUENCE_DIR)/static_sequence_wvtree_noptrs.o $(STATIC_SEQUENCE_DIR)/static_sequence_builder_wvtree_noptrs.o $(STATIC_SEQUENCE_DIR)/static_sequence_bs.o UTILS_DIR=utils UTILS_OBJECTS=$(UTILS_DIR)/alphabet_mapper_none.o $(UTILS_DIR)/alphabet_mapper.o $(UTILS_DIR)/alphabet_mapper_cont.o diff --git a/libcds/src/static_bitsequence/sdarray.cpp b/libcds/src/static_bitsequence/sdarray.cpp new file mode 100644 index 0000000..dc01b89 --- /dev/null +++ b/libcds/src/static_bitsequence/sdarray.cpp @@ -0,0 +1,705 @@ + +#include + +#if 0 +typedef unsigned int qword; +#define logD 4 +#else +typedef unsigned long long qword; +#define logD 5 +#endif +#define PBS (sizeof(uint)*8) +#define D (1<0) { + x>>=1; + l++; + } + return l; +} + + +int setbit(uint *B, int i,int x) { + int j,l; +//printf("%u\n",D); + j = i / D; + l = i % D; + if (x==0) B[j] &= (~(1<<(D-1-l))); + else if (x==1) B[j] |= (1<<(D-1-l)); + else { + printf("error setbit x=%d\n",x); + exit(1); + } + return x; +} + + +int setbit2(uchar *B, int i,int x) { + int j,l; + + j = i / 8; + l = i % 8; + if (x==0) B[j] &= (~(1<<(8-1-l))); + else if (x==1) B[j] |= (1<<(8-1-l)); + else { + printf("error setbit2 x=%d\n",x); + exit(1); + } + return x; +} + + +int setbits(uint *B, int i, int d, int x) { + int j; + + for (j=0; j>(d-j-1))&1); + } + return x; +} + + +int getbit(uint *B, int i) { + int j,l; + + //j = i / D; + //l = i % D; + j = i >> logD; + l = i & (D-1); + return (B[j] >> (D-1-l)) & 1; +} + + +int getbit2(uchar *B, int i) { + int j,l; + + //j = i / D; + //l = i % D; + j = i >> 3; + l = i & (8-1); + return (B[j] >> (8-1-l)) & 1; +} + + +#if 1 +uint getbits(uint *B, int i, int d) { + qword x,z; + + B += (i >> logD); + i &= (D-1); + if (i+d <= 2*D) { + x = (((qword)B[0]) << D) + B[1]; + x <<= i; + x >>= (D*2-1-d); + x >>= 1; + } + else { + x = (((qword)B[0])<>= D; + x += z; + x >>= (2*D-d); + } + + return x; +} +#endif + +#if 0 +uint getbits(uint *B, int i, int d) { + uint j,x; + + x = 0; + for (j=0; j>1) & 0x77777777) - ((r>>2) & 0x33333333) - ((r>>3) & 0x11111111); + r = ((r + (r>>4)) & 0x0f0f0f0f) % 0xff; + #elif 1 + r = x; + r = ((r & 0xaaaaaaaa)>>1) + (r & 0x55555555); + r = ((r & 0xcccccccc)>>2) + (r & 0x33333333); + //r = ((r & 0xf0f0f0f0)>>4) + (r & 0x0f0f0f0f); + r = ((r>>4) + r) & 0x0f0f0f0f; + //r = ((r & 0xff00ff00)>>8) + (r & 0x00ff00ff); + r = (r>>8) + r; + //r = ((r & 0xffff0000)>>16) + (r & 0x0000ffff); + r = ((r>>16) + r) & 63; + #else + r = popCount[x & 0xff]; + x >>= 8; + r += popCount[x & 0xff]; + x >>= 8; + r += popCount[x & 0xff]; + x >>= 8; + r += popCount[x & 0xff]; + #endif + return r; +} + + +unsigned int popcount8(uint x) { + uint r; + #if 1 + r = x; + r = ((r & 0xaa)>>1) + (r & 0x55); + r = ((r & 0xcc)>>2) + (r & 0x33); + r = ((r>>4) + r) & 0x0f; + #else + r = popCount[x & 0xff]; + #endif + return r; +} + +int selectd2_save(selectd2 * s, FILE * fp) { + uint wr = 0; + wr += fwrite(&s->n,sizeof(uint),1,fp); + wr += fwrite(&s->m,sizeof(uint),1,fp); + wr += fwrite(&s->size,sizeof(uint),1,fp); + wr += fwrite(&s->ss_len,sizeof(uint),1,fp); + wr += fwrite(&s->sl_len,sizeof(uint),1,fp); + wr += fwrite(s->buf,sizeof(uchar),(s->n+7)/8+1,fp); + uint nl = (s->m-1) / L + 1; + wr += fwrite(s->lp,sizeof(uint),nl+1,fp); + wr += fwrite(s->p,sizeof(uint),nl+1,fp); + wr += fwrite(s->ss,sizeof(ushort),s->ss_len,fp); + wr += fwrite(s->sl,sizeof(uint),s->sl_len,fp); + if(wr!=s->sl_len+s->ss_len+2*(nl+1)+(s->n+7)/8+1+5) + return 1; + return 0; +} + +int selectd2_load(selectd2 * s, FILE * fp) { + uint rd = 0; + rd += fread(&s->n,sizeof(uint),1,fp); + rd += fread(&s->m,sizeof(uint),1,fp); + rd += fread(&s->size,sizeof(uint),1,fp); + rd += fread(&s->ss_len,sizeof(uint),1,fp); + rd += fread(&s->sl_len,sizeof(uint),1,fp); + s->buf = new uchar[(s->n+7)/8+1]; + rd += fread(s->buf,sizeof(uchar),(s->n+7)/8+1,fp); + uint nl = (s->m-1) / L + 1; + s->lp = new uint[nl+1]; + rd += fread(s->lp,sizeof(uint),nl+1,fp); + s->p = new uint[nl+1]; + rd += fread(s->p,sizeof(uint),nl+1,fp); + s->ss = new ushort[s->ss_len]; + rd += fread(s->ss,sizeof(ushort),s->ss_len,fp); + s->sl = new uint[s->sl_len]; + rd += fread(s->sl,sizeof(uint),s->sl_len,fp); + if(rd!=s->sl_len+s->ss_len+2*(nl+1)+(s->n+7)/8+1+5) + return 1; + return 0; +} + +void selectd2_free(selectd2 * s) { + //delete [] s->buf; + delete [] s->lp; + delete [] s->p; + delete [] s->ss; + delete [] s->sl; +} + +int selectd2_construct(selectd2 *select, int n, uchar *buf) { + int i,m; + int nl; + int p,pp; + int il,is,ml,ms; + int r; + uint *s; + + make_selecttbl(); + + if (L/LLL == 0) { + printf("ERROR: L=%d LLL=%d\n",L,LLL); + exit(1); + } + + m = 0; + for (i=0; in = n; + select->m = m; + //printf("n=%d m=%d\n",n,m); + + select->buf = buf; + + s = new uint[m]; + m = 0; + for (i=0; isize = (n+7)/8; + select->lp = new uint[nl+1]; + for(int k=0;klp[k]=0; + select->size += (nl+1)*sizeof(uint); + select->p = new uint[nl+1]; + for(int k=0;kp[k]=0; + select->size += (nl+1)*sizeof(uint); + + for (r = 0; r < 2; r++) { + ml = ms = 0; + for (il = 0; il < nl; il++) { + pp = s[il*L]; + select->lp[il] = pp; + i = min((il+1)*L-1,m-1); + p = s[i]; + //printf("%d ",p-pp); + if (p - pp >= LL) { + if (r == 1) { + for (is = 0; is < L; is++) { + if (il*L+is >= m) break; + select->sl[ml*L+is] = s[il*L+is]; + } + } + select->p[il] = -((ml<= m) break; + select->ss[ms*(L/LLL)+is] = s[il*L+is*LLL] - pp; + } + } + select->p[il] = ms << (logL-logLLL); + ms++; + } + } + if (r == 0) { + select->sl = new uint[ml*L+1]; + for(int k=0;ksl[k]=0; + select->size += sizeof(uint)*(ml*L+1); + select->sl_len = ml*L+1; + select->ss = new ushort[ms*(L/LLL)+1]; + for(int k=0;kss[k]=0; + select->ss_len = ms*(L/LLL)+1; + select->size += sizeof(ushort)*(ms*(L/LLL)+1); + } + } + delete [] s; + return 0; +} + + +int selectd2_select(selectd2 *select, int i,int f) { + int p,r; + int il; + int rr; + uchar *q; + + if (i == 0) return -1; + + #if 0 + if (i > select->m) { + printf("ERROR: m=%d i=%d\n",select->m,i); + exit(1); + } + #endif + + i--; + + il = select->p[i>>logL]; + if (il < 0) { + il = -il-1; + //p = select->sl[(il<sl[il+(i & (L-1))]; + } + else { + p = select->lp[i>>logL]; + //p += select->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL]; + p += select->ss[il+((i & (L-1))>>logLLL)]; + r = i - (i & (LLL-1)); + + q = &(select->buf[p>>3]); + + if (f == 1) { + rr = p & (8-1); + r -= popCount[*q >> (8-1-rr)]; + //p = p - rr; + + while (1) { + rr = popCount[*q]; + if (r + rr >= i) break; + r += rr; + //p += 8; + q++; + } + p = (q - select->buf) << 3; + p += selecttbl[((i-r-1)<<8)+(*q)]; + } + else { + rr = p & (8-1); + r -= popCount[(*q ^ 0xff) >> (8-1-rr)]; + //p = p - rr; + + while (1) { + rr = popCount[*q ^ 0xff]; + if (r + rr >= i) break; + r += rr; + //p += 8; + q++; + } + p = (q - select->buf) << 3; + p += selecttbl[((i-r-1)<<8)+(*q ^ 0xff)]; + } + } + return p; +} + + +int selectd2_select2(selectd2 *select, int i,int f, int *st, int *en) { + int p,r,p2; + int il; + int rr; + uchar *q; + + if (i == 0) { + *st = -1; + return -1; + } + + #if 0 + if (i > select->m) { + printf("ERROR: m=%d i=%d\n",select->m,i); + exit(1); + } + #endif + + i--; + + il = select->p[i>>logL]; + if (il < 0) { + il = -il-1; + //p = select->sl[(il<sl[il+(i & (L-1))]; + + if ((i>>logL) == ((i+1)>>logL)) { + p2 = select->sl[il+((i+1) & (L-1))]; + } + else { + p2 = selectd2_select(select,i+2,f); + } + } + else { + p = select->lp[i>>logL]; + //p += select->ss[(il<<(logL-logLLL))+(i & (L-1))/LLL]; + p += select->ss[il+((i & (L-1))>>logLLL)]; + r = i - (i & (LLL-1)); + + q = &(select->buf[p>>3]); + + if (f == 1) { + rr = p & (8-1); + r -= popCount[*q >> (8-1-rr)]; + //p = p - rr; + + while (1) { + rr = popCount[*q]; + if (r + rr >= i) break; + r += rr; + //p += 8; + q++; + } + p = (q - select->buf) << 3; + p += selecttbl[((i-r-1)<<8)+(*q)]; + + if ((i>>logL) == ((i+1)>>logL)) { + i++; + while (1) { + rr = popCount[*q]; + if (r + rr >= i) break; + r += rr; + q++; + } + p2 = (q - select->buf) << 3; + p2 += selecttbl[((i-r-1)<<8)+(*q)]; + } + else { + p2 = selectd2_select(select,i+2,f); + } + + } + else { + rr = p & (8-1); + r -= popCount[(*q ^ 0xff) >> (8-1-rr)]; + //p = p - rr; + + while (1) { + rr = popCount[*q ^ 0xff]; + if (r + rr >= i) break; + r += rr; + //p += 8; + q++; + } + p = (q - select->buf) << 3; + p += selecttbl[((i-r-1)<<8)+(*q ^ 0xff)]; + + if ((i>>logL) == ((i+1)>>logL)) { + i++; + while (1) { + rr = popCount[*q ^ 0xff]; + if (r + rr >= i) break; + r += rr; + q++; + } + p2 = (q - select->buf) << 3; + p2 += selecttbl[((i-r-1)<<8)+(*q ^ 0xff)]; + } + else { + p2 = selectd2_select(select,i+2,f); + } + } + } + *st = p; + *en = p2; + return p; +} + + +int selects3_save(selects3 * s, FILE * fp) { + uint wr = 0; + wr += fwrite(&s->n,sizeof(uint),1,fp); + wr += fwrite(&s->m,sizeof(uint),1,fp); + wr += fwrite(&s->size,sizeof(uint),1,fp); + wr += fwrite(&s->d,sizeof(uint),1,fp); + wr += fwrite(&s->hi_len,sizeof(uint),1,fp); + wr += fwrite(&s->low_len,sizeof(uint),1,fp); + wr += fwrite(s->hi,sizeof(uchar),s->hi_len,fp); + wr += fwrite(s->low,sizeof(uint),s->low_len,fp); + if(wr!=(6+s->hi_len+s->low_len)) + return 1; + if(selectd2_save(s->sd0,fp)) return 2; + if(selectd2_save(s->sd1,fp)) return 3; + return 0; +} + +int selects3_load(selects3 * s, FILE * fp) { + uint rd = 0; + rd += fread(&s->n,sizeof(uint),1,fp); + rd += fread(&s->m,sizeof(uint),1,fp); + rd += fread(&s->size,sizeof(uint),1,fp); + rd += fread(&s->d,sizeof(uint),1,fp); + rd += fread(&s->hi_len,sizeof(uint),1,fp); + rd += fread(&s->low_len,sizeof(uint),1,fp); + s->hi = new uchar[s->hi_len]; + rd += fread(s->hi,sizeof(uchar),s->hi_len,fp); + s->low = new uint[s->low_len]; + rd += fread(s->low,sizeof(uint),s->low_len,fp); + if(rd!=(6+s->hi_len+s->low_len)) + return 1; + s->sd0 = new selectd2; + if(selectd2_load(s->sd0,fp)) return 2; + s->sd1 = new selectd2; + if(selectd2_load(s->sd1,fp)) return 3; + delete [] s->sd0->buf; + delete [] s->sd1->buf; + s->sd0->buf = s->hi; + s->sd1->buf = s->hi; + return 0; +} + +void selects3_free(selects3 * s) { + delete [] s->hi; + delete [] s->low; + //delete [] s->sd0->buf; + selectd2_free(s->sd0); + delete s->sd0; + selectd2_free(s->sd1); + delete s->sd1; +} + +int selects3_construct(selects3 *select, int n, uint *buf) { + int i,m; + int d,mm; + uint *low; + uchar *buf2; + selectd2 *sd0,*sd1; + + m = 0; + for (i=0; in = n; + select->m = m; + + if (m == 0) return 0; + + mm = m; + d = 0; + while (mm < n) { + mm <<= 1; + d++; + } + + select->d = d; + + buf2 = new uchar[(2*m+8-1)/8+1]; + for(int k=0;k<(2*m+8-1)/8+1;k++) buf2[k]=0; + select->hi_len = (2*m+8-1)/8+1; + low = new uint[(d*m+PBS-1)/PBS+1]; + for(uint k=0;k<(d*m+PBS-1)/PBS+1;k++) low[k]=0; + select->low_len = (d*m+PBS-1)/PBS+1; + + select->hi = buf2; + select->low = low; + select->size = sizeof(uchar)*((2*m+8-1)/8+1) + sizeof(uint)*((d*m+PBS-1)/PBS+1); + + for (i=0; i>d)+m,1); + setbits(low,m*d,d,i & ((1<size += 2*sizeof(selectd2); + + selectd2_construct(sd1,m*2,buf2); + select->sd1 = sd1; + + for (i=0; isd0 = sd0; + + for (i=0; i select->m) { + printf("ERROR: m=%d i=%d\n",select->m,i); + exit(1); + } + #endif + + if (i == 0) return -1; + + d = select->d; + + x = selectd2_select(select->sd1,i,1) - (i-1); + x <<= d; + x += getbits(select->low,(i-1)*d,d); + return x; + +} + +int selects3_rank(selects3 *select, int i) { + int d,x,w,y; + int r,j; + int z,ii; + uint *q; + + d = select->d; + q = select->low; + + ii = i>>d; + y = selectd2_select(select->sd0,ii,0)+1; + // selectd2_select2(select->sd0,ii,0,&y1,&y2); + //y1++; y2++; + //printf("y %d y1 %d %d\n",y,y1,y2-y1); + + x = y - ii; + + j = i - (ii<>= 3; + z = select->hi[y]; + while (1) { + if (((z << r) & 0x80) == 0) break; + w = getbits(q,x*d,d); + if (w >= j) { + if (w == j) x++; + break; + } + x++; + r++; + if (r == 8) { + r = 0; + y++; + z = select->hi[y]; + } + } + + return x; +} diff --git a/libcds/src/static_bitsequence/sdarray.h b/libcds/src/static_bitsequence/sdarray.h new file mode 100644 index 0000000..a6d28b6 --- /dev/null +++ b/libcds/src/static_bitsequence/sdarray.h @@ -0,0 +1,48 @@ + +#ifndef SDARRAY_H +#define SDARRAY_H + +#include +#include +#include +#include +#include + +typedef struct { + int n,m; + int size; + uchar *buf; + uint *lp; + uint *sl; + ushort *ss; + uint ss_len, sl_len; + uint *p; +} selectd2; + +typedef struct { + int n,m,d; + int size; + uchar *hi; + uint *low; + selectd2 *sd0,*sd1; + uint hi_len, low_len; + +} selects3; + +int selects3_construct(selects3 *select, int n, uint *buf); +int selects3_select(selects3 *select, int i); +int selects3_rank(selects3 *select, int i); + +int setbit(uint *B, int i,int x); +int selectd2_save(selectd2 * s, FILE * fp); +int selects3_save(selects3 * s, FILE * fp); + +int selectd2_load(selectd2 * s, FILE * fp); +int selects3_load(selects3 * s, FILE * fp); + +void selectd2_free(selectd2 * s); +void selects3_free(selects3 * s); + + +#endif + diff --git a/libcds/src/static_bitsequence/static_bitsequence.cpp b/libcds/src/static_bitsequence/static_bitsequence.cpp index 7813d8c..e4740ea 100644 --- a/libcds/src/static_bitsequence/static_bitsequence.cpp +++ b/libcds/src/static_bitsequence/static_bitsequence.cpp @@ -26,8 +26,9 @@ uint static_bitsequence::rank0(uint i) { } uint static_bitsequence::rank1(uint i) { - if(i>=len) return ones; - if(ones==0) return -1; + if(i>=len) return (uint)-1; + if(ones==0) return 0; + if(ones==len) return i+1; uint ini = 1; uint fin = ones; while(inilen-ones) return len; + if(i>len-ones) return -1; if(i==0) return -1; + if(ones==0) return i-1; uint ini = 0; uint fin = len-1; while(iniones) return len; - if(i==0) return 0; + if(i>ones) return -1; + if(i==0) return -1; + if(ones==len) return i-1; uint ini = 0; uint fin = len-1; while(ini #include @@ -90,5 +91,6 @@ protected: #include #include #include +#include #endif /* _STATIC_BITSEQUENCE_H */ diff --git a/libcds/src/static_bitsequence/static_bitsequence_builder.h b/libcds/src/static_bitsequence/static_bitsequence_builder.h index 877dd2c..72135a6 100644 --- a/libcds/src/static_bitsequence/static_bitsequence_builder.h +++ b/libcds/src/static_bitsequence/static_bitsequence_builder.h @@ -32,5 +32,6 @@ class static_bitsequence_builder { #include #include #include +#include #endif /* _STATIC_BITSEQUENCE_BUILDER_H */ diff --git a/libcds/src/static_bitsequence/static_bitsequence_builder_sdarray.cpp b/libcds/src/static_bitsequence/static_bitsequence_builder_sdarray.cpp new file mode 100644 index 0000000..da9b233 --- /dev/null +++ b/libcds/src/static_bitsequence/static_bitsequence_builder_sdarray.cpp @@ -0,0 +1,7 @@ + +#include + +static_bitsequence * static_bitsequence_builder_sdarray::build(uint * buff, uint len) { + return new static_bitsequence_sdarray(buff,len); +} + diff --git a/libcds/src/static_bitsequence/static_bitsequence_builder_sdarray.h b/libcds/src/static_bitsequence/static_bitsequence_builder_sdarray.h new file mode 100644 index 0000000..f16da32 --- /dev/null +++ b/libcds/src/static_bitsequence/static_bitsequence_builder_sdarray.h @@ -0,0 +1,36 @@ +/* static_bitsequence_builder.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_builder definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_BITSEQUENCE_BUILDER_SDARRAY_H +#define _STATIC_BITSEQUENCE_BUILDER_SDARRAY_H + +#include +#include + +class static_bitsequence_builder_sdarray : public static_bitsequence_builder { + public: + static_bitsequence_builder_sdarray() {} + virtual ~static_bitsequence_builder_sdarray() {} + /** Builds a static_bitsequence for the bitmap bitsequence of length len */ + virtual static_bitsequence * build(uint * bitsequence, uint len); +}; + +#endif /* _STATIC_BITSEQUENCE_BUILDER_H */ diff --git a/libcds/src/static_bitsequence/static_bitsequence_rrr02_light.cpp b/libcds/src/static_bitsequence/static_bitsequence_rrr02_light.cpp index 746f5cf..f073e69 100644 --- a/libcds/src/static_bitsequence/static_bitsequence_rrr02_light.cpp +++ b/libcds/src/static_bitsequence/static_bitsequence_rrr02_light.cpp @@ -119,13 +119,11 @@ void static_bitsequence_rrr02_light::create_sampling(uint sample_rate) { } bool static_bitsequence_rrr02_light::access(uint i) { - uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0); uint C_field_bits = bits(BLOCK_SIZE_LIGHT); uint O_pos_field_bits = bits(O_bits_len); uint nearest_sampled_value = i/BLOCK_SIZE_LIGHT/sample_rate; uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value); uint pos = i/BLOCK_SIZE_LIGHT; - assert(pos<=C_len); for(uint k=nearest_sampled_value*sample_rate;kget_log2binomial(BLOCK_SIZE_LIGHT,aux); diff --git a/libcds/src/static_bitsequence/static_bitsequence_sdarray.cpp b/libcds/src/static_bitsequence/static_bitsequence_sdarray.cpp new file mode 100644 index 0000000..e50e074 --- /dev/null +++ b/libcds/src/static_bitsequence/static_bitsequence_sdarray.cpp @@ -0,0 +1,71 @@ + +#include + +static_bitsequence_sdarray::static_bitsequence_sdarray(uint * buff, uint len) { + uint * tmp_seq = new uint[uint_len(len,1)+1]; + ones = 0; + for(uint i=0;ilen = len; + //this->ones = sd.m; + delete [] tmp_seq; +} + +static_bitsequence_sdarray::static_bitsequence_sdarray() {} + +static_bitsequence_sdarray::~static_bitsequence_sdarray() { + if(ones) + selects3_free(&sd); +} + +uint static_bitsequence_sdarray::rank1(uint i) { + if(i>len) return -1; + if(ones) + return selects3_rank(&sd,i); + else + return 0; +} + +uint static_bitsequence_sdarray::select1(uint i) { + if(i>ones) return -1; + if(ones) + return selects3_select(&sd,i); + else + return (uint)-1; +} + +uint static_bitsequence_sdarray::size() { + return sizeof(static_bitsequence_sdarray)+(ones?(sd.size + sd.sd0->size + sd.sd1->size):0); +} + +int static_bitsequence_sdarray::save(FILE * fp) { + uint wr = SDARRAY_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + wr += fwrite(&len,sizeof(uint),1,fp); + wr += fwrite(&ones,sizeof(uint),1,fp); + if(wr!=3 || (ones?(selects3_save(&sd,fp)):false)) + return 1; + return 0; +} + +static_bitsequence_sdarray * static_bitsequence_sdarray::load(FILE * fp) { + uint id; + if(fread(&id,sizeof(uint),1,fp)!=1) return NULL; + if(id!=SDARRAY_HDR) return NULL; + static_bitsequence_sdarray * ret = new static_bitsequence_sdarray(); + id = fread(&ret->len,sizeof(uint),1,fp); + id += fread(&ret->ones,sizeof(uint),1,fp); + if(ret->ones && selects3_load(&ret->sd,fp)) { + delete ret; + return NULL; + } + return ret; +} + diff --git a/libcds/src/static_bitsequence/static_bitsequence_sdarray.h b/libcds/src/static_bitsequence/static_bitsequence_sdarray.h new file mode 100644 index 0000000..6dc23d5 --- /dev/null +++ b/libcds/src/static_bitsequence/static_bitsequence_sdarray.h @@ -0,0 +1,26 @@ + +#ifndef _STATIC_BITSEQUENCE_SDARRAY_H +#define _STATIC_BITSEQUENCE_SDARRAY_H + +#include +#include +#include + +class static_bitsequence_sdarray: public static_bitsequence { + public: + static_bitsequence_sdarray(uint * buff, uint len); + virtual ~static_bitsequence_sdarray(); + virtual uint select1(uint i); + virtual uint rank1(uint i); + virtual uint size(); + virtual int save(FILE * fp); + static static_bitsequence_sdarray * load(FILE * fp); + + protected: + selects3 sd; + static_bitsequence_sdarray(); + +}; + +#endif + diff --git a/libcds/src/static_sequence/static_sequence.cpp b/libcds/src/static_sequence/static_sequence.cpp index 05b7753..71622fb 100644 --- a/libcds/src/static_sequence/static_sequence.cpp +++ b/libcds/src/static_sequence/static_sequence.cpp @@ -38,6 +38,7 @@ static_sequence * static_sequence::load(FILE * fp) { case GMR_CHUNK_HDR: return static_sequence_gmr_chunk::load(fp); case GMR_HDR: return static_sequence_gmr::load(fp); case WVTREE_NOPTRS_HDR: return static_sequence_wvtree_noptrs::load(fp); + case BS_HDR: return static_sequence_bs::load(fp); } return NULL; } diff --git a/libcds/src/static_sequence/static_sequence.h b/libcds/src/static_sequence/static_sequence.h index e3b72dc..363437b 100644 --- a/libcds/src/static_sequence/static_sequence.h +++ b/libcds/src/static_sequence/static_sequence.h @@ -30,6 +30,7 @@ #define GMR_CHUNK_HDR 3 #define GMR_HDR 4 #define WVTREE_NOPTRS_HDR 5 +#define BS_HDR 6 using namespace std; @@ -92,5 +93,6 @@ protected: #include #include #include +#include #endif /* _STATIC_SEQUENCE_H */ diff --git a/libcds/src/static_sequence/static_sequence_bs.cpp b/libcds/src/static_sequence/static_sequence_bs.cpp new file mode 100644 index 0000000..fecca78 --- /dev/null +++ b/libcds/src/static_sequence/static_sequence_bs.cpp @@ -0,0 +1,105 @@ + +#include + +static_sequence_bs::static_sequence_bs(uint * seq, uint n, alphabet_mapper * am, static_bitsequence_builder * bmb) { + sigma = 0; + len = n; + this->am = am; + am->use(); + for(uint i=0;imap(seq[i])); + bitmaps = new static_bitsequence*[++sigma]; + uint ** bm = new uint*[sigma]; + for(uint i=0;imap(seq[i])],i); + for(uint i=0;ibuild(bm[i],len); + for(uint i=0;iunuse(); +} + +uint static_sequence_bs::rank(uint c, uint i) { + if(am->map(c)>=sigma) return (uint)-1; + return bitmaps[am->map(c)]->rank1(i); +} + +uint static_sequence_bs::select(uint c, uint i) { + if(am->map(c)>=sigma) return (uint)-1; + return bitmaps[am->map(c)]->select1(i); +} + +uint static_sequence_bs::access(uint i) { + for(uint j=0;jaccess(i)) return am->unmap(j); + } + return (uint)-1; +} + +uint static_sequence_bs::size() { + uint size = sizeof(static_sequence_bs)+am->size(); + for(uint i=0;isize(); + return size; +} + +uint static_sequence_bs::save(FILE * fp) { + uint wr = BS_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + wr += fwrite(&len,sizeof(uint),1,fp); + wr += fwrite(&sigma,sizeof(uint),1,fp); + if(wr!=3) return 1; + for(uint i=0;isave(fp)) return 2; + if(am->save(fp)) return 3; + return 0; +} + +static_sequence_bs * static_sequence_bs::load(FILE * fp) { + uint rd = 0; + uint type = 0; + rd += fread(&type,sizeof(uint),1,fp); + static_sequence_bs * ret = new static_sequence_bs(); + rd += fread(&ret->len,sizeof(uint),1,fp); + rd += fread(&ret->sigma,sizeof(uint),1,fp); + if(rd!=3 || type != BS_HDR) { + delete ret; + return NULL; + } + ret->bitmaps = new static_bitsequence*[ret->sigma]; + for(uint i=0;isigma;i++) + ret->bitmaps[i] = NULL; + for(uint i=0;isigma;i++) + if((ret->bitmaps[i]=static_bitsequence::load(fp))==NULL) { + delete ret; + return NULL; + } + if((ret->am = alphabet_mapper::load(fp))==NULL) { + delete ret; + return NULL; + } + ret->am->use(); + return ret; +} + diff --git a/libcds/src/static_sequence/static_sequence_bs.h b/libcds/src/static_sequence/static_sequence_bs.h new file mode 100644 index 0000000..7161667 --- /dev/null +++ b/libcds/src/static_sequence/static_sequence_bs.h @@ -0,0 +1,64 @@ +/* static_sequence.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_sequence definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_SEQUENCE_BS_H +#define _STATIC_SEQUENCE_BS_H + + +#include +#include +#include + +/** static_sequence represented using one bitmap per symbol, doesn't support efficient access + * + * @author Francisco Claude + */ +class static_sequence_bs : public static_sequence { + +public: + static_sequence_bs(uint * seq, uint n, alphabet_mapper * am, static_bitsequence_builder * bmb); + virtual ~static_sequence_bs(); + + virtual uint rank(uint c, uint i); + + virtual uint select(uint c, uint i); + + virtual uint access(uint i); + + virtual uint size(); + + virtual uint save(FILE * fp); + + /** Reads a bitmap determining the type */ + static static_sequence_bs * load(FILE * fp); + +protected: + uint sigma; + static_bitsequence ** bitmaps; + alphabet_mapper * am; + + static_sequence_bs(); + +}; + + +#endif /* _STATIC_SEQUENCE_BS_H */ + diff --git a/libcds/src/static_sequence/wt_node_internal.cpp b/libcds/src/static_sequence/wt_node_internal.cpp index 6f67501..16dc441 100644 --- a/libcds/src/static_sequence/wt_node_internal.cpp +++ b/libcds/src/static_sequence/wt_node_internal.cpp @@ -183,7 +183,7 @@ uint wt_node_internal::rankLessThan(uint &symbol, uint pos, uint l, wt_coder * c // cout << "recursion to leftchild at l = " << l << ", symbol = " << (uchar)symbol << ", rank0 = " << bitmap->rank0(pos) << ", rank1 = " << bitmap->rank1(pos) << endl; // check left child for symbols <= givenSymbol - if (result != -1 || left_child == NULL) + if (result != (uint)-1 || left_child == NULL) return result; return left_child->rankLessThan(symbol, bitmap->rank0(pos)-1); } @@ -195,11 +195,11 @@ uint wt_node_internal::rankLessThan(uint &symbol, uint pos) using std::endl; // cout << "pos = " << pos << ", symbol = " << (uchar)symbol << endl; - if (pos == -1) - return -1; + if (pos == (uint)-1) + return (uint)-1; if(right_child!=NULL) result = right_child->rankLessThan(symbol, bitmap->rank1(pos)-1); - if(result == -1 && left_child!=NULL) + if(result == (uint)-1 && left_child!=NULL) return left_child->rankLessThan(symbol, bitmap->rank0(pos)-1); return result; } diff --git a/libcds/src/static_sequence/wt_node_leaf.cpp b/libcds/src/static_sequence/wt_node_leaf.cpp index 02bc9cc..f712f85 100644 --- a/libcds/src/static_sequence/wt_node_leaf.cpp +++ b/libcds/src/static_sequence/wt_node_leaf.cpp @@ -46,7 +46,7 @@ uint wt_node_leaf::rankLessThan(uint &symbol, uint pos, uint l, wt_coder * c) { uint wt_node_leaf::rankLessThan(uint &symbol, uint pos) { // std::cout <<"this-symbol: " << (uchar)this->symbol << ", symbol = " << (uchar)symbol << ", pos = " << pos << std::endl; - if (pos == -1) + if (pos == (uint)-1) return -1; symbol = this->symbol; pos++; diff --git a/libcds/tests/Makefile b/libcds/tests/Makefile index f91201f..b1ac75b 100644 --- a/libcds/tests/Makefile +++ b/libcds/tests/Makefile @@ -1,10 +1,10 @@ CPP=g++ #CPPFLAGS=-g3 -Wall -I../includes/ -CPPFLAGS=-O9 -w -DNDEBUG -I../includes/ +CPPFLAGS=-O9 -Wall -DNDEBUG -I../includes/ -OBJECTS=make_bitmap.o static_bitsequence_tester.o static_sequence_tester.o static_sequence_wvtree_test.o static_sequence_gmr_test.o static_sequence_gmr_chunk_test.o static_sequence_wvtree_noptrs_test.o static_bitsequence_test.o text_to_int.o -BIN=make_bitmap static_sequence_wvtree_test static_sequence_gmr_test static_sequence_gmr_chunk_test static_sequence_wvtree_noptrs_test static_bitsequence_test text_to_int +OBJECTS=make_bitmap.o static_bitsequence_tester.o static_sequence_tester.o static_sequence_wvtree_test.o static_sequence_gmr_test.o static_sequence_gmr_chunk_test.o static_sequence_wvtree_noptrs_test.o static_bitsequence_test.o static_sequence_bs_test.o text_to_int.o +BIN=make_bitmap static_sequence_wvtree_test static_sequence_gmr_test static_sequence_gmr_chunk_test static_sequence_wvtree_noptrs_test static_bitsequence_test text_to_int static_sequence_bs_test LIB=../lib/libcds.a @@ -42,6 +42,10 @@ static_sequence_gmr_chunk_test: @echo " [C++] Building static_sequence_gmr_chunk_test" @$(CPP) $(CPPFLAGS) -o static_sequence_gmr_chunk_test static_sequence_gmr_chunk_test.o static_sequence_tester.o $(LIB) +static_sequence_bs_test: + @echo " [C++] Building static_sequence_bs_test" + @$(CPP) $(CPPFLAGS) -o static_sequence_bs_test static_sequence_bs_test.o static_sequence_tester.o $(LIB) + clean: @echo " [CLN] Cleaning object files" @rm -f $(OBJECTS) $(BIN) diff --git a/libcds/tests/static_sequence_bs_test.cpp b/libcds/tests/static_sequence_bs_test.cpp new file mode 100644 index 0000000..ffe2e76 --- /dev/null +++ b/libcds/tests/static_sequence_bs_test.cpp @@ -0,0 +1,76 @@ +/* static_sequence_wvtree_test.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_sequence_wvtree_test + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "static_sequence_tester.h" + +int main(int argc, char ** argv) { + if(argc!=5) { + cout << "Usage: " << argv[0] << " " << endl; + return 0; + } + stringstream ss; + ss << argv[3]; + uint samp; + ss >> samp; + + uint * text; + uint n; + load(argv[1],&text,&n); + + alphabet_mapper * am = new alphabet_mapper_none(); + + static_bitsequence_builder * bmb; + if(string(argv[2])==string("b")) + bmb = new static_bitsequence_builder_brw32(samp); + else if(string(argv[2])==string("s")) + bmb = new static_bitsequence_builder_sdarray(); + else + bmb = new static_bitsequence_builder_rrr02(samp); + + static_sequence * sseq = new static_sequence_bs(text,n,am,bmb); + delete bmb; + //am->unuse(); + + sseq = savetest(argv[1], sseq); + if(string(argv[4])==string("t")) + test_static_sequence(text,n,sseq); + else + cout << "Size: " << sseq->size() << endl; + cout << "*************************************" << endl; + speed_access(sseq,text,n); + cout << "*************************************" << endl; + speed_rank(sseq,text,n); + cout << "*************************************" << endl; + speed_select(sseq,text,n); + + delete sseq; + delete [] text; +} + -- 2.17.1