From 52cb7bbcda67f4676335cdd4eb96d4d87ad1445d Mon Sep 17 00:00:00 2001 From: kim Date: Tue, 27 Jan 2009 12:18:59 +0000 Subject: [PATCH] Missing files from the latest version of libcds.googlecode.com git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/XMLTree@70 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- ...static_bitsequence_builder_rrr02_light.cpp | 30 ++ .../static_bitsequence_builder_rrr02_light.h | 40 ++ .../static_bitsequence_rrr02_light.cpp | 375 ++++++++++++++++++ .../static_bitsequence_rrr02_light.h | 100 +++++ .../static_sequence/static_sequence_builder.h | 43 ++ .../static_sequence_builder_gmr.cpp | 32 ++ .../static_sequence_builder_gmr.h | 44 ++ .../static_sequence_builder_gmr_chunk.cpp | 31 ++ .../static_sequence_builder_gmr_chunk.h | 44 ++ .../static_sequence_builder_wvtree.cpp | 32 ++ .../static_sequence_builder_wvtree.h | 46 +++ .../static_sequence_builder_wvtree_noptrs.cpp | 31 ++ .../static_sequence_builder_wvtree_noptrs.h | 44 ++ .../static_sequence/static_sequence_gmr.cpp | 173 ++++++++ .../src/static_sequence/static_sequence_gmr.h | 56 +++ .../static_sequence_wvtree_noptrs.cpp | 283 +++++++++++++ .../static_sequence_wvtree_noptrs.h | 85 ++++ libcds/src/utils/alphabet_mapper_cont.cpp | 77 ++++ libcds/src/utils/alphabet_mapper_cont.h | 52 +++ libcds/tests/static_bitsequence_test.cpp | 71 ++++ libcds/tests/static_bitsequence_tester.cpp | 179 +++++++++ libcds/tests/static_bitsequence_tester.h | 43 ++ .../tests/static_sequence_gmr_chunk_test.cpp | 80 ++++ libcds/tests/static_sequence_gmr_test.cpp | 95 +++++ libcds/tests/static_sequence_tester.cpp | 241 +++++++++++ libcds/tests/static_sequence_tester.h | 42 ++ .../static_sequence_wvtree_noptrs_test.cpp | 80 ++++ libcds/tests/static_sequence_wvtree_test.cpp | 81 ++++ libcds/tests/text_to_int.cpp | 74 ++++ 29 files changed, 2604 insertions(+) create mode 100644 libcds/src/static_bitsequence/static_bitsequence_builder_rrr02_light.cpp create mode 100644 libcds/src/static_bitsequence/static_bitsequence_builder_rrr02_light.h create mode 100644 libcds/src/static_bitsequence/static_bitsequence_rrr02_light.cpp create mode 100644 libcds/src/static_bitsequence/static_bitsequence_rrr02_light.h create mode 100644 libcds/src/static_sequence/static_sequence_builder.h create mode 100644 libcds/src/static_sequence/static_sequence_builder_gmr.cpp create mode 100644 libcds/src/static_sequence/static_sequence_builder_gmr.h create mode 100644 libcds/src/static_sequence/static_sequence_builder_gmr_chunk.cpp create mode 100644 libcds/src/static_sequence/static_sequence_builder_gmr_chunk.h create mode 100644 libcds/src/static_sequence/static_sequence_builder_wvtree.cpp create mode 100644 libcds/src/static_sequence/static_sequence_builder_wvtree.h create mode 100644 libcds/src/static_sequence/static_sequence_builder_wvtree_noptrs.cpp create mode 100644 libcds/src/static_sequence/static_sequence_builder_wvtree_noptrs.h create mode 100644 libcds/src/static_sequence/static_sequence_gmr.cpp create mode 100644 libcds/src/static_sequence/static_sequence_gmr.h create mode 100644 libcds/src/static_sequence/static_sequence_wvtree_noptrs.cpp create mode 100644 libcds/src/static_sequence/static_sequence_wvtree_noptrs.h create mode 100644 libcds/src/utils/alphabet_mapper_cont.cpp create mode 100644 libcds/src/utils/alphabet_mapper_cont.h create mode 100644 libcds/tests/static_bitsequence_test.cpp create mode 100644 libcds/tests/static_bitsequence_tester.cpp create mode 100644 libcds/tests/static_bitsequence_tester.h create mode 100644 libcds/tests/static_sequence_gmr_chunk_test.cpp create mode 100644 libcds/tests/static_sequence_gmr_test.cpp create mode 100644 libcds/tests/static_sequence_tester.cpp create mode 100644 libcds/tests/static_sequence_tester.h create mode 100644 libcds/tests/static_sequence_wvtree_noptrs_test.cpp create mode 100644 libcds/tests/static_sequence_wvtree_test.cpp create mode 100644 libcds/tests/text_to_int.cpp diff --git a/libcds/src/static_bitsequence/static_bitsequence_builder_rrr02_light.cpp b/libcds/src/static_bitsequence/static_bitsequence_builder_rrr02_light.cpp new file mode 100644 index 0000000..6aee2be --- /dev/null +++ b/libcds/src/static_bitsequence/static_bitsequence_builder_rrr02_light.cpp @@ -0,0 +1,30 @@ +/* static_bitsequence_builder_rrr02_light.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_builder_rrr02_light definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +static_bitsequence_builder_rrr02_light::static_bitsequence_builder_rrr02_light(uint sampling) { + sample_rate=sampling; +} + +static_bitsequence * static_bitsequence_builder_rrr02_light::build(uint * bitsequence, uint len) { + return new static_bitsequence_rrr02_light(bitsequence,len,sample_rate); +} diff --git a/libcds/src/static_bitsequence/static_bitsequence_builder_rrr02_light.h b/libcds/src/static_bitsequence/static_bitsequence_builder_rrr02_light.h new file mode 100644 index 0000000..3bbf232 --- /dev/null +++ b/libcds/src/static_bitsequence/static_bitsequence_builder_rrr02_light.h @@ -0,0 +1,40 @@ +/* static_bitsequence_builder_rrr02_light.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_builder_rrr02_light definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_BITSEQUENCE_BUILDER_RRR02_LIGHT_H +#define _STATIC_BITSEQUENCE_BUILDER_RRR02_LIGHT_H + +#include +#include +#include + +class static_bitsequence_builder_rrr02_light : public static_bitsequence_builder { + public: + /** Defines the sample rate used to build the bitmaps (rrr02) */ + static_bitsequence_builder_rrr02_light(uint sampling); + virtual ~static_bitsequence_builder_rrr02_light() {} + virtual static_bitsequence * build(uint * bitsequence, uint len); + + protected: + uint sample_rate; +}; + +#endif /* _STATIC_BITSEQUENCE_BUILDER_RRR02_LIGHT_H */ diff --git a/libcds/src/static_bitsequence/static_bitsequence_rrr02_light.cpp b/libcds/src/static_bitsequence/static_bitsequence_rrr02_light.cpp new file mode 100644 index 0000000..746f5cf --- /dev/null +++ b/libcds/src/static_bitsequence/static_bitsequence_rrr02_light.cpp @@ -0,0 +1,375 @@ +/* static_bitsequence_rrr02_light.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_rrr02_light definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +#define VARS_NEEDED uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0);\ +uint C_field_bits = bits(BLOCK_SIZE_LIGHT);\ +uint O_len = uint_len(1,O_bits_len);\ +uint C_sampling_len = C_len/sample_rate+2;\ +uint C_sampling_field_bits = bits(ones);\ +uint O_pos_len = C_len/sample_rate+1;\ +uint O_pos_field_bits = bits(O_bits_len); + + +table_offset * static_bitsequence_rrr02_light::E = NULL; + +static_bitsequence_rrr02_light::static_bitsequence_rrr02_light() { + ones=0; + len=0; + if(E==NULL) E = new table_offset(BLOCK_SIZE_LIGHT); + E->use(); + C = NULL; + O = NULL; + C_sampling = NULL; + O_pos = NULL; + sample_rate = DEFAULT_SAMPLING_LIGHT; + O_bits_len = 0; +} + +static_bitsequence_rrr02_light::static_bitsequence_rrr02_light(uint * bitseq, uint len, uint sample_rate) { + ones = 0; + this->len = len; + if(E==NULL) E = new table_offset(BLOCK_SIZE_LIGHT); + E->use(); + // Table C + uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0); + uint C_field_bits = bits(BLOCK_SIZE_LIGHT); + C = new uint[uint_len(C_len,C_field_bits)]; + for(uint i=0;iget_log2binomial(BLOCK_SIZE_LIGHT,value); + } + // Table O + uint O_len = uint_len(1,O_bits_len); + O = new uint[O_len]; + for(uint i=0;iget_log2binomial(BLOCK_SIZE_LIGHT,popcount(value))-1,E->compute_offset((ushort)value)); + O_pos += E->get_log2binomial(BLOCK_SIZE_LIGHT,popcount(value)); + } + C_sampling = NULL; + this->O_pos = NULL; + + create_sampling(sample_rate); +} + +void static_bitsequence_rrr02_light::create_sampling(uint sample_rate) { + this->sample_rate = sample_rate; +/* for(uint i=0;iget_log2binomial(BLOCK_SIZE_LIGHT,get_field(C,C_field_bits,i)); + }*/ + // Sampling for C + uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0); + uint C_field_bits = bits(BLOCK_SIZE_LIGHT); + uint C_sampling_len = C_len/sample_rate+2; + uint C_sampling_field_bits = bits(ones); + if(C_sampling!=NULL) delete [] C_sampling; + C_sampling = new uint[max((uint)1,uint_len(C_sampling_len,C_sampling_field_bits))]; + for(uint i=0;iget_log2binomial(BLOCK_SIZE_LIGHT,get_field(C,C_field_bits,i)); + } +} + +bool static_bitsequence_rrr02_light::access(uint i) { + uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0); + uint C_field_bits = bits(BLOCK_SIZE_LIGHT); + uint O_pos_field_bits = bits(O_bits_len); + uint nearest_sampled_value = i/BLOCK_SIZE_LIGHT/sample_rate; + uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value); + uint pos = i/BLOCK_SIZE_LIGHT; + assert(pos<=C_len); + for(uint k=nearest_sampled_value*sample_rate;kget_log2binomial(BLOCK_SIZE_LIGHT,aux); + } + uint c = get_field(C,C_field_bits,pos); + return ((1<<(i%BLOCK_SIZE_LIGHT))&E->short_bitmap(c,get_var_field(O,pos_O,pos_O+E->get_log2binomial(BLOCK_SIZE_LIGHT,c)-1)))!=0; +} + +uint static_bitsequence_rrr02_light::rank0(uint i) { + if(i+1==0) return 0; + return 1+i-rank1(i); +} + +uint static_bitsequence_rrr02_light::rank1(uint i) { + uint C_field_bits = bits(BLOCK_SIZE_LIGHT); + uint C_sampling_field_bits = bits(ones); + uint O_pos_field_bits = bits(O_bits_len); + if(i+1==0) return 0; + uint nearest_sampled_value = i/BLOCK_SIZE_LIGHT/sample_rate; + uint sum = get_field(C_sampling,C_sampling_field_bits,nearest_sampled_value); + uint pos_O = get_field(O_pos,O_pos_field_bits,nearest_sampled_value); + uint pos = i/BLOCK_SIZE_LIGHT; + uint k=nearest_sampled_value*sample_rate; + if(k%2==1 && kget_log2binomial(BLOCK_SIZE_LIGHT,aux); + k++; + } + uchar * a = (uchar *)C; + uint mask = 0x0F; + a += k/2; + while(k<(uint)max(0,(int)pos-1)) { + assert(((*a)&mask)==get_field(C,C_field_bits,k)); + assert((*a)/16==get_field(C,C_field_bits,k+1)); + sum += ((*a)&mask)+(*a)/16; + pos_O += E->get_log2binomial(BLOCK_SIZE_LIGHT,((*a)&mask))+E->get_log2binomial(BLOCK_SIZE_LIGHT,((*a)/16)); + a++; + k+=2; + } + if(kget_log2binomial(BLOCK_SIZE_LIGHT,aux); + k++; + } + uint c = get_field(C,C_field_bits,pos); + sum += popcount(((2<<(i%BLOCK_SIZE_LIGHT))-1) & E->short_bitmap(c,get_var_field(O,pos_O,pos_O+E->get_log2binomial(BLOCK_SIZE_LIGHT,c)-1))); + return sum; +} + +uint static_bitsequence_rrr02_light::select0(uint i) { + uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0); + uint C_field_bits = bits(BLOCK_SIZE_LIGHT); + uint C_sampling_len = C_len/sample_rate+2; + uint C_sampling_field_bits = bits(ones); + uint O_pos_field_bits = bits(O_bits_len); + if(i==0) return -1; + if(i>len-ones) return len; + // Search over partial sums + uint start=0; + uint end=C_sampling_len-1; + uint med, acc=0, pos; + while(start=i) break; + pos_O += E->get_log2binomial(BLOCK_SIZE_LIGHT,s); + acc += BLOCK_SIZE_LIGHT-s; + } + pos = (pos)*BLOCK_SIZE_LIGHT; + // Search inside the block + + while(accget_log2binomial(BLOCK_SIZE_LIGHT,s); + uint block = E->short_bitmap(s,get_var_field(O,pos_O,new_posO-1)); + pos_O = new_posO; + new_posO = 0; + while(accones) return len; + // Search over partial sums + uint start=0; + uint end=C_sampling_len-1; + uint med, acc=0, pos; + while(start=i) break; + pos_O += E->get_log2binomial(BLOCK_SIZE_LIGHT,s); + acc += s; + } + pos = (pos)*BLOCK_SIZE_LIGHT; + //cout << "pos=" << pos << endl; + // Search inside the block + while(accget_log2binomial(BLOCK_SIZE_LIGHT,s); + uint block = E->short_bitmap(s,get_var_field(O,pos_O,new_posO-1)); + pos_O = new_posO; + new_posO = 0; + while(accsize() << endl;*/ + uint sum = sizeof(uint)*8;//sizeof(static_bitsequence_rrr02_light); + sum += uint_len(C_len,C_field_bits)*sizeof(uint); + sum += O_len*sizeof(uint); + sum += uint_len(C_sampling_len,C_sampling_field_bits)*sizeof(uint); + sum += uint_len(O_pos_len,O_pos_field_bits)*sizeof(uint); + //sum += E->size(); + return sum; +} + +static_bitsequence_rrr02_light::~static_bitsequence_rrr02_light() { + if(C!=NULL) delete [] C; + if(O!=NULL) delete [] O; + if(C_sampling!=NULL) delete [] C_sampling; + if(O_pos!=NULL) delete [] O_pos; + E = E->unuse(); +} + +int static_bitsequence_rrr02_light::save(FILE * fp) { + uint C_len = len/BLOCK_SIZE_LIGHT + (len%BLOCK_SIZE_LIGHT!=0); + uint C_field_bits = bits(BLOCK_SIZE_LIGHT); + uint O_len = uint_len(1,O_bits_len); + uint wr = RRR02_LIGHT_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + wr += fwrite(&len,sizeof(uint),1,fp); + wr += fwrite(&ones,sizeof(uint),1,fp); + wr += fwrite(&O_bits_len,sizeof(uint),1,fp); + wr += fwrite(&sample_rate,sizeof(uint),1,fp); + if(wr!=5) return -1; + wr = fwrite(C,sizeof(uint),uint_len(C_len,C_field_bits),fp); + if(wr!=uint_len(C_len,C_field_bits)) return -1; + wr = fwrite(O,sizeof(uint),O_len,fp); + if(wr!=O_len) return -1; + return 0; +} + +static_bitsequence_rrr02_light * static_bitsequence_rrr02_light::load(FILE * fp) { + static_bitsequence_rrr02_light * ret = new static_bitsequence_rrr02_light(); + uint rd = 0, type; + rd += fread(&type,sizeof(uint),1,fp); + rd += fread(&ret->len,sizeof(uint),1,fp); + rd += fread(&ret->ones,sizeof(uint),1,fp); + rd += fread(&ret->O_bits_len,sizeof(uint),1,fp); + rd += fread(&ret->sample_rate,sizeof(uint),1,fp); + uint C_len = ret->len/BLOCK_SIZE_LIGHT + (ret->len%BLOCK_SIZE_LIGHT!=0); + uint C_field_bits = bits(BLOCK_SIZE_LIGHT); + uint O_len = uint_len(1,ret->O_bits_len); + if(rd!=5 || type!=RRR02_LIGHT_HDR) { + delete ret; + return NULL; + } + ret->C = new uint[uint_len(C_len,C_field_bits)]; + rd = fread(ret->C,sizeof(uint),uint_len(C_len,C_field_bits),fp); + if(rd!=uint_len(C_len,C_field_bits)) { + ret->C=NULL; + delete ret; + return NULL; + } + ret->O = new uint[O_len]; + rd = fread(ret->O,sizeof(uint),O_len,fp); + if(rd!=O_len) { + ret->O=NULL; + delete ret; + return NULL; + } + ret->create_sampling(ret->sample_rate); + return ret; +} diff --git a/libcds/src/static_bitsequence/static_bitsequence_rrr02_light.h b/libcds/src/static_bitsequence/static_bitsequence_rrr02_light.h new file mode 100644 index 0000000..15da0a9 --- /dev/null +++ b/libcds/src/static_bitsequence/static_bitsequence_rrr02_light.h @@ -0,0 +1,100 @@ +/* static_bitsequence_rrr02_light.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * RRR02 Bitsequence - light version + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + + +#ifndef _STATIC_BITSEQUENCE_RRR02_LIGHT_H +#define _STATIC_BITSEQUENCE_RRR02_LIGHT_H + +#define BLOCK_SIZE_LIGHT 15 +#define DEFAULT_SAMPLING_LIGHT 32 + +#include +#include +#include +#include + +using namespace std; + +/** Implementation of Raman, Raman and Rao's [1] proposal for rank/select capable + * data structures, it achieves space nH_0, O(sample_rate) time for rank and O(log len) + * for select. The practial implementation is based on [2] + * + * [1] R. Raman, V. Raman and S. Rao. Succinct indexable dictionaries with applications + * to encoding $k$-ary trees and multisets. SODA02. + * [2] F. Claude and G. Navarro. Practical Rank/Select over Arbitrary Sequences. SPIRE08. + * + * @author Francisco Claude + */ +class static_bitsequence_rrr02_light: public static_bitsequence { +public: + static_bitsequence_rrr02_light(uint * bitseq, uint len, uint sample_rate=DEFAULT_SAMPLING_LIGHT); + virtual ~static_bitsequence_rrr02_light(); + + /** Returns the number of zeros until position i */ + virtual uint rank0(uint i); + + /** Returns the number of ones until position i */ + virtual uint rank1(uint i); + + /** Returns the position of the i-th zero + * @return (uint)-1 if i=0, len if i>num_zeros or the position */ + virtual uint select0(uint i); + + /** Returns the position of the i-th one + * @return (uint)-1 if i=0, len if i>num_ones or the position */ + virtual uint select1(uint i); + + /** Returns the i-th bit */ + virtual bool access(uint i); + + /** Returns the size of the structure in bytes */ + virtual uint size(); + + /** Stores the bitmap given a file pointer, return 0 in case of success */ + virtual int save(FILE * fp); + + /** Reads the bitmap from a file pointer, returns NULL in case of error */ + static static_bitsequence_rrr02_light * load(FILE * fp); + + /** Creates a new sampling for the queries */ + void create_sampling(uint sampling_rate); + + /** Frees the space required by the table E, which is static and global + * to all instances. + */ + static void delete_E() { + delete E; + } + +protected: + static_bitsequence_rrr02_light(); + /** Classes and offsets */ + uint *C, *O; + uint O_bits_len; + /** C and O samplings */ + uint *C_sampling, *O_pos; + /** Sample rate */ + uint sample_rate; + + static table_offset * E; +}; + +#endif /* _STATIC_BITSEQUENCE_RRR02_H */ diff --git a/libcds/src/static_sequence/static_sequence_builder.h b/libcds/src/static_sequence/static_sequence_builder.h new file mode 100644 index 0000000..6d0b340 --- /dev/null +++ b/libcds/src/static_sequence/static_sequence_builder.h @@ -0,0 +1,43 @@ +/* static_sequence_builder.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_SEQUENCE_BUILDER_H +#define _STATIC_SEQUENCE_BUILDER_H + +#include +#include + +/** Base class for static sequence builders + * @author Francisco Claude + */ +class static_sequence_builder { + public: + virtual ~static_sequence_builder() {} + /** Returns a new sequence build for seq */ + virtual static_sequence * build(uint * seq, uint len)=0; +}; + +#include +#include +#include +#include + +#endif diff --git a/libcds/src/static_sequence/static_sequence_builder_gmr.cpp b/libcds/src/static_sequence/static_sequence_builder_gmr.cpp new file mode 100644 index 0000000..b6781e1 --- /dev/null +++ b/libcds/src/static_sequence/static_sequence_builder_gmr.cpp @@ -0,0 +1,32 @@ +/* static_sequence_builder_gmr.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder gmr + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +static_sequence_builder_gmr::static_sequence_builder_gmr(uint chunk_length, static_bitsequence_builder *bmb, static_sequence_builder *ssb) { + this->chunk_length = chunk_length; + this->bmb = bmb; + this->ssb = ssb; +} + +static_sequence * static_sequence_builder_gmr::build(uint * seq, uint len) { + return new static_sequence_gmr(seq,len,chunk_length,bmb,ssb); +} diff --git a/libcds/src/static_sequence/static_sequence_builder_gmr.h b/libcds/src/static_sequence/static_sequence_builder_gmr.h new file mode 100644 index 0000000..1588818 --- /dev/null +++ b/libcds/src/static_sequence/static_sequence_builder_gmr.h @@ -0,0 +1,44 @@ +/* static_sequence_builder_gmr.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder gmr + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_SEQUENCE_BUILDER_GMR_H +#define _STATIC_SEQUENCE_BUILDER_GMR_H + +#include +#include +#include + +/** gmr builder + * @author Francisco Claude + */ +class static_sequence_builder_gmr { + public: + static_sequence_builder_gmr(uint chunk_length, static_bitsequence_builder *bmb, static_sequence_builder *ssb); + virtual ~static_sequence_builder_gmr() {} + virtual static_sequence * build(uint * seq, uint len); + + protected: + static_bitsequence_builder *bmb; + static_sequence_builder *ssb; + uint chunk_length; +}; + +#endif diff --git a/libcds/src/static_sequence/static_sequence_builder_gmr_chunk.cpp b/libcds/src/static_sequence/static_sequence_builder_gmr_chunk.cpp new file mode 100644 index 0000000..3d3b758 --- /dev/null +++ b/libcds/src/static_sequence/static_sequence_builder_gmr_chunk.cpp @@ -0,0 +1,31 @@ +/* static_sequence_builder_gmr_chunk.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder gmr chunk + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +static_sequence_builder_gmr_chunk::static_sequence_builder_gmr_chunk(static_bitsequence_builder *bmb, static_permutation_builder *pmb) { + this->bmb = bmb; + this->pmb = pmb; +} + +static_sequence * static_sequence_builder_gmr_chunk::build(uint * seq, uint len) { + return new static_sequence_gmr_chunk(seq,len,bmb,pmb); +} diff --git a/libcds/src/static_sequence/static_sequence_builder_gmr_chunk.h b/libcds/src/static_sequence/static_sequence_builder_gmr_chunk.h new file mode 100644 index 0000000..6eccdc8 --- /dev/null +++ b/libcds/src/static_sequence/static_sequence_builder_gmr_chunk.h @@ -0,0 +1,44 @@ +/* static_sequence_builder_gmr_chunk.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder gmr chunk + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_SEQUENCE_BUILDER_GMR_CHUNK_H +#define _STATIC_SEQUENCE_BUILDER_GMR_CHUNK_H + +#include +#include +#include +#include + +/** gmr chunk builder + * @author Francisco Claude + */ +class static_sequence_builder_gmr_chunk : public static_sequence_builder { + public: + static_sequence_builder_gmr_chunk(static_bitsequence_builder *bmb, static_permutation_builder *pmb); + virtual ~static_sequence_builder_gmr_chunk() {} + virtual static_sequence * build(uint * seq, uint len); + + protected: + static_bitsequence_builder *bmb; + static_permutation_builder *pmb; +}; + +#endif diff --git a/libcds/src/static_sequence/static_sequence_builder_wvtree.cpp b/libcds/src/static_sequence/static_sequence_builder_wvtree.cpp new file mode 100644 index 0000000..d539cdf --- /dev/null +++ b/libcds/src/static_sequence/static_sequence_builder_wvtree.cpp @@ -0,0 +1,32 @@ +/* static_sequence_builder_wvtree.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder wavelet tree + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +static_sequence_builder_wvtree::static_sequence_builder_wvtree(wt_coder * wc, static_bitsequence_builder *bmb, alphabet_mapper * am) { + this->bmb = bmb; + this->wc = wc; + this->am = am; +} + +static_sequence * static_sequence_builder_wvtree::build(uint * seq, uint len) { + return new static_sequence_wvtree(seq,len,wc,bmb,am); +} diff --git a/libcds/src/static_sequence/static_sequence_builder_wvtree.h b/libcds/src/static_sequence/static_sequence_builder_wvtree.h new file mode 100644 index 0000000..9c76be2 --- /dev/null +++ b/libcds/src/static_sequence/static_sequence_builder_wvtree.h @@ -0,0 +1,46 @@ +/* static_sequence_builder_wvtree.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder wavelet tree + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_SEQUENCE_BUILDER_WVTREE_H +#define _STATIC_SEQUENCE_BUILDER_WVTREE_H + +#include +#include +#include +#include +#include + +/** Builder for wavelet trees + * @author Francisco Claude + */ +class static_sequence_builder_wvtree : public static_sequence_builder { + public: + static_sequence_builder_wvtree(wt_coder * wc, static_bitsequence_builder *bmb, alphabet_mapper * am); + virtual ~static_sequence_builder_wvtree() {} + virtual static_sequence * build(uint * seq, uint len); + + protected: + alphabet_mapper * am; + wt_coder * wc; + static_bitsequence_builder *bmb; +}; + +#endif diff --git a/libcds/src/static_sequence/static_sequence_builder_wvtree_noptrs.cpp b/libcds/src/static_sequence/static_sequence_builder_wvtree_noptrs.cpp new file mode 100644 index 0000000..5f34970 --- /dev/null +++ b/libcds/src/static_sequence/static_sequence_builder_wvtree_noptrs.cpp @@ -0,0 +1,31 @@ +/* static_sequence_builder_wvtree_noptrs.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder wavelet tree without pointers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +static_sequence_builder_wvtree_noptrs::static_sequence_builder_wvtree_noptrs(static_bitsequence_builder *bmb, alphabet_mapper * am) { + this->bmb = bmb; + this->am = am; +} + +static_sequence * static_sequence_builder_wvtree_noptrs::build(uint * seq, uint len) { + return new static_sequence_wvtree_noptrs(seq,len,bmb,am); +} diff --git a/libcds/src/static_sequence/static_sequence_builder_wvtree_noptrs.h b/libcds/src/static_sequence/static_sequence_builder_wvtree_noptrs.h new file mode 100644 index 0000000..16bd22b --- /dev/null +++ b/libcds/src/static_sequence/static_sequence_builder_wvtree_noptrs.h @@ -0,0 +1,44 @@ +/* static_sequence_builder_wvtree_noptrs.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * Sequence builder wavelet tree without pointers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_SEQUENCE_BUILDER_WVTREE_NOPTRS_H +#define _STATIC_SEQUENCE_BUILDER_WVTREE_NOPTRS_H + +#include +#include +#include +#include + +/** Builder for wavelet trees without pointers + * @author Francisco Claude + */ +class static_sequence_builder_wvtree_noptrs : public static_sequence_builder { + public: + static_sequence_builder_wvtree_noptrs(static_bitsequence_builder *bmb, alphabet_mapper * am); + virtual ~static_sequence_builder_wvtree_noptrs() {} + virtual static_sequence * build(uint * seq, uint len); + + protected: + alphabet_mapper * am; + static_bitsequence_builder *bmb; +}; + +#endif diff --git a/libcds/src/static_sequence/static_sequence_gmr.cpp b/libcds/src/static_sequence/static_sequence_gmr.cpp new file mode 100644 index 0000000..ead7748 --- /dev/null +++ b/libcds/src/static_sequence/static_sequence_gmr.cpp @@ -0,0 +1,173 @@ +/* static_sequence_gmr.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * GMR + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +static_sequence_gmr::static_sequence_gmr(uint * sequence, uint n, uint chunk_length, static_bitsequence_builder * bmb, static_sequence_builder * ssb) { + len = n; + if(len%chunk_length) len+=chunk_length-len%chunk_length; + uint * new_seq = new uint[len]; + sigma = 0; + for(uint i=0;ichunk_length = chunk_length; + build(new_seq,bmb,ssb); + delete [] new_seq; +} + +static_sequence_gmr::static_sequence_gmr() { +} + +static_sequence_gmr::~static_sequence_gmr() { + delete B; + for (uint i=0;ibuild(sequence+i*chunk_length, chunk_length); + //cout << "1." << i << endl; cout.flush(); + assert(chunk[i]!=NULL); + } + uint * ones = get_ones(sequence); + uint *B_bitmap = new uint[(2+len+(unsigned long long)num_chunks*sigma)/W+1]; + assert(B_bitmap!=NULL); + for (uint i=0;i<(2+len+(unsigned long long)num_chunks*sigma)/W+1;i++) + B_bitmap[i] = 0; + uint pos=0; + for (unsigned long long i=0;i<(unsigned long long)num_chunks*sigma;i++) { + for (uint j=0;jselect0(bp); + uint prev = rank_pos-bp+1; + uint sum = B->rank1(B->select0(bp+i)) - prev; + uint cr = chunk[i]->rank(c,j-i*chunk_length); + return sum + cr; +} + + +uint static_sequence_gmr::select(uint c, uint j) { +// c++; + uint rank_pos = B->select0(c*(len/chunk_length)); + uint prev = B->rank1(rank_pos); + uint sel = prev+j; + uint block = (B->select1(sel)); + uint i = block-sel+1; + uint desp = B->rank1(B->select0((i)))-prev; + if (desp+1==0) desp=0; + uint rchunk = i%(len/chunk_length); + return (rchunk*chunk_length)+chunk[rchunk]->select(c, j-desp); +} + + +uint static_sequence_gmr::access(uint j) { + return chunk[j/chunk_length]->access(j%chunk_length); +} + + +uint static_sequence_gmr::size() { + uint s = 0; + for (uint i=0;isize(); + return s+B->size()+sizeof(static_sequence_gmr); +} + +uint static_sequence_gmr::save(FILE *fp) { + uint wr = GMR_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + wr += fwrite(&len,sizeof(uint),1,fp); + wr += fwrite(&sigma,sizeof(uint),1,fp); + wr += fwrite(&chunk_length,sizeof(uint),1,fp); + if(wr!=4) return 1; + if(B->save(fp)) return 1; + for(uint i=0;isave(fp)) return 1; + return 0; +} + +static_sequence_gmr * static_sequence_gmr::load(FILE *fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + if(rd!=GMR_HDR) return NULL; + static_sequence_gmr * ret = new static_sequence_gmr(); + rd = fread(&ret->len,sizeof(uint),1,fp); + rd += fread(&ret->sigma,sizeof(uint),1,fp); + rd += fread(&ret->chunk_length,sizeof(uint),1,fp); + if(rd!=3) { + delete ret; + return NULL; + } + ret->B = static_bitsequence::load(fp); + if(ret->B==NULL) { + delete ret; + return NULL; + } + ret->chunk = new static_sequence*[ret->len/ret->chunk_length]; + for(uint i=0;ilen/ret->chunk_length;i++) { + ret->chunk[i] = static_sequence::load(fp); + if(ret->chunk[i]==NULL) { + delete ret; + return NULL; + } + } + return ret; +} diff --git a/libcds/src/static_sequence/static_sequence_gmr.h b/libcds/src/static_sequence/static_sequence_gmr.h new file mode 100644 index 0000000..76e0d65 --- /dev/null +++ b/libcds/src/static_sequence/static_sequence_gmr.h @@ -0,0 +1,56 @@ +/* static_sequence_gmr.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * GMR + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _STATIC_SEQUENCE_GMR_H +#define _STATIC_SEQUENCE_GMR_H + +#include +#include +#include +#include +#include +#include +#include + +using namespace std; + +class static_sequence_gmr : public static_sequence { + public: + static_sequence_gmr(uint * sequence, uint n, uint chunk_length, static_bitsequence_builder * bmb, static_sequence_builder * ssb); + ~static_sequence_gmr(); + virtual uint rank(uint c, uint j); + virtual uint select(uint c, uint j); + virtual uint access(uint j); + virtual uint size(); + virtual uint save(FILE *fp); + static static_sequence_gmr * load(FILE *fp); + + protected: + static_sequence_gmr(); + void build(uint * sequence, static_bitsequence_builder * bmb, static_sequence_builder * ssb); + uint * get_ones(uint * sequence); + + uint sigma, chunk_length; + static_sequence ** chunk; + static_bitsequence * B; +}; + +#endif diff --git a/libcds/src/static_sequence/static_sequence_wvtree_noptrs.cpp b/libcds/src/static_sequence/static_sequence_wvtree_noptrs.cpp new file mode 100644 index 0000000..72eeac8 --- /dev/null +++ b/libcds/src/static_sequence/static_sequence_wvtree_noptrs.cpp @@ -0,0 +1,283 @@ +/* static_sequence_wvtree_noptrs.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_sequence_wvtree_noptrs definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +static_sequence_wvtree_noptrs::static_sequence_wvtree_noptrs(uint * symbols, uint n, static_bitsequence_builder * bmb, alphabet_mapper * am) { + this->n=n; + this->am=am; + am->use(); + for(uint i=0;imap(symbols[i]); + max_v=max_value(symbols,n); + height=bits(max_v); + uint *occurrences=new uint[max_v+1]; + for(uint i=0;i<=max_v;i++) occurrences[i]=0; + for(uint i=0;ibuild(oc,new_n+1); + delete [] occurrences; + this->n = new_n; + uint ** _bm=new uint*[height]; + for(uint i=0;ibuild(_bm[i],new_n); + delete [] _bm[i]; + } + delete [] _bm; + for(uint i=0;iunmap(symbols[i]); + delete [] new_symb; + delete [] oc; +} + +static_sequence_wvtree_noptrs::static_sequence_wvtree_noptrs() { +} + +static_sequence_wvtree_noptrs::~static_sequence_wvtree_noptrs() { + for(uint i=0;iunuse(); +} + +uint static_sequence_wvtree_noptrs::save(FILE *fp) { + uint wr = WVTREE_NOPTRS_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + wr += fwrite(&n,sizeof(uint),1,fp); + wr += fwrite(&max_v,sizeof(uint),1,fp); + wr += fwrite(&height,sizeof(uint),1,fp); + if(wr!=4) return 1; + if(am->save(fp)) return 1; + for(uint i=0;isave(fp)) return 1; + if(occ->save(fp)) return 1; + return 0; +} + +static_sequence_wvtree_noptrs * static_sequence_wvtree_noptrs::load(FILE *fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + if(rd!=WVTREE_NOPTRS_HDR) return NULL; + static_sequence_wvtree_noptrs * ret = new static_sequence_wvtree_noptrs(); + rd = fread(&ret->n,sizeof(uint),1,fp); + rd += fread(&ret->max_v,sizeof(uint),1,fp); + rd += fread(&ret->height,sizeof(uint),1,fp); + if(rd!=3) { + delete ret; + return NULL; + } + ret->am = alphabet_mapper::load(fp); + if(ret->am==NULL) { + delete ret; + return NULL; + } + ret->am->use(); + ret->bitstring = new static_bitsequence*[ret->height]; + for(uint i=0;iheight;i++) { + ret->bitstring[i] = static_bitsequence::load(fp); + if(ret->bitstring[i]==NULL){ + delete ret; + return NULL; + } + } + ret->occ = static_bitsequence::load(fp); + if(ret->occ==NULL) { + delete ret; + return NULL; + } + return ret; +} + +uint static_sequence_wvtree_noptrs::access(uint pos) { + uint level=0; + uint ret=0; + uint start=0; + uint end=n-1; + while(level=start && pos<=end); + if(bitstring[level]->access(pos)) { + ret=set(ret,level); + pos=bitstring[level]->rank1(pos-1)-bitstring[level]->rank1(start-1); + start=(bitstring[level]->rank1(end)-bitstring[level]->rank1(start-1)); + start=end-start+1; + pos+=start; + } + else { + pos=pos-start-(bitstring[level]->rank1(pos)-bitstring[level]->rank1(start-1)); + end=end-start-(bitstring[level]->rank1(end)-bitstring[level]->rank1(start-1)); + end+=start; + pos+=start; + } + level++; + } + return am->unmap(ret); +} + +uint static_sequence_wvtree_noptrs::rank(uint symbol, uint pos) { + symbol = am->map(symbol); + uint level=0; + uint start=0; + uint end=n-1; + uint count=0; + while(levelrank1(pos)-bitstring[level]->rank1(start-1)-1; + count=pos+1; + start=(bitstring[level]->rank1(end)-bitstring[level]->rank1(start-1)); + start=end-start+1; + pos+=start; + } + else { + pos=pos-start+bitstring[level]->rank1(start-1)-bitstring[level]->rank1(pos); + count=pos+1; + end=end-start-(bitstring[level]->rank1(end)-bitstring[level]->rank1(start-1)); + end+=start; + pos+=start; + } + level++; + if(count==0) return 0; + } + return count; +} + +inline uint get_start(uint symbol, uint mask) { + return symbol&mask; +} + +inline uint get_end(uint symbol, uint mask) { + return get_start(symbol,mask)+!mask+1; +} + +uint static_sequence_wvtree_noptrs::select(uint symbol, uint j) { + symbol = am->map(symbol); + uint mask = (1<select1(start)+1); + end = occ->select1(end+1)-1; + if(is_set(symbol,level)) { + uint ones_start = bitstring[level]->rank1(start-1); + pos = bitstring[level]->select1(ones_start+pos)-start+1; + } + else { + uint ones_start = bitstring[level]->rank1(start-1); + pos = bitstring[level]->select0(start-ones_start+pos)-start+1; + } + mask <<=1; + sum <<=1; + if(level==0) break; + level--; + } + return pos-1; +} + +uint static_sequence_wvtree_noptrs::size() { + uint ptrs = sizeof(static_sequence_wvtree_noptrs)+height*sizeof(static_sequence*); + uint bytesBitstrings = 0; + for(uint i=0;isize(); + return bytesBitstrings+occ->size()+ptrs; +} + +void static_sequence_wvtree_noptrs::build_level(uint **bm, uint *symbols, uint level, uint length, uint offset) { + if(level==height) return; + uint cleft=0; + for(uint i=0;i>= 1; + } + return ret; +} + +bool static_sequence_wvtree_noptrs::is_set(uint val, uint ind) { + assert(ind +#include +#include +#include +#include +#include +#include + +using namespace std; + +class static_sequence_wvtree_noptrs : public static_sequence { + public: + + /** Builds a Wavelet Tree for the string + * pointed by symbols assuming its length + * equals n and uses bmb to build the bitsequence */ + static_sequence_wvtree_noptrs(uint * symbols, uint n, static_bitsequence_builder * bmb, alphabet_mapper * am); + + /** Destroys the Wavelet Tree */ + virtual ~static_sequence_wvtree_noptrs(); + + virtual uint rank(uint symbol, uint pos); + virtual uint select(uint symbol, uint i); + virtual uint access(uint pos); + virtual uint size(); + + virtual uint save(FILE *fp); + static static_sequence_wvtree_noptrs * load(FILE *fp); + + protected: + + static_sequence_wvtree_noptrs(); + + alphabet_mapper * am; + /** Only one bit-string for the Wavelet Tree. */ + static_bitsequence **bitstring, *occ; + + /** Length of the string. */ + uint n; + + /** Height of the Wavelet Tree. */ + uint height,max_v; + + /** Obtains the maximum value from the string + * symbols of length n */ + uint max_value(uint * symbols, uint n); + + /** How many bits are needed to represent val */ + uint bits(uint val); + + /** Returns true if val has its ind-th bit set + * to one. */ + bool is_set(uint val, uint ind); + + /** Sets the ind-th bit in val */ + uint set(uint val, uint ind); + + /** Recursive function for building the Wavelet Tree. */ + void build_level(uint **bm, uint *symbols, uint level, uint length, uint offset); +}; +#endif diff --git a/libcds/src/utils/alphabet_mapper_cont.cpp b/libcds/src/utils/alphabet_mapper_cont.cpp new file mode 100644 index 0000000..f6a9cb5 --- /dev/null +++ b/libcds/src/utils/alphabet_mapper_cont.cpp @@ -0,0 +1,77 @@ +/* alphabet_mapper_cont.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * alphabet_mapper_cont definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include + +alphabet_mapper_cont::alphabet_mapper_cont(uint * seq, uint n, static_bitsequence_builder *bmb) { + uint max_v = 0; + for(uint i=0;ibuild(bmap,max_v); + delete [] bmap; +} + +alphabet_mapper_cont::alphabet_mapper_cont() { +} + +alphabet_mapper_cont::~alphabet_mapper_cont() { + delete m; +} + +uint alphabet_mapper_cont::map(uint s) { + return m->rank1(s); +} + +uint alphabet_mapper_cont::unmap(uint s) { + return m->select1(s); +} + +uint alphabet_mapper_cont::size() { + return sizeof(alphabet_mapper_cont)+m->size(); +} + +uint alphabet_mapper_cont::save(FILE *fp) { + uint wr = ALPHABET_MAPPER_CONT_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + if(wr!=1) return 1; + if(m->save(fp)) return 1; + return 0; +} + +alphabet_mapper_cont * alphabet_mapper_cont::load(FILE * fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + if(rd!=ALPHABET_MAPPER_CONT_HDR) return NULL; + alphabet_mapper_cont * ret = new alphabet_mapper_cont(); + ret->m = static_bitsequence::load(fp); + if(ret->m==NULL) { + delete ret; + return NULL; + } + return ret; +} diff --git a/libcds/src/utils/alphabet_mapper_cont.h b/libcds/src/utils/alphabet_mapper_cont.h new file mode 100644 index 0000000..0b323d5 --- /dev/null +++ b/libcds/src/utils/alphabet_mapper_cont.h @@ -0,0 +1,52 @@ +/* alphabet_mapper_cont.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * alphabet_mapper_cont definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#ifndef _ALPHABET_MAPPER_CONT_H +#define _ALPHABET_MAPPER_CONT_H + +#include +#include +#include +#include +#include + +using namespace std; + +/** Mapper that doesn't change the value (identity) + * + * @author Francisco Claude + */ +class alphabet_mapper_cont : public alphabet_mapper { + public: + alphabet_mapper_cont(uint * seq, uint n, static_bitsequence_builder *bmb); + virtual ~alphabet_mapper_cont(); + virtual uint map(uint s); + virtual uint unmap(uint s); + virtual uint size(); + virtual uint save(FILE *fp); + static alphabet_mapper_cont * load(FILE *fp); + + protected: + alphabet_mapper_cont(); + static_bitsequence * m; +}; + +#endif /* _ALPHABET_MAPPER_CONT_H */ diff --git a/libcds/tests/static_bitsequence_test.cpp b/libcds/tests/static_bitsequence_test.cpp new file mode 100644 index 0000000..9fc08f0 --- /dev/null +++ b/libcds/tests/static_bitsequence_test.cpp @@ -0,0 +1,71 @@ +/* static_bitsequence_test.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_test + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include "static_bitsequence_tester.h" + +using namespace std; + +int main(int argc, char ** argv) { + if(argc!=5) { + cout << "usage: " << argv[0] << " " << endl; + return 0; + } + FILE * fp = fopen(argv[1],"r"); + if(fp==NULL) { + cout << "Error opening " << argv[1] << endl; + return -1; + } + uint *bitseq, len;//, ones; + uint l=fread(&len, sizeof(uint), 1, fp); + //l += fread(&ones,sizeof(uint),1,fp); + bitseq = new uint[uint_len(len,1)]; + l+=fread(bitseq, sizeof(uint), uint_len(len,1), fp); + fclose(fp); + + uint sample_rate; + stringstream ss(argv[3]); + ss >> sample_rate; + + static_bitsequence * bs; + + if(string(argv[2])==string("r")) bs = new static_bitsequence_rrr02(bitseq,len,sample_rate); + else bs = new static_bitsequence_brw32(bitseq,len,sample_rate); + + cout << "Size: " << bs->size() << endl; + cout << "bpb = " << bs->size()*8./len << endl; + + if(string(argv[4])==string("t")) + test_bitsequence(bitseq,len,bs); + cout << "******************************************" << endl; + speed_access(bs, bitseq, len); + cout << "******************************************" << endl; + speed_rank0(bs, bitseq, len); + cout << "******************************************" << endl; + speed_rank1(bs, bitseq, len); + cout << "******************************************" << endl; + speed_select0(bs, bitseq, len); + cout << "******************************************" << endl; + speed_select1(bs, bitseq, len); +} diff --git a/libcds/tests/static_bitsequence_tester.cpp b/libcds/tests/static_bitsequence_tester.cpp new file mode 100644 index 0000000..8168673 --- /dev/null +++ b/libcds/tests/static_bitsequence_tester.cpp @@ -0,0 +1,179 @@ +/* static_bitsequence_tester.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_tester + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include + +using namespace std; + +/* Time meassuring */ +double ticks= (double)sysconf(_SC_CLK_TCK); +struct tms t1,t2; + +void start_clock() { + times (&t1); +} + + +double stop_clock() { + times (&t2); + return (t2.tms_utime-t1.tms_utime)/ticks; +} +/* end Time meassuring */ + +uint NQUERIES=10000000; +uint SEED=47; + +void load(char *fname, uint ** text, uint * n) { + FILE * fp = fopen(fname,"r"); + if(fp==NULL) { + cout << "could not open " << fname << endl; + return; + } + if(fread(n,sizeof(uint),1,fp)!=1) { + cout << "Error reading file " << fname << endl; + return; + } + *text = new uint[uint_len(*n,1)]; + + if(fread(*text,sizeof(uint),uint_len(*n,1),fp)!=uint_len(*n,1)) { + cout << "Error reading file " << fname << endl; + return; + } +} + +void test_bitsequence(uint * bitseq, uint len, static_bitsequence * bs) { + uint ones = 0; + bool error = false; + for(uint i=0;ilength()/10))==0) { cout << endl; cout.flush(); } + } + if(bitget(bitseq,i)) ones++; + if(bs->access(i) != (bitget(bitseq,i)!=0)) { + cout << "Access error for position " << i << endl; + cout << " got: " << bs->access(i) << " expected: " << (bitget(bitseq,i)!=0) << endl; + error = true; + } + if(bs->rank1(i) != ones) { + cout << "Rank1 error for position " << i << endl; + cout << " got: " << bs->rank1(i) << " expected: " << ones << endl; + error = true; + } + if(bitget(bitseq,i) && bs->select1(ones) != i) { + cout << "Select1 error for position " << i << " ones:" << ones << endl; + cout << " got: " << bs->select1(ones) << " expected: " << i << endl; + error = true; + } + if(bs->rank0(i) != i+1-ones) { + cout << "Rank0 error for position " << i << endl; + cout << " got: " << bs->rank0(i) << " expected: " << ones << endl; + error = true; + } + if(!bitget(bitseq,i) && bs->select0(i+1-ones) != i) { + cout << "Select0 error for position " << i << endl; + cout << " got: " << bs->select0(i+1-ones) << " expected: " << i << endl; + error = true; + } + } + cout << "." << endl; +} + +void speed_access(static_bitsequence * ss, uint * bitseq, uint n) { + uint acc=0; + srand(SEED); + + start_clock(); + for(uint i=0;iaccess(pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " accesses: " << t << " secs" << endl; + cout << " * Time per access: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; +} + +void speed_rank0(static_bitsequence * ss, uint * bitseq, uint n) { + uint acc=0; + srand(SEED); + + start_clock(); + for(uint i=0;irank0(pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " rank0s: " << t << " secs" << endl; + cout << " * Time per rank0: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; +} + +void speed_rank1(static_bitsequence * ss, uint * bitseq, uint n) { + uint acc=0; + srand(SEED); + + start_clock(); + for(uint i=0;irank1(pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " rank1s: " << t << " secs" << endl; + cout << " * Time per rank1: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; +} + +void speed_select0(static_bitsequence * ss, uint * bitseq, uint n) { + uint acc=0; + uint ones=ss->rank0(n-1); + srand(SEED); + + start_clock(); + for(uint i=0;iselect0(pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " select0s: " << t << " secs" << endl; + cout << " * Time per select0: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; +} + +void speed_select1(static_bitsequence * ss, uint * bitseq, uint n) { + uint acc=0; + uint ones=ss->rank1(n-1); + srand(SEED); + + start_clock(); + for(uint i=0;iselect1(pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " select1s: " << t << " secs" << endl; + cout << " * Time per select1: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; +} diff --git a/libcds/tests/static_bitsequence_tester.h b/libcds/tests/static_bitsequence_tester.h new file mode 100644 index 0000000..d7120b6 --- /dev/null +++ b/libcds/tests/static_bitsequence_tester.h @@ -0,0 +1,43 @@ +/* static_bitsequence_tester.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_tester + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifndef STATIC_BITSEQUENCE_TESTER_H +#define STATIC_BITSEQUENCE_TESTER_H + +void load(char *fname, uint ** text, uint * n); +void test_bitsequence(uint * bitseq, uint len, static_bitsequence * bs); +void speed_access(static_bitsequence * ss, uint * bitseq, uint n); +void speed_rank0(static_bitsequence * ss, uint * bitseq, uint n); +void speed_rank1(static_bitsequence * ss, uint * bitseq, uint n); +void speed_select0(static_bitsequence * ss, uint * bitseq, uint n); +void speed_select1(static_bitsequence * ss, uint * bitseq, uint n); + +#endif diff --git a/libcds/tests/static_sequence_gmr_chunk_test.cpp b/libcds/tests/static_sequence_gmr_chunk_test.cpp new file mode 100644 index 0000000..1cb656c --- /dev/null +++ b/libcds/tests/static_sequence_gmr_chunk_test.cpp @@ -0,0 +1,80 @@ +/* static_sequence_gmr_chunk_test.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_sequence_gmr_chunk_test + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "static_sequence_tester.h" + +int main(int argc, char ** argv) { + if(argc!=6) { + cout << "Usage: " << argv[0] << " " << endl; + return 0; + } + stringstream ss; + ss << argv[3]; + uint samp; + ss >> samp; + stringstream ss2; + ss2 << argv[4]; + uint perm_samp; + ss2 >> perm_samp; + + uint * text; + uint n; + load(argv[1],&text,&n); + + static_bitsequence_builder * bmb; + if(string(argv[2])==string("b")) + bmb = new static_bitsequence_builder_brw32(samp); + else + bmb = new static_bitsequence_builder_rrr02(samp); + + static_permutation_builder * spb = new static_permutation_builder_mrrr(perm_samp,bmb); + static_sequence_builder * ssb = new static_sequence_builder_gmr_chunk(bmb, spb); + static_sequence * sseq = ssb->build(text,n); + + delete bmb; + delete ssb; + delete spb; + + sseq = savetest(argv[1], sseq); + if(string(argv[5])==string("t")) + test_static_sequence(text,n,sseq); + else + cout << "Size: " << sseq->size() << endl; + cout << "*************************************" << endl; + speed_access(sseq,text,n); + cout << "*************************************" << endl; + speed_rank(sseq,text,n); + cout << "*************************************" << endl; + speed_select(sseq,text,n); + + delete sseq; + delete [] text; +} + diff --git a/libcds/tests/static_sequence_gmr_test.cpp b/libcds/tests/static_sequence_gmr_test.cpp new file mode 100644 index 0000000..26cb00b --- /dev/null +++ b/libcds/tests/static_sequence_gmr_test.cpp @@ -0,0 +1,95 @@ +/* static_sequence_gmr_test.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_sequence_gmr_test + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "static_sequence_tester.h" + +int main(int argc, char ** argv) { + if(argc!=8) { + cout << "Usage: " << argv[0] << " " << endl; + return 0; + } + stringstream ss; + ss << argv[4]; + uint samp; + ss >> samp; + stringstream ss2; + ss2 << argv[5]; + uint chunk_length; + ss2 >> chunk_length; + stringstream ss3; + ss3 << argv[6]; + uint perm_samp; + ss3 >> perm_samp; + + uint * text; + uint n; + load(argv[1],&text,&n); + + static_bitsequence_builder * bmb; + if(string(argv[2])==string("b")) + bmb = new static_bitsequence_builder_brw32(samp); + else + bmb = new static_bitsequence_builder_rrr02(samp); + + static_sequence_builder * ssb; + + if(string(argv[3])==string("w")) { + alphabet_mapper * am = new alphabet_mapper_cont(text,n,bmb); + ssb = new static_sequence_builder_wvtree_noptrs(bmb,am); + } else if(string(argv[3])==string("p")) { + alphabet_mapper * am = new alphabet_mapper_none(); + wt_coder * wc = new wt_coder_huff(text,n,am); + ssb = new static_sequence_builder_wvtree(wc,bmb,am); + } else { + ssb = new static_sequence_builder_gmr_chunk(bmb, new static_permutation_builder_mrrr(perm_samp,bmb)); + } + + static_sequence * sseq = new static_sequence_gmr(text,n,chunk_length,bmb,ssb); + + delete bmb; + delete ssb; + + sseq = savetest(argv[1], sseq); + if(string(argv[7])==string("t")) + test_static_sequence(text,n,sseq); + else + cout << "Size: " << sseq->size() << endl; + cout << "*************************************" << endl; + speed_access(sseq,text,n); + cout << "*************************************" << endl; + speed_rank(sseq,text,n); + cout << "*************************************" << endl; + speed_select(sseq,text,n); + + delete sseq; + delete [] text; +} + diff --git a/libcds/tests/static_sequence_tester.cpp b/libcds/tests/static_sequence_tester.cpp new file mode 100644 index 0000000..01712f8 --- /dev/null +++ b/libcds/tests/static_sequence_tester.cpp @@ -0,0 +1,241 @@ +/* static_sequence_tester.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_sequence_tester + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include "static_sequence_tester.h" + +using namespace std; + +/* Time meassuring */ +double ticks= (double)sysconf(_SC_CLK_TCK); +struct tms t1,t2; + +void start_clock() { + times (&t1); +} + + +double stop_clock() { + times (&t2); + return (t2.tms_utime-t1.tms_utime)/ticks; +} +/* end Time meassuring */ + +uint NQUERIES=100000; +uint SEED=47; + +void test_static_sequence(uint * symbols, uint n, static_sequence * ss) { + cout << "Size: " << ss->size() << endl; + uint max_v=0; + for(uint i=0;iaccess(i); + uint r = /*occ[symbols[i]];/*/ss->rank(symbols[i],i); + uint s = /*i; /*/ss->select(symbols[i],occ[symbols[i]]); + uint rM1 = (i==0)?0:ss->rank(symbols[i],i-1); + if(r!=occ[symbols[i]]) { + cout << "Error in rank for symbol " << symbols[i] << " at position " << i << endl; + cout << "value: " << r << endl; + cout << "Expected: " << occ[symbols[i]] << endl; + error = true; + } + if(s!=i) { + cout << "Error in select for symbol " << symbols[i] << " at position " << occ[symbols[i]] << endl; + cout << "value: " << s << endl; + cout << "Expected: " << i << endl; + error = true; + } + if(a!=symbols[i]) { + cout << "Error in access at position " << i << endl; + cout << "value: " << a << endl; + cout << "Expected: " << symbols[i] << endl; + error = true; + } + if(rM1!=occ[symbols[i]]-1) { + cout << "Error in rankM1 for symbol " << symbols[i] << " at position " << i-1 << endl; + cout << "value: " << rM1 << endl; + cout << "Expected: " << occ[symbols[i]]-1 << endl; + error = true; + } + } + if(!error) + cout << "Test OK! It works :)" << endl; + delete [] occ; +} + +void load(char *fname, uint ** text, uint * n) { + struct stat text_info; + if(stat(fname,&text_info)<0) { + cout << "could not stat: " << fname << endl; + return; + } + + *n= (uint)text_info.st_size/4; + *text = new uint[*n+1]; + FILE * fp = fopen(fname,"r"); + if(fp==NULL) { + cout << "could not open " << fname << endl; + return; + } + + cout << "File: " << fname << endl; + cout << "Length: " << *n << endl; + + uint max_symbol = 0; + for(uint i=0;i<*n;i++) { + uint c; + uint read=fread(&c,sizeof(uint),1,fp); + //assert(read==1); + (*text)[i] = 1+(uint)c; + c += read; + max_symbol = max(max_symbol,(*text)[i]); + } + max_symbol++; + fclose(fp); + + /*static_sequence * ss = ssb->build(*text,*n); + + char * fname2 = new char[10+string(fname).length()]; + sprintf(fname2,"%s.wt",fname); + fp = fopen(fname2,"w"); + ss->save(fp); + fclose(fp); + delete ss; + fp = fopen(fname2,"r"); + ss = static_sequence::load(fp); + fclose(fp); + delete [] fname2; + return ss;*/ +} + +static_sequence * savetest(char * bname, static_sequence * ss) { + char * fname = new char[10+string(bname).length()]; + sprintf(fname,"%s.ss",bname); + FILE * fp = fopen(fname,"w"); + cout << "Saving structure ... "; cout.flush(); + ss->save(fp); + cout << "done" << endl; cout.flush(); + fclose(fp); + cout << "Deleting structure ... "; cout.flush(); + delete ss; + cout << "done" << endl; cout.flush(); + fp = fopen(fname,"r"); + cout << "Loading structure ... "; cout.flush(); + ss = static_sequence::load(fp); + cout << "done" << endl; cout.flush(); + fclose(fp); + if(ss==NULL) cout << "Error loading static_sequence" << endl; + //cout << ss << endl; + delete [] fname; + return ss; +} + +void speed_rank(static_sequence * ss, uint * text, uint n) { + uint max_symbol = 0; + for(uint i=0;i0) + valid_symbols[c++]=i; + } + + uint acc=0; + srand(SEED); + + start_clock(); + for(uint i=0;irank(valid_symbols[symb],pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " ranks: " << t << " secs" << endl; + cout << " * Time per rank: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; + delete [] valid_symbols; + delete [] occ; +} + +void speed_select(static_sequence * ss, uint * text, uint n) { + uint max_symbol = 0; + for(uint i=0;i0) + valid_symbols[c++]=i; + } + + uint acc=0; + srand(SEED); + + start_clock(); + for(uint i=0;iselect(valid_symbols[symb],pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " selects: " << t << " secs" << endl; + cout << " * Time per select: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; + delete [] valid_symbols; + delete [] occ; +} + +void speed_access(static_sequence * ss, uint * text, uint n) { + uint acc=0; + srand(SEED); + + start_clock(); + for(uint i=0;iaccess(pos); + } + double t = stop_clock(); + cout << " * Time for " << NQUERIES << " accesses: " << t << " secs" << endl; + cout << " * Time per access: " << 1000*t/NQUERIES << " msecs" << endl; + cout << " - Check sum: " << acc << endl; +} diff --git a/libcds/tests/static_sequence_tester.h b/libcds/tests/static_sequence_tester.h new file mode 100644 index 0000000..a3583b7 --- /dev/null +++ b/libcds/tests/static_sequence_tester.h @@ -0,0 +1,42 @@ +/* static_sequence_tester.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_sequence_tester + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + +#ifndef STATIC_SEQUENCE_TESTER_H +#define STATIC_SEQUENCE_TESTER_H + +void test_static_sequence(uint * symbols, uint n, static_sequence * ss); +void load(char *fname, uint ** text, uint * n); +static_sequence * savetest(char * bname, static_sequence * ss); +void speed_rank(static_sequence * ss, uint * text, uint n); +void speed_select(static_sequence * ss, uint * text, uint n); +void speed_access(static_sequence * ss, uint * text, uint n); + +#endif diff --git a/libcds/tests/static_sequence_wvtree_noptrs_test.cpp b/libcds/tests/static_sequence_wvtree_noptrs_test.cpp new file mode 100644 index 0000000..ea788d0 --- /dev/null +++ b/libcds/tests/static_sequence_wvtree_noptrs_test.cpp @@ -0,0 +1,80 @@ +/* static_sequence_wvtree_noptrs_test.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_sequence_wvtree_noptrs_test + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "static_sequence_tester.h" + +int main(int argc, char ** argv) { + if(argc!=6) { + cout << "Usage: " << argv[0] << " " << endl; + return 0; + } + stringstream ss; + ss << argv[4]; + uint samp; + ss >> samp; + + uint * text; + uint n; + load(argv[1],&text,&n); + + alphabet_mapper * am; + + static_bitsequence_builder * bmb; + + if(string(argv[2])==string("b")) + bmb = new static_bitsequence_builder_brw32(samp); + else + bmb = new static_bitsequence_builder_rrr02(samp); + + if(string(argv[3])==string("p")) + am = new alphabet_mapper_none(); + else + am = new alphabet_mapper_cont(text,n,bmb); + + static_sequence_builder * ssb = new static_sequence_builder_wvtree_noptrs(bmb,am); + static_sequence * sseq = ssb->build(text,n); + + delete bmb; + delete ssb; + sseq = savetest(argv[1], sseq); + if(string(argv[5])==string("t")) + test_static_sequence(text,n,sseq); + else + cout << "Size: " << sseq->size() << endl; + cout << "*************************************" << endl; + speed_access(sseq,text,n); + cout << "*************************************" << endl; + speed_rank(sseq,text,n); + cout << "*************************************" << endl; + speed_select(sseq,text,n); + + delete [] text; + delete sseq; +} diff --git a/libcds/tests/static_sequence_wvtree_test.cpp b/libcds/tests/static_sequence_wvtree_test.cpp new file mode 100644 index 0000000..4470f24 --- /dev/null +++ b/libcds/tests/static_sequence_wvtree_test.cpp @@ -0,0 +1,81 @@ +/* static_sequence_wvtree_test.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_sequence_wvtree_test + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "static_sequence_tester.h" + +int main(int argc, char ** argv) { + if(argc!=6) { + cout << "Usage: " << argv[0] << " " << endl; + return 0; + } + stringstream ss; + ss << argv[4]; + uint samp; + ss >> samp; + + uint * text; + uint n; + load(argv[1],&text,&n); + + alphabet_mapper * am = new alphabet_mapper_none(); + + static_bitsequence_builder * bmb; + if(string(argv[2])==string("b")) + bmb = new static_bitsequence_builder_brw32(samp); + else + bmb = new static_bitsequence_builder_rrr02(samp); + + wt_coder * wc; + if(string(argv[3])==string("p")) + wc = new wt_coder_binary(text,n,am); + else + wc = new wt_coder_huff(text,n,am); + + static_sequence_builder * ssb = new static_sequence_builder_wvtree(wc,bmb,am); + static_sequence * sseq = ssb->build(text,n); + delete bmb; + delete ssb; + + sseq = savetest(argv[1], sseq); + if(string(argv[5])==string("t")) + test_static_sequence(text,n,sseq); + else + cout << "Size: " << sseq->size() << endl; + cout << "*************************************" << endl; + speed_access(sseq,text,n); + cout << "*************************************" << endl; + speed_rank(sseq,text,n); + cout << "*************************************" << endl; + speed_select(sseq,text,n); + + delete sseq; + delete [] text; +} + diff --git a/libcds/tests/text_to_int.cpp b/libcds/tests/text_to_int.cpp new file mode 100644 index 0000000..200fbdf --- /dev/null +++ b/libcds/tests/text_to_int.cpp @@ -0,0 +1,74 @@ +/* test_to_int.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * text_to_int + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "static_sequence_tester.h" + +int main(int argc, char ** argv) { + if(argc!=3) { + cout << "Usage: " << argv[0] << " " << endl; + return 0; + } + char * fname = argv[1]; + char * oname = argv[2]; + + FILE * fp = fopen(fname,"r"); + if(fp==NULL) { + cout << "could not open " << fname << endl; + return 1; + } + struct stat text_info; + if(stat(fname,&text_info)<0) { + cout << "could not stat: " << fname << endl; + return 1; + } + + uint n= (uint)text_info.st_size; + uint * text = new uint[n]; + + for(uint i=0;i