-
+/* static_sequence.cpp
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * static_sequence definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
#include <static_sequence.h>
static_sequence::static_sequence() {} // Default constructor; the body is empty.
--- /dev/null
+
+#include "static_sequence_gmr_chunk.h"
+
+/** Builds the bitmap X and the permutation pi for one chunk.
+ *
+ * sequence[0..chunk_length-1] holds the symbols of the chunk; sigma is set to the
+ * largest symbol found. X receives, for every symbol c in [0,sigma], one 0
+ * followed by one 1 per occurrence of c, and a final 0 after the last symbol.
+ * pi stores the positions 0..chunk_length-1 grouped by symbol (counting sort that
+ * keeps positions in increasing order inside each group), each position written
+ * in bits(chunk_length-1) bits.
+ *
+ * NOTE(review): bmb and pmb are not used by this body, and `t` (passed to
+ * createPerm) is not declared in it - confirm where it is meant to come from.
+ * NOTE(review): X_bitmap and pi are handed to BitRankW32Int / createPerm and are
+ * not freed here; presumably those take ownership - confirm.
+ */
+static_sequence_gmr_chunk::static_sequence_gmr_chunk(uint * sequence, uint chunk_length, static_bitsequence_builder *bmb, static_permutation_builder *pmb) {
+  sigma = 0;
+  for(uint i=0;i<chunk_length;i++) {
+    sigma = max(sigma,sequence[i]);
+  }
+  uint * X_bitmap = new uint[(1+chunk_length+sigma)/W+1];
+  assert(X_bitmap!=NULL);
+  for(uint i=0;i<(1+sigma+chunk_length)/W+1;i++) X_bitmap[i]=0;
+  uint pi_blen = bits(chunk_length-1);
+  uint * pi = new uint[pi_blen*chunk_length/W+1];
+  assert(pi!=NULL);
+  for(uint i=0;i<pi_blen*chunk_length/W+1;i++) pi[i] = 0;
+  uint X_pos = 0;
+  // counter[c+1] holds the number of occurrences of symbol c, so the largest
+  // symbol (sigma) needs slot sigma+1: the table must have sigma+2 entries.
+  uint * counter = new uint[sigma+2];
+  for(uint c=0;c<=sigma+1;c++) counter[c]=0;
+  for(uint i=0;i<chunk_length;i++) counter[sequence[i]+1]++;
+
+  // c runs up to and including sigma so that the largest symbol also gets its
+  // 0 and its run of 1s in X. After this loop counter[c] is the number of
+  // elements whose symbol is smaller than c, i.e. the first slot of c in pi.
+  for(uint c=0;c<=sigma;c++) {
+    X_pos++;
+    for(uint i=0;i<counter[c+1];i++) {
+      bitset(X_bitmap, X_pos);
+      X_pos++;
+    }
+    counter[c+1]+=counter[c];
+  }
+  // Final 0 closing the run of the last symbol.
+  X_pos++;
+  for(uint i=0;i<chunk_length;i++) {
+    bitput(pi, pi_blen*counter[sequence[i]], pi_blen, (uint)i);
+    counter[sequence[i]]++;
+  }
+  this->X = new BitRankW32Int(X_bitmap, X_pos, true,20);
+  assert(X!=NULL);
+  this->permutation = createPerm(pi, chunk_length, t);
+  assert(permutation!=NULL);
+  this->chunk_length = chunk_length;
+  delete [] counter;
+}
+
+
+static_sequence_gmr_chunk::~static_sequence_gmr_chunk() { // Releases the two structures assigned by the constructor.
+ delete X; // bitmap object created with new in the constructor
+ delete permutation; // NOTE(review): the class declaration in this patch has permutation as a static_permutation value, not a pointer - confirm
+}
+
+
+/** Looks up chunk position j.
+ *
+ * Takes the index k that inversePerm gives for j, finds the (k+1)-th 1 of X and
+ * returns its position minus X->rank at that position.
+ * Presumably this is the symbol stored at position j - confirm against the
+ * rank/select conventions of the bitmap class.
+ */
+uint static_sequence_gmr_chunk::caccess(uint j) {
+  const uint sorted_idx = inversePerm(permutation, j);
+  const uint one_pos = X->select1(sorted_idx+1);
+  return one_pos - X->rank(one_pos);
+}
+
+
+/** Returns the permutation entry at index select0(i+1) + j - i - 1.
+ *
+ * Presumably the chunk position of the j-th occurrence (counting from 1) of
+ * symbol i - confirm against the caller.
+ */
+uint static_sequence_gmr_chunk::cselect(uint i, uint j) {
+  const uint zero_pos = X->select0(i+1);
+  return getelemPerm(permutation, zero_pos + j - i - 1);
+}
+
+
+uint static_sequence_gmr_chunk::crank(uint i, uint j) { // Counts the entries <= j in symbol i's slice of the permutation; presumably the rank of symbol i up to position j, assuming the slice is sorted increasingly - confirm
+ uint ini = X->select0(i+1)-i; // position of the (i+1)-th 0 of X minus i; used below as the first permutation index of the slice
+ uint ini_o = ini; // keep the slice start: ini is moved by the binary search
+ uint fin = X->select0(i+2); // position of the (i+2)-th 0 of X
+ if(fin<i+2) return 0; // guard: the unsigned subtraction on the next line would wrap around
+ fin = fin-(i+2); // used below as the last permutation index of the slice
+ if(fin<ini) return 0; // last index before first index: nothing to count
+ if(getelemPerm(permutation,ini) > j) return 0; // first entry is already beyond j
+ if(getelemPerm(permutation,ini) == j) return 1; // first entry is exactly j
+ if(ini==fin) return 1; // single entry, and the checks above leave only the case entry < j
+ while(ini < fin-1) { // binary search: shrink [ini,fin] until the bounds are adjacent
+ uint med = (ini+fin)/2;
+ uint elem = getelemPerm(permutation, med);
+ if(elem >= j) fin = med;
+ else ini = med;
+ }
+ while(fin>ini_o && getelemPerm(permutation, fin)>j) fin--; // step back over entries greater than j
+ return fin-ini_o+1; // number of slice entries from ini_o through fin
+}
+
+
+/** Size estimate for the chunk.
+ *
+ * Adds the size of the two pointer fields, X's space requirement in bits
+ * divided by 8, and sizeofPerm(permutation). The sigma and chunk_length fields
+ * are not included in the sum.
+ */
+uint static_sequence_gmr_chunk::size() {
+  const size_t pointer_bytes = sizeof(BitRankW32Int*)+sizeof(perm*);
+  return pointer_bytes+(X->SpaceRequirementInBits()/8+sizeofPerm(permutation));
+}
--- /dev/null
+
+#ifndef _STATIC_SEQUENCE_GMR_CHUNK_H
+#define _STATIC_SEQUENCE_GMR_CHUNK_H
+
+#include <basics.h>
+#include <static_bitsequence.h>
+#include <static_bitsequence_builder.h>
+#include <static_permutation.h>
+#include <cassert>
+#include <iostream>
+
+using namespace std;
+
+/** Implementation of the Chunk of Golynski et al's rank/select
+ * data structure [1].
+ *
+ * [1] A. Golynski and I. Munro and S. Rao.
+ * Rank/select operations on large alphabets: a tool for text indexing.
+ * SODA 06.
+ *
+ * @author Francisco Claude
+ */
+class static_sequence_gmr_chunk: public static_sequence {
+ public:
+ /** Builds the structures needed for the chunk (bitmap X and the permutation) from sequence[0..chunk_length-1] */
+ static_sequence_gmr_chunk(uint * sequence, uint chunk_length, static_bitsequence_builder *bmb, static_permutation_builder *pmb);
+
+ /** Destroy the chunk: the implementation deletes X and permutation */
+ ~static_sequence_gmr_chunk();
+
+ virtual uint access(uint j); // NOTE(review): the implementation file in this patch defines caccess(uint), not access - confirm intended name
+ virtual uint select(uint i, uint j); // NOTE(review): implemented as cselect(uint,uint) in this patch
+ virtual uint rank(uint i, uint j); // NOTE(review): implemented as crank(uint,uint) in this patch
+ virtual uint size();
+ virtual uint save(FILE *fp); // NOTE(review): no definition is visible in the implementation shown in this patch
+ static_sequence_gmr_chunk * load(FILE *fp); // NOTE(review): not static, unlike the load() of the other classes in this patch - confirm
+
+ protected:
+ /** Bitmap; the constructor assigns it a BitRankW32Int. NOTE(review): declared here as static_bitsequence* - confirm the types agree */
+ static_bitsequence * X;
+ /** Permutation. NOTE(review): declared by value here, but the constructor assigns the result of createPerm and the destructor deletes it - confirm */
+ static_permutation permutation;
+ /** Largest symbol value found in the chunk by the constructor */
+ uint sigma;
+ /** Length of the chunk */
+ uint chunk_length;
+};
+#endif
-
+/* static_sequence_wvtree.cpp
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * static_sequence_wvtree definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
#include <static_sequence_wvtree.h>
static_sequence_wvtree::static_sequence_wvtree(uint * symbols, uint n, wt_coder * c, static_bitsequence_builder * bmb, alphabet_mapper * am) {
symbols[i] = am->map(symbols[i]);
this->am = am;
this->c=c;
- cout << "Building..."; cout.flush();
root = new wt_node_internal(symbols, n, 0, c, bmb);
- cout << "done" << endl; cout.flush();
for(uint i=0;i<n;i++)
symbols[i] = am->unmap(symbols[i]);
}
static_sequence_wvtree::static_sequence_wvtree() {} // Default constructor; the body is empty, no member is set here.
-
static_sequence_wvtree::~static_sequence_wvtree() {
delete root;
delete am;
ret->root = wt_node::load(fp);
return ret;
}
-
-
+/* static_sequence_wvtree.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * static_sequence_wvtree definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
#ifndef STATIC_SEQUENCE_WVTREE_H
#define STATIC_SEQUENCE_WVTREE_H
+
#include <iostream>
#include <cassert>
#include <basics.h>
using namespace std;
+/** Wavelet tree implementation using pointers.
+ *
+ * @author Francisco Claude
+ */
class static_sequence_wvtree : public static_sequence {
public:
/** Builds a Wavelet Tree for the string
* pointed by symbols assuming its length
- * equals n and the test flag allows to set
- * if the structure must be tested for
- * correctness after being created (this is very expensive). */
+ * equals n */
static_sequence_wvtree(uint * symbols, uint n, wt_coder * coder, static_bitsequence_builder * bmb, alphabet_mapper * am);
virtual ~static_sequence_wvtree();
virtual uint save(FILE * fp);
static static_sequence_wvtree * load(FILE *fp);
+
protected:
static_sequence_wvtree();
-
+/* wt_coder.cpp
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * wt_coder definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
#include <wt_coder.h>
wt_coder * wt_coder::load(FILE *fp) {
fseek(fp,-sizeof(uint),SEEK_CUR);
switch(rd) {
case WT_CODER_HUFF_HDR: return wt_coder_huff::load(fp);
+ case WT_CODER_BINARY_HDR: return wt_coder_binary::load(fp);
}
return NULL;
}
-
+/* wt_coder.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * wt_coder definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
#ifndef wt_coder_h
#define wt_coder_h
using namespace std;
#define WT_CODER_HUFF_HDR 2
+#define WT_CODER_BINARY_HDR 3
+/** Coder that defines the shape of a wavelet tree
+ *
+ * @author Francisco Claude
+ */
class wt_coder {
public:
virtual ~wt_coder() {}; // virtual, so deleting a derived coder through a wt_coder pointer runs the derived destructor
+ /** Tells if at level l the symbol is represented by a one or a zero */
virtual bool is_set(uint symbol, uint l)=0;
+ /** Tells if the path of symbol becomes unique at level l */
virtual bool done(uint symbol, uint l)=0;
+ /** Returns the size of the coder */
virtual uint size()=0;
+ /** Saves the coder to a file, returns 0 in case of success */
virtual uint save(FILE *fp)=0;
+ /** Loads a coder from a file, returns NULL in case of error; the implementation dispatches on the stored header (WT_CODER_HUFF_HDR or WT_CODER_BINARY_HDR) to the matching subclass load() */
static wt_coder * load(FILE *fp);
};
#include <wt_coder_binary.h>
#endif
-
-
+/* wt_coder_binary.cpp
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * wt_coder_binary definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
#include <wt_coder_binary.h>
wt_coder_binary::wt_coder_binary(uint * seq, uint n, alphabet_mapper * am) {
h=bits(max_v);
}
+wt_coder_binary::wt_coder_binary() {} // Empty constructor: h is not set here; load() fills it in after constructing the object.
+
wt_coder_binary::~wt_coder_binary() {} // Empty destructor body.
bool wt_coder_binary::is_set(uint symbol, uint l) {
return sizeof(wt_coder_binary);
}
+/** Writes the coder to fp: the WT_CODER_BINARY_HDR tag followed by h.
+ *
+ * Both writes are always attempted. The return value is the number of items
+ * written minus 2, so it is 0 when both writes succeeded and non-zero otherwise.
+ */
+uint wt_coder_binary::save(FILE *fp) {
+  const uint header = WT_CODER_BINARY_HDR;
+  uint written = fwrite(&header,sizeof(uint),1,fp);
+  written += fwrite(&h,sizeof(uint),1,fp);
+  return written-2;
+}
+
+/** Reads a coder previously written by save() from fp.
+ *
+ * Returns NULL if the header cannot be read, if it is not WT_CODER_BINARY_HDR,
+ * or if h cannot be read (the partially built object is deleted in that case).
+ * Otherwise returns a new wt_coder_binary that the caller must delete.
+ */
+wt_coder_binary * wt_coder_binary::load(FILE *fp) {
+  uint header;
+  const bool header_ok = fread(&header,sizeof(uint),1,fp)==1 && header==WT_CODER_BINARY_HDR;
+  if(!header_ok) return NULL;
+  wt_coder_binary * coder = new wt_coder_binary();
+  if(fread(&coder->h,sizeof(uint),1,fp)==1) return coder;
+  delete coder;
+  return NULL;
+}
+/* wt_coder_binary.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * wt_coder_binary definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
#ifndef wt_coder_binary_h
#define wt_coder_binary_h
#include <wt_coder.h>
#include <alphabet_mapper.h>
+/** Considers the binary representation of the symbols as the code
+ *
+ * @author Francisco Claude
+ */
class wt_coder_binary: public wt_coder {
public:
+ /** Builds a wt_coder_binary using the sequence of length n and the alphabet_mapper
+ * to determine the length of the binary codes */
wt_coder_binary(uint * seq, uint n, alphabet_mapper * am);
virtual ~wt_coder_binary();
virtual bool is_set(uint symbol, uint l);
virtual bool done(uint symbol, uint l);
virtual uint size();
+ virtual uint save(FILE *fp); // writes WT_CODER_BINARY_HDR followed by h; returns 0 when both writes succeed
+ static wt_coder_binary * load(FILE *fp); // returns NULL on a read failure or when the header is not WT_CODER_BINARY_HDR
protected:
+ wt_coder_binary(); // empty constructor used by load()
uint h; // set to bits(max_v) by the public constructor, or read from the file by load()
};
-
+/* wt_coder_huff.cpp
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * wt_coder_huff definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
#include <wt_coder_huff.h>
wt_coder_huff::wt_coder_huff(uint * symbs, uint n, alphabet_mapper * am) {
-
+/* wt_coder_huff.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * wt_coder_huff definition
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
#ifndef wt_coder_huff_h
#define wt_coder_huff_h
#include <huffman_codes.h>
#include <alphabet_mapper.h>
+/** Uses huffman codes to determine the shape of the wavelet tree
+ *
+ * @author Francisco Claude
+ */
class wt_coder_huff: public wt_coder {
public:
+ /** Builds a wt_coder_huff using the sequence of length n and the alphabet_mapper
+ * to determine the huffman codes */
wt_coder_huff(uint *symbs, uint n, alphabet_mapper * am);
virtual ~wt_coder_huff();
virtual bool is_set(uint symbol, uint l);
virtual uint size();
virtual uint save(FILE *fp);
static wt_coder_huff * load(FILE *fp);
- uint * get_buffer(uint symbol, uint *n);
+ //uint * get_buffer(uint symbol, uint *n);
protected:
wt_coder_huff();
};
#endif
-
-/* wt_node.h
+/* wt_node.cpp
* Copyright (C) 2008, Francisco Claude, all rights reserved.
*
* wt_node
#define WT_NODE_INTERNAL_HDR 2
#define WT_NODE_LEAF_HDR 3
-
+/** Base class for nodes in the wavelet tree
+ *
+ * @author Francisco Claude
+ */
class wt_node {
public:
virtual ~wt_node() {}
#include <wt_node_leaf.h>
#endif
-
-
+/* wt_node_internal.cpp
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * wt_node_internal
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
#include <wt_node_internal.h>
-
wt_node_internal::wt_node_internal(uint * symbols, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb) {
uint * ibitmap = new uint[n/W+1];
for(uint i=0;i<n/W+1;i++)
-
+/* wt_node_internal.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * wt_node_internal
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
#ifndef wt_node_internal_h
#define wt_node_internal_h
#include <static_bitsequence_builder.h>
#include <cassert>
+/** Class for representing internal nodes
+ *
+ * @author Francisco Claude
+ */
class wt_node_internal: public wt_node {
public:
wt_node_internal(uint * seq, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb);
};
#endif
-
+/* wt_node_leaf.cpp
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * wt_node_leaf
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
#include <wt_node_leaf.h>
+/* wt_node_leaf.h
+ * Copyright (C) 2008, Francisco Claude, all rights reserved.
+ *
+ * wt_node_leaf
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
#ifndef wt_node_leaf_h
#define wt_node_leaf_h
#include <wt_coder.h>
#include <cassert>
+/** Class for representing leaves of the wavelet tree.
+ *
+ * @author Francisco Claude
+ */
class wt_node_leaf: public wt_node {
public:
wt_node_leaf(uint symbol, uint count);
};
#endif
-
using namespace std;
+/** Checks access, rank and select of ss against the plain array symbols[0..n-1].
+ *
+ * For every position the function compares ss->access, ss->rank (at the
+ * position and at the previous one) and ss->select with the values derived
+ * from a running per-symbol occurrence count. It prints the size first, progress
+ * marks while running, a report for each mismatch found at the first failing
+ * position, and a success message when no mismatch was found.
+ */
+void test_static_sequence(uint * symbols, uint n, static_sequence * ss) {
+  cout << "Size: " << ss->size() << endl;
+
+  // The largest symbol determines the size of the occurrence table.
+  uint largest = 0;
+  for(uint k=0;k<n;k++)
+    largest = max(largest,symbols[k]);
+  uint * seen = new uint[largest+1]();
+
+  // A dot roughly every 1% of the input and a line break roughly every 10%.
+  const uint dot_every = max(1,(n-1)/100);
+  const uint line_every = max(1,(n-1)/10);
+
+  bool failed = false;
+  uint pos = 0;
+  while(pos<n && !failed) {
+    if(pos!=0) {
+      if(pos%dot_every==0) { cout << "."; cout.flush(); }
+      if(pos%line_every==0) cout << endl;
+    }
+    const uint sym = symbols[pos];
+    const uint expected = ++seen[sym];
+    const uint got_access = ss->access(pos);
+    const uint got_rank = ss->rank(sym,pos);
+    const uint got_select = ss->select(sym,expected);
+    const uint got_rank_prev = (pos==0)?0:ss->rank(sym,pos-1);
+    if(got_rank!=expected) {
+      cout << "Error in rank for symbol " << sym << " at position " << pos << endl;
+      cout << "value: " << got_rank << endl;
+      cout << "Expected: " << expected << endl;
+      failed = true;
+    }
+    if(got_select!=pos) {
+      cout << "Error in select for symbol " << sym << " at position " << expected << endl;
+      cout << "value: " << got_select << endl;
+      cout << "Expected: " << pos << endl;
+      failed = true;
+    }
+    if(got_access!=sym) {
+      cout << "Error in access at position " << pos << endl;
+      cout << "value: " << got_access << endl;
+      cout << "Expected: " << sym << endl;
+      failed = true;
+    }
+    if(got_rank_prev!=expected-1) {
+      cout << "Error in rankM1 for symbol " << sym << " at position " << pos-1 << endl;
+      cout << "value: " << got_rank_prev << endl;
+      cout << "Expected: " << expected-1 << endl;
+      failed = true;
+    }
+    pos++;
+  }
+  if(!failed)
+    cout << "Test OK! It works :)" << endl;
+  delete [] seen;
+}
+
int main(int argc, char ** argv) {
if(argc!=3) {
cout << "usage: " << argv[0] << " <file> <samp>" << endl;
cout << "Building Huffman table..."; cout.flush();
wt_coder * wtc = new wt_coder_huff(text,n,am);
cout << "done" << endl; cout.flush();
- static_sequence * wt = new static_sequence_wvtree(text,n,wtc,bmb,am,true);
+ cout << "Building static_sequence..."; cout.flush();
+ static_sequence * wt = new static_sequence_wvtree(text,n,wtc,bmb,am);
+ cout << "done" << endl; cout.flush();
delete bmb;
char * fname = new char[10+string(argv[1]).length()];
fclose(fp);
delete [] fname;
-
+ test_static_sequence(text,n,wt);
+
cout << "WT Size: " << wt->size() << endl;
cout << "ft = " << 1.*wt->size()/(bits(max_symbol-1)*n/8) << endl;
using namespace std;
+/** Checks access, rank and select of ss against the plain array symbols[0..n-1].
+ *
+ * For every position the function compares ss->access, ss->rank (at the
+ * position and at the previous one) and ss->select with the values derived
+ * from a running per-symbol occurrence count. It prints the size first, progress
+ * marks while running, a report for each mismatch found at the first failing
+ * position, and a success message when no mismatch was found.
+ */
+void test_static_sequence(uint * symbols, uint n, static_sequence * ss) {
+  cout << "Size: " << ss->size() << endl;
+
+  // The largest symbol determines the size of the occurrence table.
+  uint largest = 0;
+  for(uint k=0;k<n;k++)
+    largest = max(largest,symbols[k]);
+  uint * seen = new uint[largest+1]();
+
+  // A dot roughly every 1% of the input and a line break roughly every 10%.
+  const uint dot_every = max(1,(n-1)/100);
+  const uint line_every = max(1,(n-1)/10);
+
+  bool failed = false;
+  uint pos = 0;
+  while(pos<n && !failed) {
+    if(pos!=0) {
+      if(pos%dot_every==0) { cout << "."; cout.flush(); }
+      if(pos%line_every==0) cout << endl;
+    }
+    const uint sym = symbols[pos];
+    const uint expected = ++seen[sym];
+    const uint got_access = ss->access(pos);
+    const uint got_rank = ss->rank(sym,pos);
+    const uint got_select = ss->select(sym,expected);
+    const uint got_rank_prev = (pos==0)?0:ss->rank(sym,pos-1);
+    if(got_rank!=expected) {
+      cout << "Error in rank for symbol " << sym << " at position " << pos << endl;
+      cout << "value: " << got_rank << endl;
+      cout << "Expected: " << expected << endl;
+      failed = true;
+    }
+    if(got_select!=pos) {
+      cout << "Error in select for symbol " << sym << " at position " << expected << endl;
+      cout << "value: " << got_select << endl;
+      cout << "Expected: " << pos << endl;
+      failed = true;
+    }
+    if(got_access!=sym) {
+      cout << "Error in access at position " << pos << endl;
+      cout << "value: " << got_access << endl;
+      cout << "Expected: " << sym << endl;
+      failed = true;
+    }
+    if(got_rank_prev!=expected-1) {
+      cout << "Error in rankM1 for symbol " << sym << " at position " << pos-1 << endl;
+      cout << "value: " << got_rank_prev << endl;
+      cout << "Expected: " << expected-1 << endl;
+      failed = true;
+    }
+    pos++;
+  }
+  if(!failed)
+    cout << "Test OK! It works :)" << endl;
+  delete [] seen;
+}
+
int main(int argc, char ** argv) {
if(argc!=3) {
cout << "usage: " << argv[0] << " <file> <samp>" << endl;
cout << "Building Huffman table..."; cout.flush();
wt_coder * wtc = new wt_coder_huff(text,n,am);
cout << "done" << endl; cout.flush();
- static_sequence * wt = new static_sequence_wvtree(text,n,wtc,bmb,am,true);
+ cout << "Building static_sequence..."; cout.flush();
+ static_sequence * wt = new static_sequence_wvtree(text,n,wtc,bmb,am);
+ cout << "done" << endl; cout.flush();
delete bmb;
char * fname = new char[10+string(argv[1]).length()];
fclose(fp);
delete [] fname;
- ((static_sequence_wvtree*)wt)->test_structure(text,n);
+ test_static_sequence(text,n,wt);
cout << "WT Size: " << wt->size() << endl;
cout << "ft = " << 1.*wt->size()/n << endl;