From: darroyue Date: Wed, 14 Jan 2009 02:37:36 +0000 (+0000) Subject: New (faster) representation for tags added; faster construction of parentheses X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;h=a9846746dc7a55764591fcc273fd48c6049df962;p=SXSI%2FXMLTree.git New (faster) representation for tags added; faster construction of parentheses git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/XMLTree@56 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/XMLTree.cpp b/XMLTree.cpp index 7d0c5e3..92360a4 100644 --- a/XMLTree.cpp +++ b/XMLTree.cpp @@ -101,7 +101,7 @@ XMLTree *XMLTree::Load(unsigned char *filename, int sample_rate_text) if (!(XML_Tree->indexing_empty_texts)) XML_Tree->EBVector = static_bitsequence_rrr02::load(fp); // loads the tags - XML_Tree->Tags = static_sequence_wvtree::load(fp); + XML_Tree->Tags = static_sequence::load(fp); // loads the texts XML_Tree->Text->Load(fp,sample_rate_text); @@ -126,16 +126,16 @@ XMLTree::~XMLTree() free(TagName); if (!indexing_empty_texts) { - EBVector->~static_bitsequence_rrr02(); + //EBVector->~static_bitsequence_rrr02(); delete EBVector; EBVector = NULL; } - Tags->~static_sequence_wvtree(); + //Tags->~static_sequence_wvtree(); delete Tags; Tags = NULL; - Text->~TextCollection(); + //Text->~TextCollection(); delete Text; Text = NULL; @@ -571,11 +571,12 @@ int XMLTree::OpenDocument(bool empty_texts, int sample_rate_text) initialized = true; finished = false; npar = 0; - ntagnames = 0; - + parArraySize = 1; + ntagnames = 0; + indexing_empty_texts = empty_texts; - par_aux = (pb *)malloc(sizeof(pb)); + par_aux = (pb *)malloc(sizeof(pb)*parArraySize); if (!par_aux) { fprintf(stderr, "Error: not enough memory\n"); return NULLT; @@ -621,14 +622,18 @@ int XMLTree::CloseDocument() } // creates the data structure for the tree topology - Par = (bp *)malloc(sizeof(bp)); + Par = (bp *)malloc(sizeof(bp)); bp_construct(Par, npar, par_aux, OPT_DEGREE|0); // creates structure for tags - alphabet_mapper * am = new alphabet_mapper_none(); - static_bitsequence_builder * bmb = new static_bitsequence_builder_rrr02(32); - wt_coder * wtc = new wt_coder_huff((uint *)tags_aux,npar-1,am); - Tags = new static_sequence_wvtree((uint *) tags_aux, (uint) npar-1, wtc, bmb, am); + static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(20); + static_permutation_builder * pmb = new static_permutation_builder_mrrr(PERM_SAMPLE, bmb); + static_sequence_builder * ssb = new static_sequence_builder_gmr_chunk(bmb, pmb); + Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar-1,2*ntagnames, bmb, ssb); + + delete bmb; + delete pmb; + delete ssb; // makes the text collection static Text->MakeStatic(); @@ -655,7 +660,11 @@ int XMLTree::NewOpenTag(unsigned char *tagname) } // inserts a new opening parentheses in the bit sequence - par_aux = (pb *)realloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb)))); + if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis + par_aux = (pb *)realloc(par_aux, sizeof(pb)*2*parArraySize); + parArraySize *= 2; + } + if (!par_aux) { fprintf(stderr, "Error: not enough memory\n"); return NULLT; @@ -708,7 +717,11 @@ int XMLTree::NewClosingTag(unsigned char *tagname) } // inserts a new closing parentheses in the bit sequence - par_aux = (pb *)realloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb)))); + if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis + par_aux = (pb *)realloc(par_aux, sizeof(pb)*2*parArraySize); + parArraySize *= 2; + } + if (!par_aux) { fprintf(stderr, "Error: not enough memory\n"); return NULLT; diff --git a/XMLTree.h b/XMLTree.h index f605286..7407014 100644 --- a/XMLTree.h +++ b/XMLTree.h @@ -37,6 +37,8 @@ using SXSI::TextCollection; #define NULLT -1 +#define PERM_SAMPLE 10 + // sets bit p in e #define bitset(e,p) ((e)[(p)/W] |= (1<<((p)%W))) // cleans bit p in e @@ -67,7 +69,7 @@ class XMLTree { static_bitsequence_rrr02 *EBVector; /** Tag sequence represented with a data structure for rank and select */ - static_sequence_wvtree *Tags; + static_sequence *Tags; /** The texts in the XML document */ TextCollection *Text; @@ -85,6 +87,7 @@ class XMLTree { pb *par_aux; TagType *tags_aux; int npar; + int parArraySize; int ntagnames; unsigned int *empty_texts_aux; diff --git a/libcds/Doxyfile b/libcds/Doxyfile index 51edd7b..aab437f 100644 --- a/libcds/Doxyfile +++ b/libcds/Doxyfile @@ -5,7 +5,7 @@ #--------------------------------------------------------------------------- DOXYFILE_ENCODING = UTF-8 PROJECT_NAME = libcds -PROJECT_NUMBER = 1.0 +PROJECT_NUMBER = 0.8 OUTPUT_DIRECTORY = docs/ CREATE_SUBDIRS = NO OUTPUT_LANGUAGE = English diff --git a/libcds/Makefile b/libcds/Makefile index 5f2e6de..b5f4fa3 100644 --- a/libcds/Makefile +++ b/libcds/Makefile @@ -1,24 +1,32 @@ -all: libcompact tests +all: clean libcompact tests doc: - doxygen + @echo " [DOC] Generating documentation" + @doxygen libcompact: - make -C src + @echo " [MSG] Entering directory src" + @make --no-print-directory -C src tests: libcompact - make -C tests + @echo " [MSG] Entering directory tests" + @make --no-print-directory -C tests clean: - make -C src clean - make -C tests clean - rm -rf docs/* - touch docs/delete_me - rm -f lib/* - touch lib/delete_me - rm -f includes/* - touch includes/delete_me + @echo " [MSG] Entering directory src" + @make --no-print-directory -C src clean + @echo " [MSG] Entering directory tests" + @make --no-print-directory -C tests clean + @echo " [CLN] Cleaning docs folder" + @rm -rf docs/* + @touch docs/delete_me + @echo " [CLN] Cleaning lib folder" + @rm -f lib/* + @touch lib/delete_me + @echo " [CLN] Cleaning includes folder" + @rm -f includes/* + @touch includes/delete_me diff --git a/libcds/src/Makefile b/libcds/src/Makefile index b208f91..0f7549b 100644 --- a/libcds/src/Makefile +++ b/libcds/src/Makefile @@ -1,34 +1,41 @@ CPP=g++ -CPPFLAGS=-g3 -Wall -#CPPFLAGS=-O9 -Wall -DNDEBUG -pedantic +#CPPFLAGS=-g3 -Wall +CPPFLAGS=-O9 -w -DNDEBUG INCL=-I../includes/ CODERS_DIR=coders CODERS_OBJECTS=$(CODERS_DIR)/huff.o $(CODERS_DIR)/huffman_codes.o +STATIC_PERMUTATION_DIR=static_permutation +STATIC_PERMUTATION_OBJECTS=$(STATIC_PERMUTATION_DIR)/perm.o $(STATIC_PERMUTATION_DIR)/static_permutation.o $(STATIC_PERMUTATION_DIR)/static_permutation_mrrr.o $(STATIC_PERMUTATION_DIR)/static_permutation_builder_mrrr.o + STATIC_BITSEQUENCE_DIR=static_bitsequence -STATIC_BITSEQUENCE_OBJECTS=$(STATIC_BITSEQUENCE_DIR)/static_bitsequence.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_naive.o $(STATIC_BITSEQUENCE_DIR)/table_offset.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_rrr02.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_brw32.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_rrr02.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_brw32.o +STATIC_BITSEQUENCE_OBJECTS=$(STATIC_BITSEQUENCE_DIR)/static_bitsequence.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_naive.o $(STATIC_BITSEQUENCE_DIR)/table_offset.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_rrr02.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_brw32.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_rrr02.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_brw32.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_rrr02_light.o $(STATIC_BITSEQUENCE_DIR)/static_bitsequence_builder_rrr02_light.o STATIC_SEQUENCE_DIR=static_sequence -STATIC_SEQUENCE_OBJECTS=$(STATIC_SEQUENCE_DIR)/static_sequence.o $(STATIC_SEQUENCE_DIR)/static_sequence_wvtree.o $(STATIC_SEQUENCE_DIR)/wt_coder_binary.o $(STATIC_SEQUENCE_DIR)/wt_coder_huff.o $(STATIC_SEQUENCE_DIR)/wt_node_internal.o $(STATIC_SEQUENCE_DIR)/wt_node_leaf.o $(STATIC_SEQUENCE_DIR)/wt_coder.o $(STATIC_SEQUENCE_DIR)/wt_node.o +STATIC_SEQUENCE_OBJECTS=$(STATIC_SEQUENCE_DIR)/static_sequence.o $(STATIC_SEQUENCE_DIR)/static_sequence_wvtree.o $(STATIC_SEQUENCE_DIR)/wt_coder_binary.o $(STATIC_SEQUENCE_DIR)/wt_coder_huff.o $(STATIC_SEQUENCE_DIR)/wt_node_internal.o $(STATIC_SEQUENCE_DIR)/wt_node_leaf.o $(STATIC_SEQUENCE_DIR)/wt_coder.o $(STATIC_SEQUENCE_DIR)/wt_node.o $(STATIC_SEQUENCE_DIR)/static_sequence_gmr_chunk.o $(STATIC_SEQUENCE_DIR)/static_sequence_builder_gmr_chunk.o $(STATIC_SEQUENCE_DIR)/static_sequence_gmr.o $(STATIC_SEQUENCE_DIR)/static_sequence_builder_wvtree.o $(STATIC_SEQUENCE_DIR)/static_sequence_builder_gmr.o $(STATIC_SEQUENCE_DIR)/static_sequence_wvtree_noptrs.o $(STATIC_SEQUENCE_DIR)/static_sequence_builder_wvtree_noptrs.o UTILS_DIR=utils -UTILS_OBJECTS=$(UTILS_DIR)/alphabet_mapper_none.o $(UTILS_DIR)/alphabet_mapper.o +UTILS_OBJECTS=$(UTILS_DIR)/alphabet_mapper_none.o $(UTILS_DIR)/alphabet_mapper.o $(UTILS_DIR)/alphabet_mapper_cont.o %.o: %.cpp - $(CPP) $(CPPFLAGS) $(INCL) -c $< -o $@ + @echo " [C++] Compiling $<" + @$(CPP) $(CPPFLAGS) $(INCL) -c $< -o $@ all: lib clean: - rm -f $(CODERS_OBJECTS) $(STATIC_BITSEQUENCE_OBJECTS) $(STATIC_SEQUENCE_OBJECTS) $(UTILS_OBJECTS) + @echo " [CLN] Removing object files" + @rm -f $(CODERS_OBJECTS) $(STATIC_BITSEQUENCE_OBJECTS) $(STATIC_SEQUENCE_OBJECTS) $(UTILS_OBJECTS) $(STATIC_PERMUTATION_OBJECTS) -lib: pre $(CODERS_OBJECTS) $(STATIC_BITSEQUENCE_OBJECTS) $(STATIC_SEQUENCE_OBJECTS) $(UTILS_OBJECTS) - ar vrcs ../lib/libcds.a $(CODERS_OBJECTS) $(STATIC_BITSEQUENCE_OBJECTS) $(STATIC_SEQUENCE_OBJECTS) $(UTILS_OBJECTS) +lib: pre $(CODERS_OBJECTS) $(STATIC_BITSEQUENCE_OBJECTS) $(STATIC_SEQUENCE_OBJECTS) $(UTILS_OBJECTS) $(STATIC_PERMUTATION_OBJECTS) + @echo " [LIB] Packing the object files" + @ar rcs ../lib/libcds.a $(CODERS_OBJECTS) $(STATIC_BITSEQUENCE_OBJECTS) $(STATIC_SEQUENCE_OBJECTS) $(UTILS_OBJECTS) $(STATIC_PERMUTATION_OBJECTS) pre: - cp basics.h ../includes/ - cp */*.h ../includes/ + @echo " [HDR] Populating the includes folder" + @cp basics.h ../includes/ + @cp */*.h ../includes/ diff --git a/libcds/src/basics.h b/libcds/src/basics.h index 9c88ad2..6412228 100644 --- a/libcds/src/basics.h +++ b/libcds/src/basics.h @@ -1,5 +1,5 @@ /* basics.h - * Copyright (C) 2008, Rodrigo Gonzalez & Francisco Claude, all rights reserved. + * Copyright (C) 2005, Rodrigo Gonzalez, all rights reserved. * * Some preliminary stuff * @@ -23,6 +23,14 @@ #ifndef _BASICS_H #define _BASICS_H +#include +#include +#include +#include +#include +#include +#include +#include #include using namespace std; #include @@ -131,7 +139,10 @@ inline uint bits(uint n){ #define bitclean(e,p) ((e)[(p)/W] &= ~(1<<((p)%W))) /** uints required to represent e integers of n bits each */ -#define uint_len(e,n) (((e)*(n))/W+(((e)*(n))%W > 0)) +//#define uint_len(e,n) (((e)*(n))/W+(((e)*(n))%W > 0)) +inline uint uint_len(uint e, uint n) { + return ((unsigned long long)e*n/W+((unsigned long long)e*n%W>0)); +} /** Retrieve a given index from array A where every value uses len bits * @param A Array diff --git a/libcds/src/coders/huff.cpp b/libcds/src/coders/huff.cpp index deb0e70..52b95f9 100644 --- a/libcds/src/coders/huff.cpp +++ b/libcds/src/coders/huff.cpp @@ -1,4 +1,23 @@ +/* huff.cpp + Copyright (C) 2008, Gonzalo Navarro, all rights reserved. + Canonical Huffman + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ // implements canonical Huffman #include @@ -180,16 +199,17 @@ ulong decodeHuff (THuff H, uint *symb, uint *stream, ulong ptr) */ void saveHuff (THuff H, FILE *f) - { uint *symb = (uint*)malloc((H.lim+1)*sizeof(uint)); + { uint *symb = new uint[H.lim+1]; uint i; + for(i=0;i<(H.lim+1);i++) symb[i] = 0; for (i=0;i<=H.max;i++) - if (H.s.spos[i] != (uint)~0) symb[H.s.spos[i]] = i; + if (H.s.spos[i] != (uint)~0) symb[H.s.spos[i]] = i; uint l=fwrite (&H.max,sizeof(uint),1,f); l += fwrite (&H.lim,sizeof(uint),1,f); l += fwrite (&H.depth,sizeof(uint),1,f); l += fwrite (symb,sizeof(uint),H.lim+1,f); l += fwrite (H.num,sizeof(uint),H.depth+1,f); - free (symb); + delete [] (symb); } uint sizeHuff (THuff H) @@ -235,5 +255,3 @@ THuff loadHuff (FILE *f, int enc) } return H; } - - diff --git a/libcds/src/coders/huff.h b/libcds/src/coders/huff.h index 2dc96e2..e990974 100644 --- a/libcds/src/coders/huff.h +++ b/libcds/src/coders/huff.h @@ -1,5 +1,23 @@ +/* huff.h + Copyright (C) 2008, Gonzalo Navarro, all rights reserved. -// implements canonical Huffman + Canonical Huffman + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ #ifndef HUFFINCLUDED #define HUFFINCLUDED @@ -18,36 +36,51 @@ typedef struct ulong total; // total length to achieve, in bits } THuff; - // Creates Huffman encoder given symbols 0..lim with frequencies - // freq[i], ready for compression +/** Creates Huffman encoder given symbols 0..lim with frequencies + * freq[i], ready for compression + * + * @author Gonzalo Navarro + */ THuff createHuff (uint *freq, uint lim); - // Encodes symb using H, over stream[ptr...lim] (ptr and lim are - // bit positions of stream). Returns the new ptr. - +/** Encodes symb using H, over stream[ptr...lim] (ptr and lim are + * bit positions of stream). Returns the new ptr. + * + * @author Gonzalo Navarro + */ ulong encodeHuff (THuff H, uint symb, uint *stream, ulong ptr); - // Decodes *symb using H, over stream[ptr...lim] (ptr and lim are - // bit positions of stream). Returns the new ptr. - +/** Decodes *symb using H, over stream[ptr...lim] (ptr and lim are + * bit positions of stream). Returns the new ptr. + * + * @author Gonzalo Navarro + */ ulong decodeHuff (THuff H, uint *symb, uint *stream, ulong ptr); - // Writes H in file f - +/** Writes H in file f + * + * @author Gonzalo Navarro + */ void saveHuff (THuff H, FILE *f); - // Size of H written on file - +/** Size of H written on file + * + * @author Gonzalo Navarro + */ uint sizeHuff (THuff H); - // Frees H - +/** Frees H + * + * @author Gonzalo Navarro + */ void freeHuff (THuff H); - // Loads H from file f, prepared for encoding or decoding depending - // on enc - +/** Loads H from file f, prepared for encoding or decoding depending + * on enc + * + * @author Gonzalo Navarro + */ THuff loadHuff (FILE *f, int enc); #endif diff --git a/libcds/src/coders/huffman_codes.cpp b/libcds/src/coders/huffman_codes.cpp index cb64fd1..8bffa69 100644 --- a/libcds/src/coders/huffman_codes.cpp +++ b/libcds/src/coders/huffman_codes.cpp @@ -1,3 +1,23 @@ +/* huffman_codes.cpp + Copyright (C) 2008, Francisco Claude, all rights reserved. + + Wrapper for huff written by Gonzalo Navarro + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ #include @@ -47,5 +67,3 @@ huffman_codes * huffman_codes::load(FILE * fp) { ret->huff_table = loadHuff(fp,1); return ret; } - - diff --git a/libcds/src/coders/huffman_codes.h b/libcds/src/coders/huffman_codes.h index d421899..a82c8dd 100644 --- a/libcds/src/coders/huffman_codes.h +++ b/libcds/src/coders/huffman_codes.h @@ -1,3 +1,23 @@ +/* huffman_codes.h + Copyright (C) 2008, Francisco Claude, all rights reserved. + + Wrapper for huff written by Gonzalo Navarro + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ #ifndef HUFFMAN_CODES_H #define HUFFMAN_CODES_H @@ -5,17 +25,33 @@ #include #include +/** Wrapper for the canonical huffman implementation of Gonzalo Navarro. + * + * @author Francisco Claude + */ class huffman_codes { public: + /** Creates the codes for the sequence seq of length n */ huffman_codes(uint * seq, uint n); ~huffman_codes(); + /** Encodes symb into stream at bit-position pos, return the ending position (bits) */ ulong encode(uint symb, uint * stream, ulong pos); + + /** decodes into symb from stream at bit-position pos, returns the new position */ ulong decode(uint * symb, uint * stream, ulong pos); + + /** Returns the maximum length of a code */ uint max_length(); + + /** Returns the size of the table */ uint size(); + + /** Saves the coder to a file */ uint save(FILE *fp); + + /** Loads a coder from a file */ static huffman_codes * load(FILE *fp); protected: diff --git a/libcds/src/static_bitsequence/static_bitsequence.cpp b/libcds/src/static_bitsequence/static_bitsequence.cpp index 65de9c4..7813d8c 100644 --- a/libcds/src/static_bitsequence/static_bitsequence.cpp +++ b/libcds/src/static_bitsequence/static_bitsequence.cpp @@ -98,7 +98,7 @@ static_bitsequence * static_bitsequence::load(FILE * fp) { switch(r) { case RRR02_HDR: return static_bitsequence_rrr02::load(fp); case BRW32_HDR: return static_bitsequence_brw32::load(fp); + case RRR02_LIGHT_HDR: return static_bitsequence_rrr02_light::load(fp); } return NULL; } - diff --git a/libcds/src/static_bitsequence/static_bitsequence.h b/libcds/src/static_bitsequence/static_bitsequence.h index 4a3f915..2fbdf48 100644 --- a/libcds/src/static_bitsequence/static_bitsequence.h +++ b/libcds/src/static_bitsequence/static_bitsequence.h @@ -24,6 +24,7 @@ #define RRR02_HDR 2 #define BRW32_HDR 3 +#define RRR02_LIGHT_HDR 4 #include #include @@ -85,9 +86,9 @@ protected: }; -#include "static_bitsequence_rrr02.h" -#include "static_bitsequence_naive.h" -#include "static_bitsequence_brw32.h" +#include +#include +#include +#include #endif /* _STATIC_BITSEQUENCE_H */ - diff --git a/libcds/src/static_bitsequence/static_bitsequence_brw32.cpp b/libcds/src/static_bitsequence/static_bitsequence_brw32.cpp index 9bc1313..2ca4eed 100644 --- a/libcds/src/static_bitsequence/static_bitsequence_brw32.cpp +++ b/libcds/src/static_bitsequence/static_bitsequence_brw32.cpp @@ -38,7 +38,7 @@ static_bitsequence_brw32::static_bitsequence_brw32(){ data=NULL; - this->owner = true; +// this->owner = true; this->n=0; this->factor=0; } @@ -54,7 +54,7 @@ static_bitsequence_brw32::static_bitsequence_brw32( uint *bitarray, uint _n, uin data[i] = bitarray[i]; for(uint i=uint_len(_n,1);i<_n/W+1;i++) data[i] = 0; - this->owner = true; + //this->owner = true; this->n=_n; uint lgn=bits(n-1); this->factor=_factor; @@ -70,7 +70,7 @@ static_bitsequence_brw32::static_bitsequence_brw32( uint *bitarray, uint _n, uin static_bitsequence_brw32::~static_bitsequence_brw32() { delete [] Rs; - if (owner) delete [] data; + delete [] data; } //Metodo que realiza la busqueda d @@ -142,7 +142,6 @@ static_bitsequence_brw32 * static_bitsequence_brw32::load(FILE *f) { ret->data = new uint[ret->n/W+1]; if (!ret->data) return NULL; if (fread (ret->data,sizeof(uint),ret->n/W+1,f) != ret->n/W+1) return NULL; - ret->owner = true; ret->Rs= new uint[ret->n/ret->s+1]; if (!ret->Rs) return NULL; if (fread (ret->Rs,sizeof(uint),ret->n/ret->s+1,f) != ret->n/ret->s+1) return NULL; @@ -152,15 +151,15 @@ static_bitsequence_brw32 * static_bitsequence_brw32::load(FILE *f) { } uint static_bitsequence_brw32::SpaceRequirementInBits() { - return (owner?n:0)+(n/s)*sizeof(uint)*8 +sizeof(static_bitsequence_brw32)*8; + return uint_len(n,1)*sizeof(uint)*8+(n/s)*sizeof(uint)*8 +sizeof(static_bitsequence_brw32)*8; } uint static_bitsequence_brw32::size() { - return SpaceRequirementInBits()/8; + return sizeof(static_bitsequence_brw32)+SpaceRequirementInBits()/8; } uint static_bitsequence_brw32::SpaceRequirement() { - return (owner?n:0)/8+(n/s)*sizeof(uint)+sizeof(static_bitsequence_brw32); + return n/8+(n/s)*sizeof(uint)+sizeof(static_bitsequence_brw32); } uint static_bitsequence_brw32::prev2(uint start) { @@ -293,3 +292,67 @@ uint static_bitsequence_brw32::select1(uint x) { } return left-1; } + +uint static_bitsequence_brw32::select0(uint x) { + // returns i such that x=rank_0(i) && rank_0(i-1) integers) return n; + j = data[left]; + zeros = W-popcount(j); + } + //sequential search using popcount over a char + left=left*b; + rankmid = 8-popcount8(j); + if (rankmid < x) { + j=j>>8; + x-=rankmid; + left+=8; + rankmid = 8-popcount8(j); + if (rankmid < x) { + j=j>>8; + x-=rankmid; + left+=8; + rankmid = 8-popcount8(j); + if (rankmid < x) { + j=j>>8; + x-=rankmid; + left+=8; + } + } + } + + // then sequential search bit a bit + while (x>0) { + if (j%2 == 0 ) x--; + j=j>>1; + left++; + } + left--; + if (left > n) return n; + else return left; +} diff --git a/libcds/src/static_bitsequence/static_bitsequence_brw32.h b/libcds/src/static_bitsequence/static_bitsequence_brw32.h index 50ea7b4..64fcf1b 100644 --- a/libcds/src/static_bitsequence/static_bitsequence_brw32.h +++ b/libcds/src/static_bitsequence/static_bitsequence_brw32.h @@ -46,7 +46,7 @@ class static_bitsequence_brw32 : public static_bitsequence { private: uint *data; - bool owner; + //bool owner; uint n,integers; uint factor,b,s; uint *Rs; //superblock array @@ -64,6 +64,7 @@ public: uint prev(uint start); // gives the largest index i<=start such that IsBitSet(i)=true uint prev2(uint start); // gives the largest index i<=start such that IsBitSet(i)=true uint next(uint start); // gives the smallest index i>=start such that IsBitSet(i)=true + virtual uint select0(uint x); // gives the position of the x:th 1. virtual uint select1(uint x); // gives the position of the x:th 1. uint SpaceRequirementInBits(); uint SpaceRequirement(); @@ -75,4 +76,3 @@ public: }; #endif - diff --git a/libcds/src/static_bitsequence/static_bitsequence_builder.h b/libcds/src/static_bitsequence/static_bitsequence_builder.h index fb471bb..877dd2c 100644 --- a/libcds/src/static_bitsequence/static_bitsequence_builder.h +++ b/libcds/src/static_bitsequence/static_bitsequence_builder.h @@ -25,10 +25,12 @@ class static_bitsequence_builder { public: virtual ~static_bitsequence_builder() {} + /** Builds a static_bitsequence for the bitmap bitsequence of length len */ virtual static_bitsequence * build(uint * bitsequence, uint len)=0; }; #include +#include #include #endif /* _STATIC_BITSEQUENCE_BUILDER_H */ diff --git a/libcds/src/static_bitsequence/static_bitsequence_builder_brw32.cpp b/libcds/src/static_bitsequence/static_bitsequence_builder_brw32.cpp index f982cf3..14e8057 100644 --- a/libcds/src/static_bitsequence/static_bitsequence_builder_brw32.cpp +++ b/libcds/src/static_bitsequence/static_bitsequence_builder_brw32.cpp @@ -1,3 +1,23 @@ +/* static_bitsequence_builder_brw32.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_builder_brw32 definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ #include diff --git a/libcds/src/static_bitsequence/static_bitsequence_builder_brw32.h b/libcds/src/static_bitsequence/static_bitsequence_builder_brw32.h index a5116e3..a2f9308 100644 --- a/libcds/src/static_bitsequence/static_bitsequence_builder_brw32.h +++ b/libcds/src/static_bitsequence/static_bitsequence_builder_brw32.h @@ -1,7 +1,7 @@ /* static_bitsequence_builder_brw32.h * Copyright (C) 2008, Francisco Claude, all rights reserved. * - * static_bitsequence_builder definition + * static_bitsequence_builder_brw32 definition * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -28,6 +28,7 @@ class static_bitsequence_builder_brw32 : public static_bitsequence_builder { public: + /** Defines the sample rate used to build the bitmaps (brw32) */ static_bitsequence_builder_brw32(uint sampling); virtual ~static_bitsequence_builder_brw32() {} virtual static_bitsequence * build(uint * bitsequence, uint len); diff --git a/libcds/src/static_bitsequence/static_bitsequence_builder_rrr02.cpp b/libcds/src/static_bitsequence/static_bitsequence_builder_rrr02.cpp index 32e8ad6..36ddc31 100644 --- a/libcds/src/static_bitsequence/static_bitsequence_builder_rrr02.cpp +++ b/libcds/src/static_bitsequence/static_bitsequence_builder_rrr02.cpp @@ -1,3 +1,23 @@ +/* static_bitsequence_builder_rrr02.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * static_bitsequence_builder_rrr02 definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ #include diff --git a/libcds/src/static_bitsequence/static_bitsequence_builder_rrr02.h b/libcds/src/static_bitsequence/static_bitsequence_builder_rrr02.h index e858221..1ebf5a1 100644 --- a/libcds/src/static_bitsequence/static_bitsequence_builder_rrr02.h +++ b/libcds/src/static_bitsequence/static_bitsequence_builder_rrr02.h @@ -1,7 +1,7 @@ /* static_bitsequence_builder_rrr02.h * Copyright (C) 2008, Francisco Claude, all rights reserved. * - * static_bitsequence_builder definition + * static_bitsequence_builder_rrr02 definition * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -28,6 +28,7 @@ class static_bitsequence_builder_rrr02 : public static_bitsequence_builder { public: + /** Defines the sample rate used to build the bitmaps (rrr02) */ static_bitsequence_builder_rrr02(uint sampling); virtual ~static_bitsequence_builder_rrr02() {} virtual static_bitsequence * build(uint * bitsequence, uint len); diff --git a/libcds/src/static_bitsequence/static_bitsequence_naive.cpp b/libcds/src/static_bitsequence/static_bitsequence_naive.cpp index ba3c150..d9d0a0c 100644 --- a/libcds/src/static_bitsequence/static_bitsequence_naive.cpp +++ b/libcds/src/static_bitsequence/static_bitsequence_naive.cpp @@ -68,4 +68,3 @@ uint static_bitsequence_naive::size() { } int static_bitsequence_naive::save(FILE * fp) { return -1; } - diff --git a/libcds/src/static_bitsequence/static_bitsequence_rrr02.cpp b/libcds/src/static_bitsequence/static_bitsequence_rrr02.cpp index 404d905..aee722e 100644 --- a/libcds/src/static_bitsequence/static_bitsequence_rrr02.cpp +++ b/libcds/src/static_bitsequence/static_bitsequence_rrr02.cpp @@ -19,7 +19,7 @@ * */ -#include "static_bitsequence_rrr02.h" +#include table_offset * static_bitsequence_rrr02::E = NULL; @@ -346,4 +346,3 @@ static_bitsequence_rrr02 * static_bitsequence_rrr02::load(FILE * fp) { ret->create_sampling(ret->sample_rate); return ret; } - diff --git a/libcds/src/static_bitsequence/static_bitsequence_rrr02.h b/libcds/src/static_bitsequence/static_bitsequence_rrr02.h index 1d343f5..3327672 100644 --- a/libcds/src/static_bitsequence/static_bitsequence_rrr02.h +++ b/libcds/src/static_bitsequence/static_bitsequence_rrr02.h @@ -106,5 +106,3 @@ protected: }; #endif /* _STATIC_BITSEQUENCE_RRR02_H */ - - diff --git a/libcds/src/static_bitsequence/table_offset.cpp b/libcds/src/static_bitsequence/table_offset.cpp index 434009a..e2350ed 100644 --- a/libcds/src/static_bitsequence/table_offset.cpp +++ b/libcds/src/static_bitsequence/table_offset.cpp @@ -121,5 +121,3 @@ uint generaClase(ushort * bch, uint u, uint clase, uint puestos, uint pos_ini, u } return ret; } - - diff --git a/libcds/src/static_bitsequence/table_offset.h b/libcds/src/static_bitsequence/table_offset.h index f24c22e..8f7bdeb 100644 --- a/libcds/src/static_bitsequence/table_offset.h +++ b/libcds/src/static_bitsequence/table_offset.h @@ -41,6 +41,7 @@ using namespace std; class table_offset { public: + /** builds a universal table, designed for u<=15 */ table_offset(uint u); ~table_offset(); @@ -102,4 +103,3 @@ protected: }; #endif - diff --git a/libcds/src/static_sequence/static_sequence.cpp b/libcds/src/static_sequence/static_sequence.cpp index 0ba5c2a..e97a112 100644 --- a/libcds/src/static_sequence/static_sequence.cpp +++ b/libcds/src/static_sequence/static_sequence.cpp @@ -25,12 +25,19 @@ static_sequence::static_sequence() {} static_sequence::~static_sequence() {} uint static_sequence::length() { return len; } +uint static_sequence::count(uint s) { + return rank(s,len-1); +} + static_sequence * static_sequence::load(FILE * fp) { uint rd; if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; fseek(fp,-sizeof(uint),SEEK_CUR); switch(rd) { case WVTREE_HDR: return static_sequence_wvtree::load(fp); + case GMR_CHUNK_HDR: return static_sequence_gmr_chunk::load(fp); + case GMR_HDR: return static_sequence_gmr::load(fp); + case WVTREE_NOPTRS_HDR: return static_sequence_wvtree_noptrs::load(fp); } return NULL; } diff --git a/libcds/src/static_sequence/static_sequence.h b/libcds/src/static_sequence/static_sequence.h index 0baf721..cf8a1e0 100644 --- a/libcds/src/static_sequence/static_sequence.h +++ b/libcds/src/static_sequence/static_sequence.h @@ -27,6 +27,9 @@ #include #define WVTREE_HDR 2 +#define GMR_CHUNK_HDR 3 +#define GMR_HDR 4 +#define WVTREE_NOPTRS_HDR 5 using namespace std; @@ -55,7 +58,7 @@ public: virtual uint length(); /** Returns how many cs are in the sequence */ - virtual uint count(uint c)=0; + virtual uint count(uint c); /** Returns the size of the structure in bytes */ virtual uint size()=0; @@ -73,5 +76,8 @@ protected: }; #include +#include +#include +#include #endif /* _STATIC_SEQUENCE_H */ diff --git a/libcds/src/static_sequence/static_sequence_gmr_chunk.cpp b/libcds/src/static_sequence/static_sequence_gmr_chunk.cpp index 6705669..d575e99 100644 --- a/libcds/src/static_sequence/static_sequence_gmr_chunk.cpp +++ b/libcds/src/static_sequence/static_sequence_gmr_chunk.cpp @@ -1,21 +1,41 @@ - +/* static_sequence_gmr_chunk.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * gmr_chunk + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + #include "static_sequence_gmr_chunk.h" static_sequence_gmr_chunk::static_sequence_gmr_chunk(uint * sequence, uint chunk_length, static_bitsequence_builder *bmb, static_permutation_builder *pmb) { sigma = 0; for(uint i=0;iX = new BitRankW32Int(X_bitmap, X_pos, true,20); + //cout << "pi_blen=" << pi_blen << endl; + this->X = bmb->build(X_bitmap,X_pos); //new BitRankW32Int(X_bitmap, X_pos, true,20); assert(X!=NULL); - this->permutation = createPerm(pi, chunk_length, t); + delete [] X_bitmap; + //cout << "a" << endl; cout.flush(); + this->permutation = pmb->build(pi,chunk_length); //createPerm(pi, chunk_length, t); + //cout << "a" << endl; cout.flush(); assert(permutation!=NULL); this->sigma = sigma; - this->chunk_length = chunk_length; + this->len = chunk_length; delete [] counter; } +static_sequence_gmr_chunk::static_sequence_gmr_chunk() { +} static_sequence_gmr_chunk::~static_sequence_gmr_chunk() { delete X; @@ -47,41 +73,77 @@ static_sequence_gmr_chunk::~static_sequence_gmr_chunk() { } -uint static_sequence_gmr_chunk::caccess(uint j) { - uint invPerm = inversePerm(permutation, j); +uint static_sequence_gmr_chunk::access(uint j) { + uint invPerm = permutation->rev_pi(j); //inversePerm(permutation, j); + //cout << "invPerm=" << invPerm << endl; uint rank_pos = X->select1(invPerm+1); - uint ret = rank_pos - X->rank(rank_pos);// - 1; + //cout << "rank_pos=" << rank_pos << endl; + uint ret = rank_pos - X->rank1(rank_pos);// - 1; + //cout << "ret = " << ret << endl; return ret; } -uint static_sequence_gmr_chunk::cselect(uint i, uint j) { +uint static_sequence_gmr_chunk::select(uint i, uint j) { uint pos = X->select0(i+1) + j - i -1; - return getelemPerm(permutation, pos); + /*cout << "pos=" << pos << endl; + cout << "pos'=" << X->rank1(X->select0(i+1)+j) << endl; + cout << "perm_pos=" << permutation->pi(pos) << endl;*/ + return permutation->pi(pos); //getelemPerm(permutation, pos); } -uint static_sequence_gmr_chunk::crank(uint i, uint j) { +uint static_sequence_gmr_chunk::rank(uint i, uint j) { uint ini = X->select0(i+1)-i; uint ini_o = ini; uint fin = X->select0(i+2); if(fin j) return 0; - if(getelemPerm(permutation,ini) == j) return 1; + if(permutation->pi(ini) > j) return 0; + if(permutation->pi(ini) == j) return 1; if(ini==fin) return 1; while(ini < fin-1) { uint med = (ini+fin)/2; - uint elem = getelemPerm(permutation, med); + uint elem = permutation->pi(med); //getelemPerm(permutation, med); if(elem >= j) fin = med; else ini = med; } - while(fin>ini_o && getelemPerm(permutation, fin)>j) fin--; + while(fin>ini_o && permutation->pi(fin)>j) fin--; return fin-ini_o+1; } uint static_sequence_gmr_chunk::size() { - return sizeof(BitRankW32Int*)+sizeof(perm*)+(X->SpaceRequirementInBits()/8+sizeofPerm(permutation)); + return sizeof(static_sequence_gmr_chunk)+permutation->size()+X->size(); +} + +uint static_sequence_gmr_chunk::save(FILE *fp) { + uint wr = GMR_CHUNK_HDR; + wr = fwrite(&wr,sizeof(uint),1,fp); + wr += fwrite(&len,sizeof(uint),1,fp); + wr += fwrite(&sigma,sizeof(uint),1,fp); + if(wr!=3) return 1; + if(X->save(fp)) return 1; + if(permutation->save(fp)) return 1; + return 0; +} + +static_sequence_gmr_chunk * static_sequence_gmr_chunk::load(FILE *fp) { + uint rd; + if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; + if(rd!=GMR_CHUNK_HDR) return NULL; + static_sequence_gmr_chunk * ret = new static_sequence_gmr_chunk(); + rd = fread(&ret->len,sizeof(uint),1,fp); + rd += fread(&ret->sigma,sizeof(uint),1,fp); + ret->X = static_bitsequence::load(fp); + ret->permutation = static_permutation::load(fp); + if(rd!=2 || ret->X==NULL || ret->permutation==NULL) { + /*cout << "rd=" << rd << endl; + cout << "X =" << ret->X << endl; + cout << "P =" << ret->permutation << endl;*/ + delete ret; + return NULL; + } + return ret; } diff --git a/libcds/src/static_sequence/static_sequence_gmr_chunk.h b/libcds/src/static_sequence/static_sequence_gmr_chunk.h index a06b635..9c034f5 100644 --- a/libcds/src/static_sequence/static_sequence_gmr_chunk.h +++ b/libcds/src/static_sequence/static_sequence_gmr_chunk.h @@ -1,11 +1,33 @@ +/* static_sequence_gmr_chunk.h + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * gmr_chunk + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ #ifndef _STATIC_SEQUENCE_GMR_CHUNK_H #define _STATIC_SEQUENCE_GMR_CHUNK_H #include +#include #include #include #include +#include #include #include @@ -33,16 +55,17 @@ class static_sequence_gmr_chunk: public static_sequence { virtual uint rank(uint i, uint j); virtual uint size(); virtual uint save(FILE *fp); - static_sequence_gmr_chunk * load(FILE *fp); + static static_sequence_gmr_chunk * load(FILE *fp); protected: /** Bitmap */ static_bitsequence * X; /** Permutation */ - static_permutation permutation; + static_permutation * permutation; /** Size of the alphabet */ uint sigma; /** Length of the chunk */ - uint chunk_length; + //uint chunk_length; + static_sequence_gmr_chunk(); }; #endif diff --git a/libcds/src/static_sequence/static_sequence_wvtree.cpp b/libcds/src/static_sequence/static_sequence_wvtree.cpp index b1353d4..ea6a813 100644 --- a/libcds/src/static_sequence/static_sequence_wvtree.cpp +++ b/libcds/src/static_sequence/static_sequence_wvtree.cpp @@ -1,4 +1,4 @@ -/* static_sequence_wvtree.h +/* static_sequence_wvtree.cpp * Copyright (C) 2008, Francisco Claude, all rights reserved. * * static_sequence_wvtree definition @@ -25,7 +25,9 @@ static_sequence_wvtree::static_sequence_wvtree(uint * symbols, uint n, wt_coder for(uint i=0;imap(symbols[i]); this->am = am; + am->use(); this->c=c; + c->use(); root = new wt_node_internal(symbols, n, 0, c, bmb); for(uint i=0;iunmap(symbols[i]); @@ -35,8 +37,8 @@ static_sequence_wvtree::static_sequence_wvtree() {} static_sequence_wvtree::~static_sequence_wvtree() { delete root; - delete am; - delete c; + am->unuse(); + c->unuse(); } uint static_sequence_wvtree::rank(uint symbol, uint pos) { @@ -81,7 +83,9 @@ static_sequence_wvtree * static_sequence_wvtree::load(FILE *fp) { static_sequence_wvtree * ret = new static_sequence_wvtree(); if(fread(&ret->n,sizeof(uint),1,fp)!=1) return NULL; ret->c = wt_coder::load(fp); + ret->c->use(); ret->am = alphabet_mapper::load(fp); + ret->am->use(); ret->root = wt_node::load(fp); return ret; } diff --git a/libcds/src/static_sequence/wt_coder.cpp b/libcds/src/static_sequence/wt_coder.cpp index 147e939..4735ee5 100644 --- a/libcds/src/static_sequence/wt_coder.cpp +++ b/libcds/src/static_sequence/wt_coder.cpp @@ -21,6 +21,19 @@ #include +wt_coder::wt_coder() { + user_count=0; +} + +void wt_coder::use() { + user_count++; +} + +void wt_coder::unuse() { + user_count--; + if(user_count==0) delete this; +} + wt_coder * wt_coder::load(FILE *fp) { uint rd; if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; diff --git a/libcds/src/static_sequence/wt_coder.h b/libcds/src/static_sequence/wt_coder.h index 1a55397..3d22b21 100644 --- a/libcds/src/static_sequence/wt_coder.h +++ b/libcds/src/static_sequence/wt_coder.h @@ -36,6 +36,9 @@ using namespace std; */ class wt_coder { public: + wt_coder(); + virtual void use(); + virtual void unuse(); virtual ~wt_coder() {}; /** Tells if at level l the symbol is represented by a one or a zero */ virtual bool is_set(uint symbol, uint l)=0; @@ -47,6 +50,8 @@ class wt_coder { virtual uint save(FILE *fp)=0; /** Loads a coder from a file, returns NULL in case of error */ static wt_coder * load(FILE *fp); + protected: + uint user_count; }; #include diff --git a/libcds/src/static_sequence/wt_coder_binary.cpp b/libcds/src/static_sequence/wt_coder_binary.cpp index cdbac13..f8cc209 100644 --- a/libcds/src/static_sequence/wt_coder_binary.cpp +++ b/libcds/src/static_sequence/wt_coder_binary.cpp @@ -38,7 +38,7 @@ bool wt_coder_binary::is_set(uint symbol, uint l) { } bool wt_coder_binary::done(uint symbol, uint l) { - if(l==h-1) return true; + if(l==h) return true; return false; } diff --git a/libcds/src/static_sequence/wt_node_internal.cpp b/libcds/src/static_sequence/wt_node_internal.cpp index ce5de07..b4727a3 100644 --- a/libcds/src/static_sequence/wt_node_internal.cpp +++ b/libcds/src/static_sequence/wt_node_internal.cpp @@ -51,7 +51,7 @@ wt_node_internal::wt_node_internal(uint * symbols, uint n, uint l, wt_coder * c, } } if(count_left>0) { - if(match_left) + if(match_left/* && c->done(left[0],l+1)*/) left_child = new wt_node_leaf(left[0], count_left); else left_child = new wt_node_internal(left, count_left, l+1, c, bmb); @@ -59,7 +59,7 @@ wt_node_internal::wt_node_internal(uint * symbols, uint n, uint l, wt_coder * c, left_child = NULL; } if(count_right>0) { - if(match_right) + if(match_right/* && c->done(right[0],l+1)*/) right_child = new wt_node_leaf(right[0], count_right); else right_child = new wt_node_internal(right, count_right, l+1, c, bmb); diff --git a/libcds/src/static_sequence/wt_node_leaf.cpp b/libcds/src/static_sequence/wt_node_leaf.cpp index e3b7c19..d99ea62 100644 --- a/libcds/src/static_sequence/wt_node_leaf.cpp +++ b/libcds/src/static_sequence/wt_node_leaf.cpp @@ -31,20 +31,18 @@ wt_node_leaf::wt_node_leaf() {} wt_node_leaf::~wt_node_leaf() {} uint wt_node_leaf::rank(uint symbol, uint pos, uint l, wt_coder * c) { - assert(symbol==this->symbol); + if(symbol!=this->symbol) return 0; pos++; - assert(pos<=count); return pos; } uint wt_node_leaf::select(uint symbol, uint pos, uint l, wt_coder * c) { - assert(symbol==this->symbol); - assert(pos<=count && pos>0); + if(symbol!=this->symbol) return (uint)-1; + if(pos==0 || pos>count) return (uint)-1; return pos; } uint wt_node_leaf::access(uint pos) { - assert(pos +alphabet_mapper::alphabet_mapper() { + user_count=0; +} + +void alphabet_mapper::use() { + user_count++; +} + +void alphabet_mapper::unuse() { + user_count--; + if(user_count==0) + delete this; +} + alphabet_mapper * alphabet_mapper::load(FILE *fp) { uint rd; if(fread(&rd,sizeof(uint),1,fp)!=1) return NULL; fseek(fp,-1*sizeof(uint),SEEK_CUR); switch(rd) { case ALPHABET_MAPPER_NONE_HDR: return alphabet_mapper_none::load(fp); + case ALPHABET_MAPPER_CONT_HDR: return alphabet_mapper_cont::load(fp); } return NULL; } diff --git a/libcds/src/utils/alphabet_mapper.h b/libcds/src/utils/alphabet_mapper.h index 125e0c6..faef2ff 100644 --- a/libcds/src/utils/alphabet_mapper.h +++ b/libcds/src/utils/alphabet_mapper.h @@ -26,6 +26,7 @@ #include #define ALPHABET_MAPPER_NONE_HDR 2 +#define ALPHABET_MAPPER_CONT_HDR 3 using namespace std; @@ -35,14 +36,25 @@ using namespace std; */ class alphabet_mapper { public: + alphabet_mapper(); virtual ~alphabet_mapper() {} + /** Maps the symbol */ virtual uint map(uint s)=0; + /** Unmaps the symbol */ virtual uint unmap(uint s)=0; + /** Returns the size of the mapper */ virtual uint size()=0; + /** Saves the mapper to a file */ virtual uint save(FILE *fp)=0; + /** Loads the mapper from a file */ static alphabet_mapper * load(FILE * fp); + virtual void use(); + virtual void unuse(); + protected: + uint user_count; }; #include +#include #endif /* _ALPHABET_MAPPER_H */ diff --git a/libcds/src/utils/alphabet_mapper_none.cpp b/libcds/src/utils/alphabet_mapper_none.cpp index 9bb4bbe..1e545f0 100644 --- a/libcds/src/utils/alphabet_mapper_none.cpp +++ b/libcds/src/utils/alphabet_mapper_none.cpp @@ -1,4 +1,24 @@ - +/* alphabet_mapper_none.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * alphabet_mapper definition + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ + #include alphabet_mapper_none::alphabet_mapper_none() { } @@ -22,4 +42,3 @@ alphabet_mapper_none * alphabet_mapper_none::load(FILE * fp) { if(rd!=ALPHABET_MAPPER_NONE_HDR) return NULL; return new alphabet_mapper_none(); } - diff --git a/libcds/tests/Makefile b/libcds/tests/Makefile index e1dfb7f..f91201f 100644 --- a/libcds/tests/Makefile +++ b/libcds/tests/Makefile @@ -1,34 +1,47 @@ CPP=g++ #CPPFLAGS=-g3 -Wall -I../includes/ -CPPFLAGS=-O9 -Wall -DNDEBUG -I../includes/ +CPPFLAGS=-O9 -w -DNDEBUG -I../includes/ + +OBJECTS=make_bitmap.o static_bitsequence_tester.o static_sequence_tester.o static_sequence_wvtree_test.o static_sequence_gmr_test.o static_sequence_gmr_chunk_test.o static_sequence_wvtree_noptrs_test.o static_bitsequence_test.o text_to_int.o +BIN=make_bitmap static_sequence_wvtree_test static_sequence_gmr_test static_sequence_gmr_chunk_test static_sequence_wvtree_noptrs_test static_bitsequence_test text_to_int -OBJECTS=test_naive.o test_rrr02.o test_brw32.o make_bitmap.o test_wvtree01.o test_wvtree02.o -BIN=test_naive test_rrr02 test_brw32 make_bitmap test_wvtree01 test_wvtree02 LIB=../lib/libcds.a %.o: %.cpp - $(CPP) $(CPPFLAGS) -c $< -o $@ + @echo " [C++] Compiling $<" + @$(CPP) $(CPPFLAGS) -c $< -o $@ all: $(OBJECTS) $(BIN) -test_naive: - $(CPP) $(CPPFLAGS) -o test_naive test_naive.o $(LIB) +static_bitsequence_test: + @echo " [C++] Building static_bitsequence_test" + @$(CPP) $(CPPFLAGS) -o static_bitsequence_test static_bitsequence_test.o static_bitsequence_tester.o $(LIB) + +make_bitmap: + @echo " [C++] Building make_bitmap" + @$(CPP) $(CPPFLAGS) -o make_bitmap make_bitmap.o $(LIB) -test_rrr02: - $(CPP) $(CPPFLAGS) -o test_rrr02 test_rrr02.o $(LIB) +text_to_int: + @echo " [C++] Building text_to_int" + @$(CPP) $(CPPFLAGS) -o text_to_int text_to_int.o $(LIB) -test_brw32: - $(CPP) $(CPPFLAGS) -o test_brw32 test_brw32.o $(LIB) +static_sequence_wvtree_test: + @echo " [C++] Building static_sequence_wvtree_test" + @$(CPP) $(CPPFLAGS) -o static_sequence_wvtree_test static_sequence_wvtree_test.o static_sequence_tester.o $(LIB) -make_bitmap: - $(CPP) $(CPPFLAGS) -o make_bitmap make_bitmap.o $(LIB) +static_sequence_gmr_test: + @echo " [C++] Building static_sequence_gmr_test" + @$(CPP) $(CPPFLAGS) -o static_sequence_gmr_test static_sequence_gmr_test.o static_sequence_tester.o $(LIB) -test_wvtree01: - $(CPP) $(CPPFLAGS) -o test_wvtree01 test_wvtree01.o $(LIB) +static_sequence_wvtree_noptrs_test: + @echo " [C++] Building static_sequence_wvtree_noptrs_test" + @$(CPP) $(CPPFLAGS) -o static_sequence_wvtree_noptrs_test static_sequence_wvtree_noptrs_test.o static_sequence_tester.o $(LIB) -test_wvtree02: - $(CPP) $(CPPFLAGS) -o test_wvtree02 test_wvtree02.o $(LIB) +static_sequence_gmr_chunk_test: + @echo " [C++] Building static_sequence_gmr_chunk_test" + @$(CPP) $(CPPFLAGS) -o static_sequence_gmr_chunk_test static_sequence_gmr_chunk_test.o static_sequence_tester.o $(LIB) clean: - rm -f $(OBJECTS) $(BIN) + @echo " [CLN] Cleaning object files" + @rm -f $(OBJECTS) $(BIN) diff --git a/libcds/tests/make_bitmap.cpp b/libcds/tests/make_bitmap.cpp index 0da2d93..11d2f28 100644 --- a/libcds/tests/make_bitmap.cpp +++ b/libcds/tests/make_bitmap.cpp @@ -1,3 +1,23 @@ +/* make_bitmap.cpp + * Copyright (C) 2008, Francisco Claude, all rights reserved. + * + * make_bitmap + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + * + */ #include #include diff --git a/testXML.srx b/testXML.srx index fe7f118..90550fd 100644 Binary files a/testXML.srx and b/testXML.srx differ diff --git a/test_XML.cpp b/test_XML.cpp index c13c04d..e38d325 100644 --- a/test_XML.cpp +++ b/test_XML.cpp @@ -24,7 +24,7 @@ int main() unsigned char openTag[]="A", closeTag[]="/A", filename[]="testXML", text[]="Hello World"; treeNode x; - n = 49999; + n = 99999; X = new XMLTree();