From 4bd816265ae88b4e1631634260384baa559f8967 Mon Sep 17 00:00:00 2001 From: fclaude Date: Sun, 8 Mar 2009 02:54:06 +0000 Subject: [PATCH] Adding support for building wavelet trees using uchar arrays git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/XMLTree@214 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- libcds/src/coders/huffman_codes.cpp | 13 +++++ libcds/src/coders/huffman_codes.h | 1 + .../static_sequence_wvtree.cpp | 12 +++++ .../static_sequence/static_sequence_wvtree.h | 2 + libcds/src/static_sequence/wt_coder_huff.cpp | 10 ++++ libcds/src/static_sequence/wt_coder_huff.h | 1 + .../src/static_sequence/wt_node_internal.cpp | 50 +++++++++++++++++++ libcds/src/static_sequence/wt_node_internal.h | 1 + 8 files changed, 90 insertions(+) diff --git a/libcds/src/coders/huffman_codes.cpp b/libcds/src/coders/huffman_codes.cpp index 8bffa69..a3ccf1a 100644 --- a/libcds/src/coders/huffman_codes.cpp +++ b/libcds/src/coders/huffman_codes.cpp @@ -34,6 +34,19 @@ huffman_codes::huffman_codes(uint * symb, uint n) { delete [] occ; } +huffman_codes::huffman_codes(uchar * symb, uint n) { + uchar max_v = 0; + for(uint i=0;iunmap(symbols[i]); } +static_sequence_wvtree::static_sequence_wvtree(uchar * symbols, uint n, wt_coder * c, static_bitsequence_builder * bmb, alphabet_mapper * am) { + for(uint i=0;imap((uint)symbols[i]); + this->am = am; + am->use(); + this->c=c; + c->use(); + root = new wt_node_internal(symbols, n, 0, c, bmb); + for(uint i=0;iunmap((uint)symbols[i]); +} + static_sequence_wvtree::static_sequence_wvtree() {} static_sequence_wvtree::~static_sequence_wvtree() { diff --git a/libcds/src/static_sequence/static_sequence_wvtree.h b/libcds/src/static_sequence/static_sequence_wvtree.h index f972648..c67d0f8 100644 --- a/libcds/src/static_sequence/static_sequence_wvtree.h +++ b/libcds/src/static_sequence/static_sequence_wvtree.h @@ -46,6 +46,8 @@ class static_sequence_wvtree : public static_sequence { * equals n */ static_sequence_wvtree(uint * symbols, uint n, wt_coder * coder, static_bitsequence_builder * bmb, alphabet_mapper * am); + static_sequence_wvtree(uchar * symbols, uint n, wt_coder * coder, static_bitsequence_builder * bmb, alphabet_mapper * am); + virtual ~static_sequence_wvtree(); virtual uint rank(uint symbol, uint pos); diff --git a/libcds/src/static_sequence/wt_coder_huff.cpp b/libcds/src/static_sequence/wt_coder_huff.cpp index cf831dc..5ba633b 100644 --- a/libcds/src/static_sequence/wt_coder_huff.cpp +++ b/libcds/src/static_sequence/wt_coder_huff.cpp @@ -31,6 +31,16 @@ wt_coder_huff::wt_coder_huff(uint * symbs, uint n, alphabet_mapper * am) { symbs[i] = am->unmap(symbs[i]); } +wt_coder_huff::wt_coder_huff(uchar * symbs, uint n, alphabet_mapper * am) { + for(uint i=0;imap((uint)symbs[i]); + hc = new huffman_codes(symbs, n); + buffer = new uint[hc->max_length()/W+1]; + s_len = 0; last_symbol = (uint)-1; + for(uint i=0;iunmap((uint)symbs[i]); +} + wt_coder_huff::wt_coder_huff() {} wt_coder_huff::~wt_coder_huff() { diff --git a/libcds/src/static_sequence/wt_coder_huff.h b/libcds/src/static_sequence/wt_coder_huff.h index 85fa2f7..1811ff0 100644 --- a/libcds/src/static_sequence/wt_coder_huff.h +++ b/libcds/src/static_sequence/wt_coder_huff.h @@ -36,6 +36,7 @@ class wt_coder_huff: public wt_coder { /** Buils a wt_coder_huff using the sequence of length n and the alphabet_mapper * to determine the huffman codes */ wt_coder_huff(uint *symbs, uint n, alphabet_mapper * am); + wt_coder_huff(uchar *symbs, uint n, alphabet_mapper * am); virtual ~wt_coder_huff(); virtual bool is_set(uint symbol, uint l); virtual bool done(uint symbol, uint l); diff --git a/libcds/src/static_sequence/wt_node_internal.cpp b/libcds/src/static_sequence/wt_node_internal.cpp index b4727a3..d2d0fa8 100644 --- a/libcds/src/static_sequence/wt_node_internal.cpp +++ b/libcds/src/static_sequence/wt_node_internal.cpp @@ -70,6 +70,56 @@ wt_node_internal::wt_node_internal(uint * symbols, uint n, uint l, wt_coder * c, delete [] right; } +wt_node_internal::wt_node_internal(uchar * symbols, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb) { + uint * ibitmap = new uint[n/W+1]; + for(uint i=0;iis_set((uint)symbols[i],l)) + bitset(ibitmap,i); + bitmap = bmb->build(ibitmap, n); + delete [] ibitmap; + uint count_right = bitmap->rank1(n-1); + uint count_left = n-count_right+1; + uchar * left = new uchar[count_left+1]; + uchar * right = new uchar[count_right+1]; + count_right = count_left = 0; + bool match_left = true, match_right = true; + for(uint i=0;iaccess(i)) { + right[count_right++]=symbols[i]; + if(count_right>1) + if(right[count_right-1]!=right[count_right-2]) + match_right = false; + } + else { + left[count_left++]=symbols[i]; + if(count_left>1) + if(left[count_left-1]!=left[count_left-2]) + match_left = false; + } + } + if(count_left>0) { + if(match_left/* && c->done(left[0],l+1)*/) + left_child = new wt_node_leaf((uint)left[0], count_left); + else + left_child = new wt_node_internal(left, count_left, l+1, c, bmb); + } else { + left_child = NULL; + } + if(count_right>0) { + if(match_right/* && c->done(right[0],l+1)*/) + right_child = new wt_node_leaf((uint)right[0], count_right); + else + right_child = new wt_node_internal(right, count_right, l+1, c, bmb); + } else { + right_child = NULL; + } + delete [] left; + delete [] right; +} + + wt_node_internal::wt_node_internal() { } wt_node_internal::~wt_node_internal() { diff --git a/libcds/src/static_sequence/wt_node_internal.h b/libcds/src/static_sequence/wt_node_internal.h index 7d9dad6..b39efbd 100644 --- a/libcds/src/static_sequence/wt_node_internal.h +++ b/libcds/src/static_sequence/wt_node_internal.h @@ -37,6 +37,7 @@ class wt_node_internal: public wt_node { public: wt_node_internal(uint * seq, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb); + wt_node_internal(uchar * seq, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb); virtual ~wt_node_internal(); virtual uint rank(uint symbol, uint pos, uint level, wt_coder * c); virtual uint select(uint symbol, uint pos, uint level, wt_coder * c); -- 2.17.1