Adding support for building wavelet trees using uchar arrays
authorfclaude <fclaude@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Sun, 8 Mar 2009 02:54:06 +0000 (02:54 +0000)
committerfclaude <fclaude@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Sun, 8 Mar 2009 02:54:06 +0000 (02:54 +0000)
git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/XMLTree@214 3cdefd35-fc62-479d-8e8d-bae585ffb9ca

libcds/src/coders/huffman_codes.cpp
libcds/src/coders/huffman_codes.h
libcds/src/static_sequence/static_sequence_wvtree.cpp
libcds/src/static_sequence/static_sequence_wvtree.h
libcds/src/static_sequence/wt_coder_huff.cpp
libcds/src/static_sequence/wt_coder_huff.h
libcds/src/static_sequence/wt_node_internal.cpp
libcds/src/static_sequence/wt_node_internal.h

index 8bffa69..a3ccf1a 100644 (file)
@@ -34,6 +34,19 @@ huffman_codes::huffman_codes(uint * symb, uint n) {
   delete [] occ;
 }
 
+huffman_codes::huffman_codes(uchar * symb, uint n) {
+  uchar max_v = 0;
+  for(uint i=0;i<n;i++)
+    max_v = max(max_v,symb[i]);
+  uint * occ = new uint[max_v+1];
+  for(uint i=0;i<max_v+1;i++)
+    occ[i] = 0;
+  for(uint i=0;i<n;i++)
+    occ[symb[i]]++;
+  huff_table = createHuff(occ, max_v);
+  delete [] occ;
+}
+
 huffman_codes::huffman_codes() {
 }
 
index a82c8dd..7538225 100644 (file)
@@ -34,6 +34,7 @@ class huffman_codes {
   public:
     /** Creates the codes for the sequence seq of length n */
     huffman_codes(uint * seq, uint n);
+    huffman_codes(uchar * seq, uint n);
     ~huffman_codes();
     
     /** Encodes symb into stream at bit-position pos, return the ending position (bits) */
index ea6a813..aa6a885 100644 (file)
@@ -33,6 +33,18 @@ static_sequence_wvtree::static_sequence_wvtree(uint * symbols, uint n, wt_coder
     symbols[i] = am->unmap(symbols[i]);  
 }
 
+static_sequence_wvtree::static_sequence_wvtree(uchar * symbols, uint n, wt_coder * c, static_bitsequence_builder * bmb, alphabet_mapper * am) {
+  for(uint i=0;i<n;i++) 
+    symbols[i] = (uchar)am->map((uint)symbols[i]);
+  this->am = am;
+       am->use();
+  this->c=c;
+       c->use();
+       root = new wt_node_internal(symbols, n, 0, c, bmb);
+  for(uint i=0;i<n;i++) 
+    symbols[i] = (uchar)am->unmap((uint)symbols[i]);  
+}
+
 static_sequence_wvtree::static_sequence_wvtree() {}
 
 static_sequence_wvtree::~static_sequence_wvtree() {
index f972648..c67d0f8 100644 (file)
@@ -46,6 +46,8 @@ class static_sequence_wvtree : public static_sequence {
      * equals n */
     static_sequence_wvtree(uint * symbols, uint n, wt_coder * coder, static_bitsequence_builder * bmb, alphabet_mapper * am);
 
+    static_sequence_wvtree(uchar * symbols, uint n, wt_coder * coder, static_bitsequence_builder * bmb, alphabet_mapper * am);
+
     virtual ~static_sequence_wvtree();
 
     virtual uint rank(uint symbol, uint pos);
index cf831dc..5ba633b 100644 (file)
@@ -31,6 +31,16 @@ wt_coder_huff::wt_coder_huff(uint * symbs, uint n, alphabet_mapper * am) {
     symbs[i] = am->unmap(symbs[i]);
 }
 
+wt_coder_huff::wt_coder_huff(uchar * symbs, uint n, alphabet_mapper * am) {
+  for(uint i=0;i<n;i++)
+    symbs[i] = (uchar)am->map((uint)symbs[i]);
+       hc = new huffman_codes(symbs, n);
+  buffer = new uint[hc->max_length()/W+1]; 
+  s_len = 0; last_symbol = (uint)-1;
+  for(uint i=0;i<n;i++)
+    symbs[i] = (uchar)am->unmap((uint)symbs[i]);
+}
+
 wt_coder_huff::wt_coder_huff() {}
 
 wt_coder_huff::~wt_coder_huff() {
index 85fa2f7..1811ff0 100644 (file)
@@ -36,6 +36,7 @@ class wt_coder_huff: public wt_coder {
     /** Buils a wt_coder_huff using the sequence of length n and the alphabet_mapper
      *  to determine the huffman codes */
                wt_coder_huff(uint *symbs, uint n, alphabet_mapper * am);
+               wt_coder_huff(uchar *symbs, uint n, alphabet_mapper * am);
                virtual ~wt_coder_huff();
                virtual bool is_set(uint symbol, uint l);
                virtual bool done(uint symbol, uint l);
index b4727a3..d2d0fa8 100644 (file)
@@ -70,6 +70,56 @@ wt_node_internal::wt_node_internal(uint * symbols, uint n, uint l, wt_coder * c,
        delete [] right;
 }
 
+wt_node_internal::wt_node_internal(uchar * symbols, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb) {
+       uint * ibitmap = new uint[n/W+1];
+       for(uint i=0;i<n/W+1;i++)
+               ibitmap[i]=0;
+       for(uint i=0;i<n;i++) 
+               if(c->is_set((uint)symbols[i],l))
+                       bitset(ibitmap,i);
+       bitmap = bmb->build(ibitmap, n);
+  delete [] ibitmap;
+       uint count_right = bitmap->rank1(n-1);
+       uint count_left = n-count_right+1;
+       uchar * left = new uchar[count_left+1];
+       uchar * right = new uchar[count_right+1];
+       count_right = count_left = 0;
+       bool match_left = true, match_right = true;
+       for(uint i=0;i<n;i++) {
+               if(bitmap->access(i)) {
+                       right[count_right++]=symbols[i];
+                       if(count_right>1)
+                               if(right[count_right-1]!=right[count_right-2])
+                                       match_right = false;
+               }
+               else {
+                       left[count_left++]=symbols[i];
+                       if(count_left>1)
+                               if(left[count_left-1]!=left[count_left-2])
+                                       match_left = false;
+               }
+       }
+       if(count_left>0) {
+               if(match_left/* && c->done(left[0],l+1)*/)
+                       left_child = new wt_node_leaf((uint)left[0], count_left);
+               else
+                       left_child = new wt_node_internal(left, count_left, l+1, c, bmb);
+       } else {
+               left_child = NULL;
+       }
+       if(count_right>0) {
+               if(match_right/* && c->done(right[0],l+1)*/)
+                       right_child = new wt_node_leaf((uint)right[0], count_right);
+               else
+                       right_child = new wt_node_internal(right, count_right, l+1, c, bmb);
+       } else {
+               right_child = NULL;
+       }
+       delete [] left;
+       delete [] right;
+}
+
+
 wt_node_internal::wt_node_internal() { }
 
 wt_node_internal::~wt_node_internal() {
index 7d9dad6..b39efbd 100644 (file)
@@ -37,6 +37,7 @@
 class wt_node_internal: public wt_node {
        public:
                wt_node_internal(uint * seq, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb);
+               wt_node_internal(uchar * seq, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb);
                virtual ~wt_node_internal();
                virtual uint rank(uint symbol, uint pos, uint level, wt_coder * c);
                virtual uint select(uint symbol, uint pos, uint level, wt_coder * c);