Construction space for WT
author nvalimak <nvalimak@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Tue, 7 Apr 2009 08:55:31 +0000 (08:55 +0000)
committer nvalimak <nvalimak@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Tue, 7 Apr 2009 08:55:31 +0000 (08:55 +0000)
git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/XMLTree@306 3cdefd35-fc62-479d-8e8d-bae585ffb9ca

libcds/src/static_sequence/static_sequence_wvtree.cpp
libcds/src/static_sequence/wt_node_internal.cpp
libcds/src/static_sequence/wt_node_internal.h

index ef504bd..0e08676 100644 (file)
@@ -40,7 +40,12 @@ static_sequence_wvtree::static_sequence_wvtree(uchar * symbols, uint n, wt_coder
        am->use();
   this->c=c;
        c->use();
-       root = new wt_node_internal(symbols, n, 0, c, bmb); 
+        uint *done = new uint[n/W+1];
+        for (uint i = 0; i < n/W+1; i++)
+            done[i] = 0;
+       root = new wt_node_internal(symbols, n, 0, c, bmb, 0, done);
+        delete [] done;
+        delete [] symbols;
         symbols = 0; // Already deleted!
 //  for(uint i=0;i<n;i++) 
 //    symbols[i] = (uchar)am->unmap((uint)symbols[i]);  
index d39ea4b..2219e9b 100644 (file)
@@ -138,6 +138,95 @@ wt_node_internal::wt_node_internal(uchar * symbols, uint n, uint l, wt_coder * c
        delete [] right;
 }
 
+wt_node_internal::wt_node_internal(uchar * symbols, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb, uint left, uint *done) {
+       uint * ibitmap = new uint[n/W+1];
+       for(uint i=0;i<n/W+1;i++)
+               ibitmap[i]=0;
+       for(uint i=0;i<n;i++) 
+               if(c->is_set((uint)symbols[i + left],l))
+                       bitset(ibitmap,i);
+       bitmap = bmb->build(ibitmap, n);
+        delete [] ibitmap;
+
+       uint count_right = bitmap->rank1(n-1);
+       uint count_left = n-count_right;
+/*     uchar * leftarr = new uchar[count_left+1];
+       uchar * rightarr = new uchar[count_right+1];
+       count_right = count_left = 0;
+       for(uint i=0;i<n;i++) {
+               if(bitmap->access(i)) {
+                       rightarr[count_right++]=symbols[i+left];
+               }
+               else {
+                       leftarr[count_left++]=symbols[i+left];
+               }
+                }
+*/
+
+        for (uint i=0;i<n;i++)
+            set_field(done, 1, i+left, 0);
+
+        for (uint i = 0; i < n; ) 
+        {
+            uint j = i;
+            uchar swap = symbols[j+left];
+            while (!get_field(done, 1, j+left)) { // swapping
+                ulong k = j; 
+                if (!c->is_set(swap,l)) 
+                    j = bitmap->rank0(k)-1;
+                else 
+                    j = count_left + bitmap->rank1(k)-1;
+                uchar temp = symbols[j+left];
+                symbols[j+left] = swap;
+                swap = temp;
+                set_field(done,1,k+left,1);
+            }
+
+            while (get_field(done,1,i+left))
+                   ++i;
+        }
+
+        // checking
+        /*       for (uint i=0;i<n;i++)
+            if (!bitget(done,i+left)) 
+                std::cout << "not swapped: " << i << "\n";
+               for (uint i=0;i<count_left;i++)
+            if (leftarr[i] != symbols[i+left]) //c->is_set(symbols[i+left], l)) 
+            {    
+                std::cout << symbols[i+left] << " != " << leftarr[i] << " lev = " << l << "\n";
+                exit(0);
+            }
+        for (uint i=count_left;i<n;i++)
+            if (rightarr[i-count_left] != symbols[i+left]) //!c->is_set(symbols[i+left],l)) 
+                std::cout << symbols[i+left] << " != " << rightarr[i-count_left] <<  " lev = " << l <<  "\n";    
+        */
+       bool match_left = true, match_right = true;
+        for (uint i=1; i < count_left; i++)
+            if (symbols[i+left] != symbols[i+left-1])
+                match_left = false;
+        for (uint i=count_left + 1; i < n; i++)
+            if (symbols[i+left] != symbols[i+left-1])
+                match_right = false;
+
+
+       if(count_left>0) {
+               if(match_left/* && c->done(left[0],l+1)*/)
+                    left_child = new wt_node_leaf((uint)symbols[left], count_left);
+               else
+                    left_child = new wt_node_internal(symbols, count_left, l+1, c, bmb, left, done);
+       } else {
+               left_child = NULL;
+       }
+       if(count_right>0) {
+               if(match_right/* && c->done(right[0],l+1)*/)
+                    right_child = new wt_node_leaf((uint)symbols[left+count_left], count_right);
+               else 
+                    right_child = new wt_node_internal(symbols, count_right, l+1, c, bmb, left+count_left, done);
+       } else {
+               right_child = NULL;
+       }
+}
+
 
 wt_node_internal::wt_node_internal() { } // Default constructor: empty body and no initializer list, so it assigns nothing to the members here.
 
index 0b1bbc3..f4bb3f1 100644 (file)
@@ -38,6 +38,7 @@ class wt_node_internal: public wt_node {
        public:
                wt_node_internal(uint * seq, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb);
                wt_node_internal(uchar * seq, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb);
+               wt_node_internal(uchar * seq, uint n, uint l, wt_coder * c, static_bitsequence_builder * bmb, uint, uint *);
                virtual ~wt_node_internal();
                virtual uint rank(uint symbol, uint pos, uint level, wt_coder * c);
                virtual uint rankLessThan(uint &symbol, uint pos, uint level, wt_coder * c);