Removed
authornvalimak <nvalimak@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Mon, 20 Apr 2009 11:35:43 +0000 (11:35 +0000)
committernvalimak <nvalimak@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Mon, 20 Apr 2009 11:35:43 +0000 (11:35 +0000)
git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/TextCollection@336 3cdefd35-fc62-479d-8e8d-bae585ffb9ca

RLWaveletTree.cpp [deleted file]
RLWaveletTree.h [deleted file]

diff --git a/RLWaveletTree.cpp b/RLWaveletTree.cpp
deleted file mode 100644 (file)
index 6b8d232..0000000
+++ /dev/null
@@ -1,384 +0,0 @@
-/***************************************************************************
- *   Copyright (C) 2007 by Veli Mäkinen                                    *
- *                                                                         *
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- *   This program is distributed in the hope that it will be useful,       *
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
- *   GNU General Public License for more details.                          *
- *                                                                         *
- *   You should have received a copy of the GNU General Public License     *
- *   along with this program; if not, write to the                         *
- *   Free Software Foundation, Inc.,                                       *
- *   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.         *
- ***************************************************************************/
-#include "RLWaveletTree.h"
-#include <stdexcept>
-using namespace std;
-
-void RLWaveletTree::remapAlphabet(uchar *text, ulong n)
-{
-    ulong *chars = new ulong[256/W+1];
-    ulong i;
-    for (i = 0; i < 256/W+1; ++i)
-        chars[i] = 0;
-    // Set up flags:
-    for (i = 0; i < n; ++i)
-        Tools::SetField(chars, 1, text[i], 1);
-    charMap = new BitRank(chars, 256, true);
-
-    // Remap text
-    for (i = 0; i < n; ++i)
-        text[i] = charMap->rank(text[i])-1;
-}
-
-RLWaveletTree::RLWaveletTree(uchar *D, ulong n) {
-    remapAlphabet(D, n);
-//    std::cout << "Number of chars: " << charMap->rank(255) << ", maxlevel = " << Tools::CeilLog2(charMap->rank(255)) << std::endl;
-
-    this->n = n;
-    this->maxlevel = Tools::CeilLog2(charMap->rank(255));
-
-    ulong i, k, value;
-    ulong** bucket = new ulong*[maxlevel+1];
-    ulong *wtree = new ulong[n*maxlevel/(8*sizeof(ulong))+1];
-    for (i = 0; i < n*maxlevel/(8*sizeof(ulong))+1; i ++)
-        wtree[i] = 0lu;
-
-   // allocate memory for storing the locations of boudaries 
-    for (k=0; k<=maxlevel; k++) {
-        bucket[k] = (ulong *)new ulong[(ulong)1 << k];
-        for (i=0; i<((ulong)1 << k); i++)
-            bucket[k][i]=0;
-    }
-
-   // compute the sizes of buckets
-    for (i=0; i<n; i++) {
-        value = D[i];
-        for (k=0; k<=maxlevel; k++)
-            bucket[k][value >> (maxlevel-k)]++;
-    }
-
-   // cumulate the bucket sizes into boundary locations 
-    for (k=0; k<=maxlevel; k++) {
-        for (i=1; i<((ulong)1 << k); i++)
-            bucket[k][i]+=bucket[k][i-1];
-        for (i=((ulong)1 << k)-1; i>=1; i--)
-            bucket[k][i]=bucket[k][i-1];
-        bucket[k][0]=0;
-    }
-
-   //store the leaf information
-    leaves = new ulong[((ulong)1 << maxlevel)+1];
-    for (k=0; k<(ulong)1<<maxlevel; k++)
-        leaves[k] = bucket[maxlevel][k];
-    leaves[k]=n;
-
-   // compute wtree using the information about boundaries 
-    for (i=0; i<n; i++) {
-        value = D[i];
-        for (k=0; k<maxlevel; k++)
-            Tools::SetField(wtree,1,k*n+(bucket[k][value >> (maxlevel-k)])++,(value>>(maxlevel-k-1)) & (ulong)1);
-    }
-    delete [] D;
-    // deallocate boundaries
-    for (k=0; k<=maxlevel; k++)
-        delete[] bucket[k];
-    delete[] bucket;
-   
-    bitrank = new GapEncode(wtree, n*maxlevel, true);
-    prevRunLen = 0;
-    prevChar = 0;
-    prevPos = 0;
-}
-
-RLWaveletTree::~RLWaveletTree() {
-    delete charMap;
-    delete [] leaves;
-    delete bitrank;
-}
-
-ulong RLWaveletTree::rank(ulong c, ulong i) {
-//    if (i >= n) FIXME Replace with assert?
-//        std::cout << "RLWT::rank parameter i out of range" << std::endl;
-   ulong k;
-   ulong j=i;
-   ulong left = 0, right = (ulong)1 << maxlevel;
-   ulong before=0;
-   c = charMap->rank(c)-1;
-   
-   // first round separately to avoid extra "if"  
-   if ((c >> (maxlevel-1)) & (ulong)1) { // go to the right
-      j = bitrank->rank(j)-1;
-      left = right>>1; 
-   }
-   else { // go to the left
-      j -= bitrank->rank(j);
-      right = right>>1;
-   }
-   // then the next without the extra if
-   for (k=1;k<maxlevel;k++) {
-      before = bitrank->rank(n*k+leaves[left]-1);
-      if ((c >> (maxlevel-k-1)) & (ulong)1) { // go to the right
-         j = bitrank->rank(n*k+leaves[left]+j)-before-1;
-         left = (right+left)>>1; 
-      }
-      else { // go to the left
-         j -= bitrank->rank(n*k+leaves[left]+j)-before;
-        right = (right+left)>>1;
-      }
-      // can never happen: if (left > right) return 0;
-   }
-   return j+1;   
-}
-
-ulong RLWaveletTree::select(ulong c, ulong j) {
-    c = charMap->rank(c)-1;
-   ulong left=c, right=c+1;
-   ulong k, i=j-1, before;
-   for (k=maxlevel-1; k>0;--k) {
-       if ((c >> (maxlevel-k-1)) & (ulong)1) { // right side of parent
-          left -= right-left;
-         before = bitrank->rank(n*k+leaves[left]-1);
-         i = bitrank->select(before+i+1)-n*k-leaves[left];
-       }
-       else { // left side of parent
-          right +=right-left;
-          before = n*k+leaves[left]-bitrank->rank(n*k+leaves[left]-1);
-         i = bitrank->select0(before+i+1)-n*k-leaves[left];
-       }                         
-   }
-   // last level separately to avoid one extra if
-   if ((c >> (maxlevel-1)) & (ulong)1) // right side of parent
-      return bitrank->select(i+1);
-   else // left side of parent
-      return bitrank->select0(i+1);
-}
-
-bool RLWaveletTree::IsCharAtPos(ulong c, ulong i) {
-   return charAtPos(i)==c;
-}
-
-
-ulong RLWaveletTree::charAtPos(ulong i) 
-{
-//    if (i >= n) // FIXME Replace with assert?
-//        std::cout << "RLWT::charAtPos parameter i out of range" << std::endl;
-    ulong k;
-    ulong j=i;
-    ulong left = 0, right = (ulong)1 << maxlevel;
-    ulong before=0;
-    ulong c = 0;
-    ulong rank_tmp = 0;
-    // first round separately to avoid extra "if"  
-    if (bitrank->IsBitSet(j, &rank_tmp)) { // go to the right
-        c = c | ((ulong)1 << (maxlevel-1));
-        j = rank_tmp-1; //bitrank->rank(j)-1;
-        left = right>>1; 
-    }
-    else { // go to the left
-        // c = c
-        j -= rank_tmp; // bitrank->rank(j);
-        right = right>>1;
-    }
-    // then the next without the extra if
-    for (k=1;k<maxlevel;k++) {
-        before = bitrank->rank(n*k+leaves[left]-1);
-        if (bitrank->IsBitSet(n*k+leaves[left]+j, &rank_tmp)) { // go to the right
-            c = c | ((ulong)1 << (maxlevel-k-1));
-            j = rank_tmp-before-1; //bitrank->rank(n*k+leaves[left]+j)-before-1;
-            left = (right+left)>>1; 
-        }
-        else { // go to the left
-            // c = c
-            j -= rank_tmp-before; //bitrank->rank(n*k+leaves[left]+j)-before;
-            right = (right+left)>>1;
-        }
-        // can never happen: if (left > right) return 0;
-    }
-    return charMap->select(c+1);         
-}
-
-/* returns also the rank-1 of char at given position i */
-ulong RLWaveletTree::charAtPos(ulong i, ulong *rank) 
-{
-//    if (i >= n) // FIXME Replace with assert?
-//        std::cout << "RLWT::charAtPos2 parameter i out of range" << std::endl;
-    ulong k;
-    ulong j=i;
-    ulong left = 0, right = (ulong)1 << maxlevel;
-    ulong before=0;
-    ulong c = 0;
-    ulong rank_tmp = 0;
-    // first round separately to avoid extra "if"  
-    if (bitrank->IsBitSet(j, &rank_tmp)) { // FIXME j or leaves[left]+j ?
-        c = c | ((ulong)1 << (maxlevel-1));
-        j = rank_tmp-1; //bitrank->rank(j)-1;
-        left = right>>1; 
-    }
-    else { // go to the left
-        // c = c
-        j -= rank_tmp; //bitrank->rank(j);
-        right = right>>1;
-    }
-    // then the next without the extra if
-    for (k=1;k<maxlevel;k++) {
-        before = bitrank->rank(n*k+leaves[left]-1);
-        if (bitrank->IsBitSet(n*k+leaves[left]+j, &rank_tmp)) { // go to the right
-            c = c | ((ulong)1 << (maxlevel-k-1));
-            j = rank_tmp-before-1; //bitrank->rank(n*k+leaves[left]+j)-before-1;
-            left = (right+left)>>1; 
-        }
-        else { // go to the left
-            // c = c
-            j -= rank_tmp-before; //bitrank->rank(n*k+leaves[left]+j)-before;
-            right = (right+left)>>1;
-        }
-        // can never happen: if (left > right) return 0;
-    }
-    *rank = j;
-    return charMap->select(c+1);         
-}
-
-/* returns also the rank-1 of char at given position i
- * and trailing char-run length */
-ulong RLWaveletTree::charAtPosRun(ulong i, ulong *rank) 
-{
-    // Check for buffered data from previous call
-    if (i > prevPos && i <= prevPos + prevRunLen)
-    {
-        *rank = prevRank + (i-prevPos);
-        return prevChar;
-    }
-
-    prevPos = i;
-//    if (i >= n) // FIXME Replace with assert?
-//        std::cout << "RLWT::charAtPos2 parameter i out of range" << std::endl;
-    ulong k;
-    ulong j=i;
-    ulong left = 0, right = (ulong)1 << maxlevel;
-    ulong before=0;
-    ulong c = 0;
-    ulong rank_tmp = 0;
-    ulong runl_tmp = 0;
-    // first round separately to avoid extra "if"  
-    if (bitrank->IsBitSet(j, &rank_tmp, &prevRunLen)) {
-        if (j+prevRunLen > n)
-            prevRunLen = n-j-1;
-        c = c | ((ulong)1 << (maxlevel-1));
-        j = rank_tmp-1; //bitrank->rank(j)-1;
-        left = right>>1; 
-    }
-    else { // go to the left
-        // c = c
-        if (j+prevRunLen > n)
-            prevRunLen = n-j-1;
-        j -= rank_tmp; //bitrank->rank(j);
-        right = right>>1;
-    }
-
-    // then the next without the extra if
-    for (k=1;k<maxlevel;k++) {
-        before = bitrank->rank(n*k+leaves[left]-1);
-        if (bitrank->IsBitSet(n*k+leaves[left]+j, &rank_tmp, &runl_tmp)) { // go to the right
-            if (leaves[left]+j+runl_tmp > leaves[right])
-                runl_tmp = leaves[right] - (leaves[left]+j) - 1;
-            if (prevRunLen > runl_tmp) // FIXME Better way to keep track of a run?
-                prevRunLen = runl_tmp;
-            c = c | ((ulong)1 << (maxlevel-k-1));
-            j = rank_tmp-before-1; //bitrank->rank(n*k+leaves[left]+j)-before-1;
-            left = (right+left)>>1; 
-        }
-        else { // go to the left
-            if (leaves[left]+j+runl_tmp > leaves[right])
-                runl_tmp = leaves[right] - (leaves[left]+j) - 1;
-            if (prevRunLen > runl_tmp)  // FIXME Better way to keep track of a run?
-                prevRunLen = runl_tmp;
-            // c = c
-            j -= rank_tmp-before; //bitrank->rank(n*k+leaves[left]+j)-before;
-            right = (right+left)>>1;
-        }
-        // can never happen: if (left > right) return 0;
-    }
-    *rank = j;
-    prevRank = j;
-    prevChar = charMap->select(c+1);
-    return charMap->select(c+1);         
-}
-
-/**
- * Saving data fields:
-       ulong n;
-       ulong maxlevel;
-       ulong *leaves; // stores the leaf positions of characters 0..2^maxlevel-1
-        GapEncode *bitrank;
-        BitRank *charMap;
-*/
-void RLWaveletTree::Save(FILE *file) const
-{
-    if (std::fwrite(&(this->n), sizeof(ulong), 1, file) != 1)
-        throw std::runtime_error("RLWaveletTree::Save(): file write error (n).");
-    if (std::fwrite(&(this->maxlevel), sizeof(ulong), 1, file) != 1)
-        throw std::runtime_error("RLWaveletTree::Save(): file write error (maxlevel).");
-
-    for (ulong offset = 0; offset < ((ulong)1 << maxlevel)+1; ++offset)
-    {
-        if (std::fwrite(this->leaves + offset, sizeof(ulong), 1, file) != 1)
-            throw std::runtime_error("RLWaveletTree::Save(): file write error (leaves).");
-    }
-
-    bitrank->Save(file);
-
-    // Silly
-    ulong *chars = new ulong[256/W+1];
-    // Set up flags:
-    for (ulong i = 0; i < 256; ++i)
-        Tools::SetField(chars, 1, i, charMap->IsBitSet(i));
-
-    for (ulong offset = 0; offset < 256/W+1; ++offset)
-    {
-        if (std::fwrite(chars + offset, sizeof(ulong), 1, file) != 1)
-            throw std::runtime_error("RLWaveletTree::Save(): file write error (chars).");
-    }
-
-    delete [] chars;
-}
-
-/**
- * Load from file
- */
-RLWaveletTree::RLWaveletTree(FILE *file)
-{
-    if (std::fread(&(this->n), sizeof(ulong), 1, file) != 1)
-        throw std::runtime_error("RLWaveletTree::Load(): file read error (n).");
-    if (std::fread(&(this->maxlevel), sizeof(ulong), 1, file) != 1)
-        throw std::runtime_error("RLWaveletTree::Load(): file read error (maxlevel).");
-
-    for (ulong offset = 0; offset < ((ulong)1 << maxlevel)+1; ++offset)
-    {
-        if (std::fread(this->leaves + offset, sizeof(ulong), 1, file) != 1)
-            throw std::runtime_error("RLWaveletTree::Load(): file read error (leaves).");
-    }
-
-    bitrank = new GapEncode(file);
-
-    // Silly
-    ulong *chars = new ulong[256/W+1];
-    // Read flags
-    for (ulong offset = 0; offset < 256/W+1; ++offset)
-    {
-        if (std::fread(chars + offset, sizeof(ulong), 1, file) != 1)
-            throw std::runtime_error("RLWaveletTree::Load(): file read error (chars).");
-    }
-    charMap = new BitRank(chars, 256, true);
-
-    prevRunLen = 0; // Init "buffer" values
-    prevChar = 0;
-    prevPos = 0;
-}
diff --git a/RLWaveletTree.h b/RLWaveletTree.h
deleted file mode 100644 (file)
index f3ed9ed..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-/***************************************************************************
- *   Copyright (C) 2007 by Veli Mäkinen                                    *
- *                                                                         *
- *                                                                         *
- *   This program is free software; you can redistribute it and/or modify  *
- *   it under the terms of the GNU General Public License as published by  *
- *   the Free Software Foundation; either version 2 of the License, or     *
- *   (at your option) any later version.                                   *
- *                                                                         *
- *   This program is distributed in the hope that it will be useful,       *
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
- *   GNU General Public License for more details.                          *
- *                                                                         *
- *   You should have received a copy of the GNU General Public License     *
- *   along with this program; if not, write to the                         *
- *   Free Software Foundation, Inc.,                                       *
- *   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.         *
- ***************************************************************************/
-
-/*
- * Wavelet tree by Veli Mäkinen,
- */
-
-#ifndef _RLWAVELETTREE_H_
-#define _RLWAVELETTREE_H_
-#include "GapEncode.h"
-#include "Tools.h"
-#include "BitRank.h"
-
-class RLWaveletTree {
-private:
-       ulong n;
-       ulong maxlevel;
-       ulong *leaves; // stores the leaf positions of characters 0..2^maxlevel-1
-        GapEncode *bitrank;
-        BitRank *charMap;
-        void remapAlphabet(uchar*, ulong);
-
-        ulong prevRunLen; // Used by charAtPosRun()
-        ulong prevPos;
-        ulong prevChar;
-        ulong prevRank;
-public:            
-    RLWaveletTree(uchar *, ulong);
-    RLWaveletTree(FILE *);
-    ~RLWaveletTree();
-
-    void Save(FILE *) const;
-   ulong rank(ulong c, ulong i);
-    
-   ulong select(ulong c, ulong j);
-        
-   bool IsCharAtPos(ulong c, ulong i);
-
-   ulong charAtPos(ulong i);
-   ulong charAtPos(ulong i, ulong *rank);
-   ulong charAtPosRun(ulong i, ulong *rank);
-   double spaceInBits() {
-       return 0; // Not implemented
-   }
-};
-
-#endif