Update: libcds WT, empty texts in bitv.
[SXSI/TextCollection.git] / GapEncode.h
diff --git a/GapEncode.h b/GapEncode.h
new file mode 100644 (file)
index 0000000..a73e595
--- /dev/null
@@ -0,0 +1,113 @@
+/**
+ * Gap encoding scheme
+ * Niko Välimäki
+ *
+ *
+ */
+
+#ifndef _GapEncode_H_
+#define _GapEncode_H_
+
+#include "Tools.h"
+#include "BlockArray.h"
+#include "BSGAP.h"
+
+class GapEncode
+{
+public:
+    GapEncode(ulong *, ulong, bool);
+    GapEncode(FILE *);
+    ~GapEncode();
+    
+    void Save(FILE *) const;
+    ulong rank(ulong);
+    ulong rankrun(ulong i) {return bitRun->rank(i);};
+    ulong selectrun(ulong i) {return bitRun->select(i);};
+    ulong rank0(ulong);
+    ulong select(ulong);
+    ulong select0(ulong); // Not implemented, yet...
+    bool IsBitSet(ulong i) {
+        ulong r = bitRun->rank(i);
+        if (r == 0)
+            return false;
+        ulong pred = bitRun->select(r);
+
+        ulong bitsBeforeRun = 0;
+        if (r > 1)
+            bitsBeforeRun = runLength->select(r - 1) + 1;
+
+        ulong bitsInRun = runLength->select(r) + 1 - bitsBeforeRun;
+        if (i - pred >= bitsInRun)
+            return false;
+        
+        return true;
+    }
+
+    /**
+     * Returns also the rank_1 of given position i
+     */
+    bool IsBitSet(ulong i, ulong *rank) {
+        *rank = 0;
+        ulong r = bitRun->rank(i);
+        if (r == 0)
+            return false;
+        ulong pred = bitRun->select(r);
+
+        ulong bitsBeforeRun = 0;
+        if (r > 1)
+            bitsBeforeRun = runLength->select(r - 1) + 1;
+        
+        ulong bitsInRun = runLength->select(r) + 1 - bitsBeforeRun;
+        if (i - pred >= bitsInRun)
+        {
+            *rank = bitsBeforeRun + bitsInRun;
+            return false;
+        }
+        
+        *rank = bitsBeforeRun + i - pred + 1;
+        return true;
+    }
+
+    /**
+     * Returns also the rank_1 of given position i
+     * and the length of the trailing 0/1-run.
+     */
+    bool IsBitSet(ulong i, ulong *rank, ulong *runl) {
+        *rank = 0;
+        ulong r = bitRun->rank(i);
+        if (r == 0)
+        {
+            // Length of 0-bit run after pos i
+            *runl = bitRun->select(1) - i - 1;
+            return false;
+        }
+        ulong pred = bitRun->select(r);
+
+        ulong bitsBeforeRun = 0;
+        if (r > 1)
+            bitsBeforeRun = runLength->select(r - 1) + 1;
+        
+        ulong bitsInRun = runLength->select(r) + 1 - bitsBeforeRun;
+        if (i - pred >= bitsInRun)
+        {
+            *rank = bitsBeforeRun + bitsInRun;
+            *runl = u - i - 1;
+            if (r < numberOfRuns)
+                *runl = bitRun->select(r+1) - i - 1;
+            return false;
+        }
+                
+        *rank = bitsBeforeRun + i - pred + 1;
+        *runl = bitsInRun - (i - pred) - 1;
+        return true;
+    }
+
+private:
+    ulong u;                // Universe size, number of bits in B
+    ulong numberOfRuns;     // Number of 1-bit runs
+    BSGAP *bitRun,          // Contains 1-bit at the start of each 1-bit-run
+          *runLength;       // Contains run lengths for each 1-bit-run
+    ulong totalLength;      // Total length of runs
+};
+
+#endif