X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=GapEncode.h;fp=GapEncode.h;h=a73e5956854c3e4d7804c8ab20df53bde235f0e5;hb=dbd62fe27e80414b4cf4fbf512ad1910916e6967;hp=0000000000000000000000000000000000000000;hpb=dee0161e4f31f06e9389db9986766395c1b1d2b8;p=SXSI%2FTextCollection.git diff --git a/GapEncode.h b/GapEncode.h new file mode 100644 index 0000000..a73e595 --- /dev/null +++ b/GapEncode.h @@ -0,0 +1,113 @@ +/** + * Gap encoding scheme + * Niko Välimäki + * + * + */ + +#ifndef _GapEncode_H_ +#define _GapEncode_H_ + +#include "Tools.h" +#include "BlockArray.h" +#include "BSGAP.h" + +class GapEncode +{ +public: + GapEncode(ulong *, ulong, bool); + GapEncode(FILE *); + ~GapEncode(); + + void Save(FILE *) const; + ulong rank(ulong); + ulong rankrun(ulong i) {return bitRun->rank(i);}; + ulong selectrun(ulong i) {return bitRun->select(i);}; + ulong rank0(ulong); + ulong select(ulong); + ulong select0(ulong); // Not implemented, yet... + bool IsBitSet(ulong i) { + ulong r = bitRun->rank(i); + if (r == 0) + return false; + ulong pred = bitRun->select(r); + + ulong bitsBeforeRun = 0; + if (r > 1) + bitsBeforeRun = runLength->select(r - 1) + 1; + + ulong bitsInRun = runLength->select(r) + 1 - bitsBeforeRun; + if (i - pred >= bitsInRun) + return false; + + return true; + } + + /** + * Returns also the rank_1 of given position i + */ + bool IsBitSet(ulong i, ulong *rank) { + *rank = 0; + ulong r = bitRun->rank(i); + if (r == 0) + return false; + ulong pred = bitRun->select(r); + + ulong bitsBeforeRun = 0; + if (r > 1) + bitsBeforeRun = runLength->select(r - 1) + 1; + + ulong bitsInRun = runLength->select(r) + 1 - bitsBeforeRun; + if (i - pred >= bitsInRun) + { + *rank = bitsBeforeRun + bitsInRun; + return false; + } + + *rank = bitsBeforeRun + i - pred + 1; + return true; + } + + /** + * Returns also the rank_1 of given position i + * and the length of the trailing 0/1-run. + */ + bool IsBitSet(ulong i, ulong *rank, ulong *runl) { + *rank = 0; + ulong r = bitRun->rank(i); + if (r == 0) + { + // Length of 0-bit run after pos i + *runl = bitRun->select(1) - i - 1; + return false; + } + ulong pred = bitRun->select(r); + + ulong bitsBeforeRun = 0; + if (r > 1) + bitsBeforeRun = runLength->select(r - 1) + 1; + + ulong bitsInRun = runLength->select(r) + 1 - bitsBeforeRun; + if (i - pred >= bitsInRun) + { + *rank = bitsBeforeRun + bitsInRun; + *runl = u - i - 1; + if (r < numberOfRuns) + *runl = bitRun->select(r+1) - i - 1; + return false; + } + + *rank = bitsBeforeRun + i - pred + 1; + *runl = bitsInRun - (i - pred) - 1; + return true; + } + +private: + ulong u; // Universe size, number of bits in B + ulong numberOfRuns; // Number of 1-bit runs + BSGAP *bitRun, // Contains 1-bit at the start of each 1-bit-run + *runLength; // Contains run lengths for each 1-bit-run + ulong totalLength; // Total length of runs +}; + +#endif