4 * Copyright 2003 Max-Planck-institut fuer Informatik
8 //======================================================================
9 // This file defines two classes:
11 // class difference_cover implements basic difference covers
12 // modulo any power of two
13 // class dc_sampler implements the difference cover based sampling
14 // used in the suffix array construction
16 // Part of the implementation of class difference_cover is in
17 // difference_cover.cpp
18 //======================================================================
20 #ifndef DIFFERENCE_COVER_HPP
21 #define DIFFERENCE_COVER_HPP
25 //#include <stdexcept>
27 //----------------------------------------------------------------------
28 // class difference_cover
30 // A difference cover D modulo v is a set of integers in the range [0,v)
31 // such that for all i in [0,v), there exists j,k in D such that
34 // This class represents a difference cover modulo any power of two.
35 // The sizes of the difference covers for small v are:
36 // v 1 2 4 8 16 32 64 128 256 512 1024 ...
37 // |D| 1 2 3 4 5 7 9 13 20 28 40 ...
38 // For v<=128, the size of the difference cover is minimal.
39 // For any v, the size is less than sqrt(1.5*v)+6 (sqrt(v) is a lower bound).
42 // - All arithmetic is done with unsigned int, because it guarantees
43 // correct modular arithmetic including conversion to/from other
44 // integral types. This is a main reason why supporting values of v
45 // other than powers of two would be difficult.
46 //----------------------------------------------------------------------
47 class difference_cover {
51 // create a difference cover modulo 2^lv
52 explicit difference_cover(unsigned lv); // defined in difference_cover.cpp
55 unsigned modulus() const { return 1<<logmod; }
56 unsigned size() const { return coversize; }
58 // division by modulus
59 unsigned mod(unsigned i) const { return i&mask; }
60 template <typename IntType>
61 IntType div(IntType i) const { return i>>logmod; }
63 // mapping [0,modulus) <-> [0,size)
64 unsigned rank(unsigned pos) const { return coverrank[pos&mask]; }
65 unsigned pos(unsigned rank) const { return cover[rank]; }
67 // compute k such that i+k and j+k are both in the cover
68 unsigned offset(unsigned i, unsigned j) const {
69 return (diff2pos[(j-i)&mask]-i)&mask;
72 typedef std::vector<unsigned>::const_iterator iterator;
73 iterator begin() const { return cover.begin(); }
74 iterator end() const { return cover.end(); }
78 unsigned logmod; // log v
79 unsigned mask; // v-1 = 11...1 in binary
80 unsigned coversize; // |D|
82 std::vector<unsigned> cover; // D
83 std::vector<unsigned> coverrank; // rank of a value in D
84 std::vector<unsigned> diff2pos; // lookup table for finding a
85 // pair with a given difference
90 //----------------------------------------------------------------------
93 // Represents the periodic sample positions chosen according
94 // to a difference cover.
96 // The main duties are:
97 // 1. Fill the sparse suffix array with the sample positions.
98 // 2. Provide a mapping pack() from sample positions to packed positions.
99 // A packed position pack(i) of a sample position i is used as
100 // the corresponding position in the inverse sparse suffix array.
101 // pack(i) must satisfy:
102 // a) pack(i) is unique (different for each sample position i) and
103 // in the range [0,samplesize)
104 // b) The difference pack(i+v)-pack(i), where v is the length of
105 // the period (modulus of the difference cover), is the same for
106 // all i. This difference is called the packed period.
107 // 3. Find the representative pair for two arbitrary positions i and j.
108 // The representative pair is a pair of positions in the inverse
109 // sparse suffix array that can be used for comparing the suffixes
110 // i and j if they have a common prefix of length period-1.
111 // The representative pair is computed by finding k in [0,period)
112 // such that i+k and j+k are both sample positions and returning
113 // the packed positions corresponding to i+k and j+k.
114 //----------------------------------------------------------------------
115 template <typename OffsetType>
119 dc_sampler(OffsetType s, OffsetType v) : dcover(v), fullsize_(s),
120 samplesize_(dcover.div(s)*dcover.size()+dcover.rank(s))
123 OffsetType fullsize() const { return fullsize_; }
124 OffsetType samplesize() const { return samplesize_; }
125 OffsetType period() const { return dcover.modulus(); }
126 OffsetType packed_period() const { return dcover.size(); }
128 OffsetType pack(OffsetType i) const {
129 return dcover.div(i) * dcover.size() + dcover.rank(i);
132 // representative pair
133 std::pair<OffsetType,OffsetType>
134 pair(OffsetType i, OffsetType j) const {
135 OffsetType k = dcover.offset(i, j);
136 return std::make_pair(pack(i+k), pack(j+k));
139 // fills a range with the sample positions
140 template <typename OutputIterator>
141 OutputIterator fill(OutputIterator it) const {
142 difference_cover::iterator j = dcover.begin();
144 OffsetType pos = i + static_cast<OffsetType>(*j);
145 while (pos < fullsize()) {
147 if (++j == dcover.end()) {
148 i += dcover.modulus();
151 pos = i + static_cast<OffsetType>(*j);
158 const difference_cover dcover;
159 const OffsetType fullsize_; // size of the range where samples are chosen
160 const OffsetType samplesize_;
163 #endif // DIFFERENCE_COVER_HPP