4 * Copyright 2003 Max-Planck-Institut fuer Informatik
8 //======================================================================
9 // Doubling algorithm for suffix array construction.
10 // Follows closely the algorithm described in
12 // N.J. Larsson and K. Sadakane. Faster Suffix Sorting.
13 // Technical Report LU-CS-TR:99-214, Dept. of Computer Science,
14 // Lund University, Sweden, 1999.
15 //======================================================================
20 #include "partition.hpp"
28 //----------------------------------------------------------------------
31 // Given a suffix array iterator computes the corresponding
32 // inverse suffix array entry to be used in comparisons.
33 //----------------------------------------------------------------------
34 template <typename ISAIterator, typename SAIterator>
36 : public std::unary_function<SAIterator,
37 typename std::iterator_traits<ISAIterator>::value_type>
40 typedef typename std::iterator_traits<ISAIterator>::difference_type
42 const ISAIterator isa;
44 isa_access(ISAIterator s, difference_type lcp) : isa(s+lcp) {}
45 typename std::iterator_traits<ISAIterator>::value_type
46 operator() (SAIterator i) const {
51 //----------------------------------------------------------------------
52 // function update_group
54 // Update a group after doubling.
55 // WARNING: cannot handle an empty group
56 //----------------------------------------------------------------------
57 template <typename SAIterator, typename ISAIterator>
58 void update_group(SAIterator beg, SAIterator end,
59 SAIterator sa, ISAIterator isa)
61 typedef typename std::iterator_traits<SAIterator>::value_type
63 pos_type groupnumber = std::distance(sa, end) - 1;
65 for (SAIterator i = beg; i != end; ++i) {
66 isa[*i] = groupnumber;
69 isa[*beg] = groupnumber;
75 //----------------------------------------------------------------------
76 // function selection_partition
78 // Performs doubling partitioning for small groups.
79 //----------------------------------------------------------------------
80 template <typename SAIterator, typename ISAIterator>
81 void selection_partition(SAIterator sa, ISAIterator isa,
82 SAIterator beg, SAIterator end,
83 typename std::iterator_traits<SAIterator>::difference_type lcp)
85 typedef typename std::iterator_traits<SAIterator>::value_type
87 isa_access<ISAIterator, SAIterator> key(isa, lcp);
89 SAIterator min_end = beg+1;
90 pos_type min_key = key(beg);
91 for (SAIterator i = beg+1; i != end; ++i) {
92 if (key(i) < min_key) {
96 } else if (!(min_key < key(i))) {
97 std::swap(*min_end, *i);
101 update_group(beg, min_end, sa, isa);
104 if (beg != end) update_group(beg, end, sa, isa);
108 //----------------------------------------------------------------------
109 // function doubling_partition
111 // Given a group of suffixes with a common prefix of length lcp
112 // partition it to subgroups according to the next lcp characters.
113 // The next lcp characters are represented by a single value
114 // in the inverse suffix array.
115 //----------------------------------------------------------------------
116 template <typename SAIterator, typename ISAIterator>
117 void doubling_partition(SAIterator sa, ISAIterator isa,
118 SAIterator beg, SAIterator end,
119 typename std::iterator_traits<SAIterator>::difference_type lcp)
121 typedef typename std::iterator_traits<SAIterator>::value_type
124 while (end-beg > 10) {
127 isa_access<ISAIterator, SAIterator> get_isa_entry(isa, lcp);
128 SAIterator pivot = random_pivot(beg, end, std::less<pos_type>(),
130 std::pair<SAIterator, SAIterator> midrange
131 = ternary_partition(beg, end, pivot, std::less<pos_type>(),
135 if (beg != midrange.first) {
136 doubling_partition(sa, isa, beg, midrange.first, lcp);
140 update_group(midrange.first, midrange.second, sa, isa);
143 beg = midrange.second;
146 selection_partition(sa, isa, beg, end, lcp);
151 //======================================================================
152 // function doubling_sort
154 // The main doubling algorithm
155 //======================================================================
156 template <typename SAIterator, typename ISAIterator>
158 doubling_sort(SAIterator sa, SAIterator sa_end,
159 ISAIterator isa, ISAIterator isa_end,
160 typename std::iterator_traits<ISAIterator>::difference_type lcp)
162 typedef typename std::iterator_traits<SAIterator>::difference_type
165 difference_type size = std::distance(sa, sa_end);
166 if (isa_end-isa != size) {
167 std::ostringstream os;
168 os << "doubling_sort:"
169 << " wrong range size";
170 throw std::logic_error(os.str());
173 while (*sa > -size) {
176 difference_type nrunlen = 0;
177 while (beg != sa_end) {
178 difference_type val = *beg;
180 // jump over already sorted part
184 // combine preceding sorted parts
186 *(beg+nrunlen) = nrunlen;
189 // partition unsorted group
190 SAIterator end = sa + isa[val] + 1;
191 doubling_partition(sa, isa, beg, end, lcp);
196 *(beg+nrunlen) = nrunlen;
204 #endif // DOUBLING_HPP