--- /dev/null
+/*
+ * bwt.hpp
+ *
+ * Copyright 2004 Juha Ka"rkka"inen <juha.karkkainen@cs.helsinki.fi>
+ *
+ */
+
+#include "dcsuffixsort.hpp"
+#include "dcsamplesort.hpp"
+#include "stringsort.hpp"
+#include "difference_cover.hpp"
+#include "distribute.hpp"
+#include "kmpsplit.hpp"
+/*
+#include "doubling.hpp"
+*/
+
+#include <iterator>
+#include <vector>
+#include <set>
+#include <iostream>
+#include <iomanip>
+#include <cassert>
+/*
+#include <algorithm>
+#include <cstdlib>
+*/
+
+#ifndef DCOVER
+#define DCOVER 8
+#endif
+
+#ifndef RANDSEED
+#define RANDSEED 46
+#endif
+
+#define MIN_PIVOTS 3
+#define OVERSAMPLING_RATE 10.0
+
+
+struct do_nothing {
+ template <typename Iterator>
+ void operator() (Iterator, Iterator) {}
+};
+
+
+
+
+template <class StringIterator, class OutputIterator>
+typename std::iterator_traits<StringIterator>::difference_type
+compute_bwt (StringIterator str, StringIterator str_end,
+ OutputIterator bwt, unsigned endmarkers, unsigned int dcover = DCOVER)
+{
+
+ typedef typename std::iterator_traits<StringIterator>::difference_type
+ difftype; // signed integral type
+ typedef unsigned long sizetype; // unsigned integral type
+ typedef difftype postype; // type for representing vector positions
+
+ typedef typename std::vector<postype>::iterator pos_iterator;
+
+#if DEBUG > 0
+ std::cout << "BWT construction starts" << std::endl;
+#endif
+
+ //-----------------
+ // setup and check
+ //-----------------
+
+ sizetype length = std::distance(str, str_end);
+
+#if DEBUG > 1
+ typedef typename std::iterator_traits<StringIterator>::value_type
+ chartype;
+ typedef typename std::iterator_traits<OutputIterator>::value_type
+ bwttype;
+ std::cout << " text memory=" << length*sizeof(chartype) << "\n"
+ << " bwt memory=" << (length+1)*sizeof(chartype) << std::endl;
+#endif
+
+ dc_sampler<postype> dcsampler(length+1, dcover);
+
+#if DEBUG > 1
+ std::cout << " dcsampler memory="
+ << (dcsampler.packed_period() + 2*dcsampler.period())
+ * sizeof(unsigned)
+ << std::endl;
+#endif
+
+ sizetype blocksize = dcsampler.samplesize();
+ sizetype npivots = MIN_PIVOTS;
+
+ // make average bucket size <= blocksize/OVERSAMPLING_RATE
+ while ((OVERSAMPLING_RATE*length)/(npivots+1.0) > blocksize) ++npivots;
+
+#if DEBUG > 1
+ std::cout << " length=" << length
+ << " samplesize=" << dcsampler.samplesize()
+ << " blocksize=" << blocksize
+ << " npivots=" << npivots
+ << std::endl;
+#endif
+
+ if (dcsampler.period() + 2*npivots > length) {
+ // too small text for sampling
+
+#if DEBUG > 0
+ std::cout << "small text: sort all suffixes\n";
+#endif
+
+ std::vector<postype> sa(length);
+ for (pos_iterator i=sa.begin(); i!=sa.end(); ++i) {
+ *i = i - sa.begin();
+ }
+ suffix_mkqsort(str, str_end, sa.begin(), sa.end(), length, do_nothing());
+ difftype eof_position = 0;
+ for (pos_iterator i=sa.begin(); i!=sa.end(); ++i) {
+ if (*i != 0) {
+ if (str[*i-1] <= endmarkers)
+ *bwt++ = 0;
+ else
+ *bwt++ = str[*i-1] - endmarkers;
+ } else {
+ if (str[*(i-1)-1] <= endmarkers)
+ *bwt++ = 0;
+ else
+ *bwt++ = str[*(i-1)-1] - endmarkers;
+ eof_position = i - sa.begin() +1; // FIXME
+ }
+ }
+ return eof_position;
+ }
+
+ // don't take too short suffixes as pivots
+ unsigned int pivotrange = length-dcsampler.period()+1;
+
+ //-----------------------------------
+ // setup difference cover sample
+ //-----------------------------------
+
+#if DEBUG > 0
+ std::cout << "difference cover construction ... " << std::endl;
+#endif
+
+ sizetype samplesize = dcsampler.samplesize();
+ std::vector<difftype> dcranks(samplesize);
+ std::vector<difftype> suffixes(samplesize);
+
+#if DEBUG > 1
+ std::cout << "dcranks memory="
+ << samplesize * sizeof(difftype)
+ << "\n";
+ std::cout << "suffixes memory="
+ << samplesize * sizeof(difftype)
+ << std::endl;
+#endif
+
+ sort_dc_sample(dcsampler, str, str_end,
+ dcranks.begin(), dcranks.end(),
+ suffixes.begin(), suffixes.end());
+
+#if DEBUG > 3
+ std::cout << "sample ranks:\n";
+ std::copy(dcranks.begin(), dcranks.end(),
+ std::ostream_iterator<int>(std::cout,"\n"));
+ std::cout << "end of sample ranks" << std::endl;
+#endif
+
+
+
+ //-------------------
+ // setup pivots
+ //-------------------
+
+#if DEBUG > 0
+ std::cout << "setting up pivots ..." << std::endl;
+#endif
+
+ std::set<difftype> pivotset;
+ double scale_factor = (pivotrange * 1.0L)/(RAND_MAX+1.0L);
+ std::srand(RANDSEED);
+ while (pivotset.size() < npivots) {
+ int r = std::rand();
+ int newpivot = static_cast<difftype>(scale_factor*r);
+ pivotset.insert(newpivot);
+ }
+
+#if DEBUG > 1
+ std::cout << " pivotset memory = " << pivotset.size()
+ << " elements" << std::endl;
+#endif
+
+ std::vector<difftype> pivots(pivotset.begin(), pivotset.end());
+ pivotset.clear();
+
+ // sort the pivots
+ dc_sort_suffixes(str, str_end, pivots.begin(), pivots.end(),
+ dcsampler, dcranks.begin(), dcranks.end());
+
+#if DEBUG > 2
+ std::cout << "pivots sorted lexicographically:\n";
+ for (pos_iterator i = pivots.begin(); i != pivots.end(); ++i) {
+ std::cout << std::setw(8) << *i << " ";
+ std::copy(str+*i, std::min(str_end, str+*i+40),
+ std::ostream_iterator<char>(std::cout,""));
+ std::cout << std::endl;
+ }
+#endif
+
+
+#if DEBUG > 0
+ std::cout << " constructing distributor ... " << std::endl;
+#endif
+
+ suffix_distributor<StringIterator, pos_iterator,
+ dc_sampler<postype>, pos_iterator>
+ distributor(str, str_end, pivots.begin(), pivots.end(),
+ dcsampler, dcranks.begin());
+
+#if DEBUG > 1
+ distributor.print();
+#endif
+
+
+
+ //------------------------
+ // compute bucketsizes
+ //------------------------
+
+#if DEBUG > 0
+ std::cout << "computing bucket sizes ... " << std::endl;
+#endif
+
+ std::vector<difftype> buckets(distributor.nbuckets());
+
+#if DEBUG > 1
+ std::cout << " number of buckets: " << distributor.nbuckets() << "\n";
+ std::cout << " bucket vector memory="
+ << buckets.size() * sizeof(difftype)
+ << std::endl;
+#endif
+
+ for (StringIterator suf = str; suf != str_end; ++suf) {
+ unsigned int bucket = distributor.find_bucket(suf);
+ ++buckets[bucket];
+
+#if DEBUG > 3
+ std::cout << " into bucket " << bucket;
+ std::cout << ":" << std::setw(7) << suf-str << " ";
+ std::copy(suf, std::min(str_end, suf+40),
+ std::ostream_iterator<char>(std::cout,""));
+ std::cout << std::endl;
+#endif
+
+ }
+ // ++buckets[0]; // empty suffix
+
+#if DEBUG > 1
+ std::cout << " bucket sizes: ";
+ std::copy(buckets.begin(), buckets.end(),
+ std::ostream_iterator<int>(std::cout," "));
+ std::cout << std::endl;
+#endif
+
+
+ //-----------------------
+ // build and sort blocks
+ //-----------------------
+
+#if DEBUG > 0
+ std::cout << "Computing and sorting blocks ... " << std::endl;
+#endif
+
+// *bwt++ = *(str_end-1); //FIXME no empty suffix?
+
+ postype eof_position = 0;
+ sizetype total_sum = 1;
+
+ postype bucket_begin = 0;
+ while (bucket_begin < (postype)buckets.size()) {
+
+ sizetype sum = buckets[bucket_begin];
+ postype bucket_end = bucket_begin + 1;
+ while (bucket_end < (postype)buckets.size()
+ && sum+buckets[bucket_end] <= blocksize) {
+ sum += buckets[bucket_end];
+ ++bucket_end;
+ }
+
+ postype left_pivot = bucket_begin-1;
+ postype right_pivot = bucket_end-1;
+
+ short lcp = 0;
+ if (left_pivot >= 0 && right_pivot < (postype)pivots.size()) {
+#if __WORDSIZE == 32
+ lcp = suffix_lcp(str+pivots[left_pivot], str+pivots[right_pivot],
+ str_end, 0, dcsampler.period()-1);
+#else
+ lcp = suffix_lcp(str+pivots[left_pivot], str+pivots[right_pivot],
+ str_end, 0l, dcsampler.period()-1);
+#endif
+ }
+
+#if DEBUG > 1
+ std::cout << "block [" << bucket_begin << "," << bucket_end << ")"
+ << " size=" << sum
+ << " lcp=" << lcp
+ << std::endl;
+#endif
+
+ if (sum > blocksize) {
+ blocksize = sum;
+ suffixes.resize(blocksize);
+
+#if DEBUG > 1
+ std::cout << " blocksize increased to "
+ << blocksize << "\n";
+ std::cout << " suffixes memory resized to "
+ << blocksize*sizeof(postype)
+ << std::endl;
+#endif
+
+ }
+
+ pos_iterator i = suffixes.begin();
+ //if (bucket_begin == 0) { *i++ = length; // empty suffix }
+
+ postype left_pivot_not_beyond = left_pivot<0 ? 0 : left_pivot;
+ kmp_split<StringIterator, dc_sampler<postype>, pos_iterator>
+ left_cmp(str, str_end, str+pivots[left_pivot_not_beyond],
+ dcsampler, dcranks.begin());
+
+ postype right_pivot_not_beyond =
+ right_pivot==(postype)pivots.size() ? right_pivot-1 : right_pivot;
+ kmp_split<StringIterator, dc_sampler<postype>, pos_iterator>
+ right_cmp(str, str_end, str+pivots[right_pivot_not_beyond],
+ dcsampler, dcranks.begin());
+
+ for (StringIterator suf = str; suf != str_end; ++suf) {
+
+#if DEBUG > 3
+ std::cout << ":" << std::setw(3) << suf-str << " ";
+ std::copy(suf, std::min(str_end, suf+40),
+ std::ostream_iterator<char>(std::cout,""));
+ std::cout << std::endl;
+#endif
+
+ /*
+ // is the suffix left of the block
+ if (left_pivot >=0) {
+ postype pivotpos = pivots[left_pivot];
+ if (*suf < str[pivotpos]) continue;
+ int lcp = suffix_lcp(suf, str+pivotpos,
+ str_end, 0, dcsampler.period()-1);
+ if (lcp < dcsampler.period()-1) {
+ if (suf+lcp == str_end || suf[lcp] < (str+pivotpos)[lcp]) {
+ continue;
+ }
+ } else {
+ postype sufpos = suf-str;
+ std::pair<postype,postype> result
+ = dcsampler.pair(sufpos, pivotpos);
+ if (dcranks[result.first] < dcranks[result.second]){
+ continue;
+ }
+ }
+ }
+
+ // is the suffix right of the block
+ if (right_pivot < (postype)pivots.size()) {
+ postype pivotpos = pivots[right_pivot];
+ if (*suf > str[pivotpos]) continue;
+ int lcp = suffix_lcp(str+pivotpos, suf,
+ str_end, 0, dcsampler.period()-1);
+ if (lcp < dcsampler.period()-1) {
+ if (suf+lcp < str_end && (str+pivotpos)[lcp] < suf[lcp]) {
+ continue;
+ }
+ } else {
+ postype sufpos = suf-str;
+ std::pair<postype,postype> result
+ = dcsampler.pair(pivotpos, sufpos);
+ if (dcranks[result.first] <= dcranks[result.second]){
+ continue;
+ }
+ }
+ }
+ */
+
+ //postype bucket = distributor.find_bucket(suf);
+
+ bool to_left = left_pivot>0 && left_cmp.is_next_less();
+ bool to_right = right_pivot<(postype)pivots.size() && !right_cmp.is_next_less() ;
+
+ if (to_left || to_right) {
+ //assert (bucket_begin > bucket || bucket >= bucket_end);
+ continue;
+ }
+
+ // it is in the bucket
+ //postype bucket = distributor.find_bucket(suf);
+
+ //if (bucket_begin > bucket || bucket >= bucket_end) continue;
+
+ //assert (bucket_begin <= bucket && bucket < bucket_end);
+
+ *i++ = suf-str;
+#if DEBUG > 3
+ //std::cout << "into bucket " << bucket;
+ std::cout << ":" << std::setw(3) << suf-str << " ";
+ std::copy(suf, std::min(str_end, suf+40),
+ std::ostream_iterator<char>(std::cout,""));
+ std::cout << std::endl;
+#endif
+
+ }
+
+ assert(i == suffixes.begin()+sum);
+
+ dc_sort_suffixes(str, str_end, suffixes.begin(), i,
+ dcsampler, dcranks.begin(), dcranks.end());
+
+#if DEBUG > 3
+ std::cout << "sorted bucket:\n";
+ for (pos_iterator j = suffixes.begin(); j != i; ++j) {
+ std::cout << std::setw(3) << *j << " ";
+ std::copy(str+*j, std::min(str_end, str+*j+40),
+ std::ostream_iterator<char>(std::cout,""));
+ std::cout << std::endl;
+ }
+#endif
+
+ for (pos_iterator j=suffixes.begin(); j != i; ++j) {
+ if (*j != 0) {
+ if (str[*j-1] <= endmarkers)
+ *bwt++ = 0;
+ else
+ *bwt++ = str[*j-1] - endmarkers;
+ } else {
+ if (str[*(j-1)-1] <= endmarkers)
+ *bwt++ = 0;
+ else
+ *bwt++ = str[*(j-1)-1] - endmarkers;
+ eof_position = total_sum + (j - suffixes.begin());
+ }
+ }
+
+ total_sum += sum;
+
+ bucket_begin = bucket_end;
+ }
+
+ assert(total_sum == length+1);
+
+ return eof_position;
+}
--- /dev/null
+/*
+ * dcsamplesort.hpp
+ *
+ * Copyright 2003 Max-Planck-Institut fuer Informatik
+ * Copyright 2004 Juha Ka"rkka"inen <juha.karkkainen@cs.helsinki.fi>
+ *
+ */
+
+#ifndef DCSAMPLESORT_HPP
+#define DCSAMPLESORT_HPP
+
+#include "difference_cover.hpp"
+#include "doubling.hpp"
+#include "stringsort.hpp"
+/*
+*/
+
+#include <sstream>
+#include <iostream>
+/*
+#include <algorithm>
+#include <iterator>
+*/
+
+
+//----------------------------------------------------------------------
+// functor mark_unsorted_group
+//
+// When string sorting leaves a group of suffixes unsorted,
+// it marks that group by calling this functor.
+//
+// The marking is done by one's complementing the first and last
+// entries of the group. Complemented values are assumed to become
+// negative. Complementing is used instead of negation to be able
+// mark the zero.
+//----------------------------------------------------------------------
+class mark_unsorted_group {
+public:
+ template <typename SArrayIterator>
+ void operator() (SArrayIterator beg, SArrayIterator end) const {
+ *beg = ~*beg;
+ --end;
+ *end = ~*end; // note: group of size one stays unmarked
+ }
+};
+
+
+
+
+template <class Sampler, class RandomAccessIterator1,
+ class RandomAccessIterator2, class RandomAccessIterator3>
+void
+sort_dc_sample(Sampler sampler,
+ RandomAccessIterator1 str, RandomAccessIterator1 str_end,
+ RandomAccessIterator2 ranks, RandomAccessIterator2 ranks_end,
+ RandomAccessIterator3 suffixes,
+ RandomAccessIterator3 suffixes_end)
+{
+ int size = sampler.samplesize();
+ if ((ranks_end-ranks != size) || (suffixes_end-suffixes != size)) {
+ std::ostringstream os;
+ os << "sort_dc_sample:"
+ << " wrong range size";
+ throw std::logic_error(os.str());
+ }
+
+ // collect the sample
+ RandomAccessIterator3 beyond = sampler.fill(suffixes);
+ if (suffixes_end != beyond) {
+ std::ostringstream os;
+ os << "sort_dc_sample:"
+ << " sample.fill produced " << beyond-suffixes
+ << " samples but samplesize is " << size;
+ throw std::logic_error(os.str());
+ }
+
+ // sort the sample to limited depth
+ suffix_mkqsort(str, str_end, suffixes, suffixes_end, sampler.period(),
+ mark_unsorted_group());
+
+ // compute ranks
+ RandomAccessIterator3 first = suffixes;
+ while (first != suffixes_end) {
+ if (*first >= 0) {
+ RandomAccessIterator3 last;
+ ranks[sampler.pack(*first)] = first - suffixes;
+ for (last=first+1; last != suffixes_end && *last >= 0; ++last) {
+ ranks[sampler.pack(*last)] = last - suffixes;
+ }
+ *first = -(last-first);
+ first = last;
+ } else {
+ RandomAccessIterator3 last;
+ *first = ~*first;
+ for (last=first+1; *last >= 0; ++last) ;
+ *last = ~*last;
+ for ( ; first <= last; ++first) {
+ *first = sampler.pack(*first);
+ ranks[*first] = last - suffixes;
+ }
+ }
+ }
+
+ // complete sorting with the doubling algorithm
+ doubling_sort(suffixes, suffixes_end, ranks, ranks_end,
+ sampler.packed_period());
+
+}
+
+#endif // DCSAMPLESORT_HPP
--- /dev/null
+/*
+ * dcsuffixsort.hpp
+ *
+ * Copyright 2003 Max-Planck-Institut fuer Informatik
+ * Copyright 2004 Juha Ka"rkka"inen <juha.karkkainen@cs.helsinki.fi>
+ *
+ */
+
+#ifndef DCSUFFIXSORT_HPP
+#define DCSUFFIXSORT_HPP
+
+#include "stringsort.hpp"
+#include "difference_cover.hpp"
+#include "partition.hpp"
+/*
+#include "doubling.hpp"
+#include "stringsort.hpp"
+#include "checker.hpp"
+#include "timing.hpp"
+*/
+
+#include <utility>
+#include <iterator>
+#include <stdexcept>
+#include <sstream>
+/*
+#include <algorithm>
+#include <vector>
+#include <string>
+#include <functional>
+*/
+
+//----------------------------------------------------------------------
+// dc_suffix_compare
+//
+// compare two suffixes using difference covers
+//----------------------------------------------------------------------
+template <typename RankIterator>
+class dc_suffix_compare {
+private:
+ typedef typename std::iterator_traits<RankIterator>::value_type
+ rank_type;
+ const RankIterator sample_ranks;
+ const dc_sampler<rank_type>& sample;
+public:
+ dc_suffix_compare(RankIterator beg, const dc_sampler<rank_type>& s)
+ : sample_ranks(beg), sample(s) {}
+ template <typename StringPosType>
+ bool operator() (StringPosType a, StringPosType b) const {
+ std::pair<rank_type,rank_type> rep = sample.pair(a, b);
+ return sample_ranks[rep.first] < sample_ranks[rep.second];
+ }
+};
+
+
+//----------------------------------------------------------------------
+// functor mark_unsorted_group
+//
+// When string sorting leaves a group of suffixes unsorted,
+// it marks that group by calling this functor.
+//
+// The marking is done by one's complementing the first and last
+// entries of the group. Complemented values are assumed to become
+// negative. Complementing is used instead of negation to be able
+// mark the zero.
+//----------------------------------------------------------------------
+class mark_unsorted_group2 {
+public:
+ template <typename SArrayIterator>
+ void operator() (SArrayIterator beg, SArrayIterator end) const {
+ *beg = ~*beg;
+ --end;
+ *end = ~*end; // note: group of size one stays unmarked
+ }
+};
+
+/*
+//----------------------------------------------------------------------
+// functor do_nothing
+//
+// Replaces mark_unsorted_group when nothing needs to be done
+// to unsorted groups (this happens when doing stringsort only)
+//----------------------------------------------------------------------
+class do_nothing {
+public:
+ template <typename Iterator>
+ void operator() (Iterator, Iterator) {}
+};
+*/
+
+
+//----------------------------------------------------------------------
+// function dc_sort_suffixes
+//
+// Sort a set of suffixes using a difference cover sample.
+//----------------------------------------------------------------------
+template <typename StringIterator, typename SArrayIterator,
+ typename DCSampler, typename RankIterator>
+void
+dc_sort_suffixes(StringIterator str, StringIterator str_end,
+ SArrayIterator sa, SArrayIterator sa_end,
+ const DCSampler& sampler,
+ RankIterator ranks, RankIterator ranks_end)
+{
+ if (ranks_end-ranks != sampler.samplesize()) {
+ std::ostringstream os;
+ os << "dc_suffix_sort:"
+ << " wrong range size";
+ throw std::logic_error(os.str());
+ }
+
+ suffix_mkqsort(str, str_end, sa, sa_end, sampler.period(),
+ mark_unsorted_group2());
+
+ SArrayIterator first = sa;
+ while (true) {
+ while (first != sa_end && *first >= 0) ++first;
+ if (first == sa_end) break;
+ *first = ~*first;
+ SArrayIterator last = first + 1;
+ while (*last >= 0) ++last;
+ *last = ~*last;
+ ++last;
+ quicksort(first, last,
+ dc_suffix_compare<RankIterator>(ranks, sampler));
+ first = last;
+ }
+
+}
+
+#endif // DCSUFFIXSORT_HPP
--- /dev/null
+/*
+ * differerence_cover.cpp
+ *
+ * Copyright 2003 Max-Planck-institut fuer Informatik
+ *
+ */
+
+//======================================================================
+// This file implements the computation of difference covers
+//======================================================================
+
+#include <algorithm>
+#include <numeric>
+#include <stdexcept>
+#include <iostream>
+
+#include "difference_cover.hpp"
+
+
+//----------------------------------------------------------------------
+// Precomputed difference covers.
+//
+// The covers from 0 to 6 are from
+// Luk, Wong: Two new quorum base algorithms for distributed mutual
+// exclusion. In Proc. 17th International Conference on Distributed
+// Computing Systems, pp. 100-106. IEEE, 1997.
+// Covers 7 and 8 were found using an exhaustive search algorithm.
+// All except cover 8 are known to be optimal.
+//----------------------------------------------------------------------
+namespace {
+ const unsigned max_precomputed_cover = 8;
+ const unsigned coversizes[max_precomputed_cover+1] =
+ {1,2,3,4,5,7,9,13,20};
+ const unsigned cover0[] = {0};
+ const unsigned cover1[] = {0,1};
+ const unsigned cover2[] = {0,1,2};
+ const unsigned cover3[] = {0,1,2,4};
+ const unsigned cover4[] = {0,1,2,5,8};
+ const unsigned cover5[] = {0,1,2,3,7,11,19}; //{0,7,8,10,14,19,23};
+ const unsigned cover6[] = {0,1,2,5,14,16,34,42,59};
+ const unsigned cover7[] = {0,1,3,7,17,40,55,64,75,85,104,109,117};
+ const unsigned cover8[] = {0,1,3,7,12,20,30,44,65,80,89,96,114,122,
+ 128,150,196,197,201,219};
+ const unsigned * covers[] = { cover0, cover1, cover2, cover3, cover4,
+ cover5, cover6, cover7, cover8 };
+}
+
+
+//----------------------------------------------------------------------
+// constructor of difference_cover
+//
+// Constructs a difference cover modulo a power of two.
+// (see difference_cover.hpp)
+//
+// Small difference covers have been precomputed (see above).
+// Larger difference covers are computed by the algorithm in
+// C.J. Colbourn and A.C.H. Ling. Quorums from Difference Covers.
+// Information Processing Letters 75(1-2):9-12, 2000
+//----------------------------------------------------------------------
+difference_cover::difference_cover(unsigned lv)
+ : logmod(lv), mask((1<<lv)-1),
+ coverrank(1<<lv), diff2pos(1<<lv,1<<lv)
+{
+ if (logmod <= max_precomputed_cover) {
+
+ // use precomputed covers
+ coversize = coversizes[logmod];
+ cover.assign(covers[logmod], covers[logmod] + coversize);
+
+ } else {
+
+ // use the Colbourn-Ling algorithm
+ int v = (1<<logmod);
+ int r = 0;
+ while (24*r*r+36*r+13 < v) ++r;
+ coversize = 6*r+4;
+ cover.resize(coversize);
+ std::vector<unsigned>::iterator i = cover.begin();
+ *i = 0; ++i;
+ std::fill(i, i+r, 1); i+=r;
+ *i = r+1; ++i;
+ std::fill(i, i+r, 2*r+1); i+=r;
+ std::fill(i, i+2*r+1, 4*r+3); i+=2*r+1;
+ std::fill(i, i+r+1, 2*r+2); i+=r+1;
+ std::fill(i, i+r, 1); i+=r;
+ if (i != cover.end()) {
+ throw std::logic_error("error in the Coulbourn-Ling algorithm");
+ }
+ std::partial_sum(cover.begin(), cover.end(), cover.begin());
+
+ }
+
+#if DEBUG > 1
+ std::cerr << "difference cover modulo " << (1<<logmod);
+ std::cerr << " of size " << coversize << ": " << std::endl;
+ std::copy(cover.begin(), cover.end(),
+ std::ostream_iterator<unsigned>(std::cerr, " ") );
+ std::cerr << std::endl;
+#endif
+
+ // compute the lookup tables
+ unsigned i, j, modulus=1<<logmod;
+ for (i=0, j=0; i<modulus; ++i) {
+ coverrank[i] = j;
+ if (j<coversize && cover[j] <= i) { ++j; }
+ }
+ for (i=coversize-1; i<coversize; --i) {
+ for (j=0; j<coversize; ++j) {
+ diff2pos[(cover[j]-cover[i])&mask] = cover[i];
+ }
+ }
+
+ // check that this is a difference cover
+ for (i=0; i<modulus; ++i) {
+ if (diff2pos[i] == modulus) {
+ throw std::logic_error("bad difference cover");
+ }
+#if DEBUG > 2
+ std::cerr << i << " = " << ((diff2pos[i]+i)&mask);
+ std::cerr << " - " << diff2pos[i];
+ std::cerr << " (mod " << modulus << ")";
+ std::cerr << std::endl;
+#endif
+ }
+}
--- /dev/null
+/*
+ * difference_cover.hpp
+ *
+ * Copyright 2003 Max-Planck-institut fuer Informatik
+ *
+ */
+
+//======================================================================
+// This file defines two classes:
+//
+// class difference_cover implements basic difference covers
+// modulo any power of two
+// class dc_sampler implements the difference cover based sampling
+// used in the suffix array construction
+//
+// Part of the implementation of class difference_cover is in
+// difference_cover.cpp
+//======================================================================
+
+#ifndef DIFFERENCE_COVER_HPP
+#define DIFFERENCE_COVER_HPP
+
+#include <vector>
+#include <utility>
+//#include <stdexcept>
+
+//----------------------------------------------------------------------
+// class difference_cover
+//
+// A difference cover D modulo v is a set of integers in the range [0,v)
+// such that for all i in [0,v), there exists j,k in D such that
+// i = k-j (mod v).
+//
+// This class represents a difference cover modulo any power of two.
+// The sizes of the difference covers for small v are:
+// v 1 2 4 8 16 32 64 128 256 512 1024 ...
+// |D| 1 2 3 4 5 7 9 13 20 28 40 ...
+// For v<=128, the size of the difference cover is minimal.
+// For any v, the size is less than sqrt(1.5*v)+6 (sqrt(v) is a lower bound).
+//
+// Notes:
+// - All arithmetic is done with unsigned int, because it guarantees
+// correct modular arithmetic including conversion to/from other
+// integral types. This is a main reason why supporting values of v
+// other than powers of two would be difficult.
+//----------------------------------------------------------------------
+class difference_cover {
+
+public:
+
+ // create a difference cover modulo 2^lv
+ explicit difference_cover(unsigned lv); // defined in difference_cover.cpp
+
+ // basic dimensions
+ unsigned modulus() const { return 1<<logmod; }
+ unsigned size() const { return coversize; }
+
+ // division by modulus
+ unsigned mod(unsigned i) const { return i&mask; }
+ template <typename IntType>
+ IntType div(IntType i) const { return i>>logmod; }
+
+ // mapping [0,modulus) <-> [0,size)
+ unsigned rank(unsigned pos) const { return coverrank[pos&mask]; }
+ unsigned pos(unsigned rank) const { return cover[rank]; }
+
+ // compute k such that i+k and j+k are both in the cover
+ unsigned offset(unsigned i, unsigned j) const {
+ return (diff2pos[(j-i)&mask]-i)&mask;
+ }
+
+ typedef std::vector<unsigned>::const_iterator iterator;
+ iterator begin() const { return cover.begin(); }
+ iterator end() const { return cover.end(); }
+
+private:
+
+ unsigned logmod; // log v
+ unsigned mask; // v-1 = 11...1 in binary
+ unsigned coversize; // |D|
+
+ std::vector<unsigned> cover; // D
+ std::vector<unsigned> coverrank; // rank of a value in D
+ std::vector<unsigned> diff2pos; // lookup table for finding a
+ // pair with a given difference
+};
+
+
+
+//----------------------------------------------------------------------
+// class dc_sampler
+//
+// Represents the periodic sample positions chosen according
+// to a difference cover.
+//
+// The main duties are:
+// 1. Fill the sparse suffix array with the sample positions.
+// 2. Provide a mapping pack() from sample positions to packed positions.
+// A packed position pack(i) of a sample position i is used as
+// the corresponding position in the inverse sparse suffix array.
+// pack(i) must satisfy:
+// a) pack(i) is unique (different for each sample position i) and
+// in the range [0,samplesize)
+// b) The difference pack(i+v)-pack(i), where v is the length of
+// the period (modulus of the difference cover), is the same for
+// all i. This difference is called the packed period.
+// 3. Find the representative pair for two arbitrary positions i and j.
+// The representative pair is a pair of positions in the inverse
+// sparse suffix array that can be used for comparing the suffixes
+// i and j if they have a common prefix of length period-1.
+// The representative pair is computed by finding k in [0,period)
+// such that i+k and j+k are both sample positions and returning
+// the packed positions corresponding to i+k and j+k.
+//----------------------------------------------------------------------
+template <typename OffsetType>
+class dc_sampler {
+public:
+
+ dc_sampler(OffsetType s, OffsetType v) : dcover(v), fullsize_(s),
+ samplesize_(dcover.div(s)*dcover.size()+dcover.rank(s))
+ {}
+
+ OffsetType fullsize() const { return fullsize_; }
+ OffsetType samplesize() const { return samplesize_; }
+ OffsetType period() const { return dcover.modulus(); }
+ OffsetType packed_period() const { return dcover.size(); }
+
+ OffsetType pack(OffsetType i) const {
+ return dcover.div(i) * dcover.size() + dcover.rank(i);
+ }
+
+ // representative pair
+ std::pair<OffsetType,OffsetType>
+ pair(OffsetType i, OffsetType j) const {
+ OffsetType k = dcover.offset(i, j);
+ return std::make_pair(pack(i+k), pack(j+k));
+ }
+
+ // fills a range with the sample positions
+ template <typename OutputIterator>
+ OutputIterator fill(OutputIterator it) const {
+ difference_cover::iterator j = dcover.begin();
+ OffsetType i = 0;
+ OffsetType pos = i + static_cast<OffsetType>(*j);
+ while (pos < fullsize()) {
+ *it++ = pos;
+ if (++j == dcover.end()) {
+ i += dcover.modulus();
+ j = dcover.begin();
+ }
+ pos = i + static_cast<OffsetType>(*j);
+ }
+ return it;
+ }
+
+private:
+
+ const difference_cover dcover;
+ const OffsetType fullsize_; // size of the range where samples are chosen
+ const OffsetType samplesize_;
+};
+
+#endif // DIFFERENCE_COVER_HPP
--- /dev/null
+/*
+ * distribute.hpp
+ *
+ * Copyright 2004 Juha K"arkk"ainen <juha.karkkainen@cs.helsinki.fi>
+ *
+ */
+
+#ifndef DISTRIBUTE_HPP
+#define DISTRIBUTE_HPP
+
+#include "dcsuffixsort.hpp"
+
+#include <iterator>
+#include <algorithm>
+#include <utility>
+
+
+//----------------------------------------------------------------------
+// suffix_lcp
+//
+// Compute the length of the longest common prefix between two
+// suffixes a and b.
+// str_end = end of the text
+// minlcp = length of the already known common prefix
+// maxlcp = stop when the length reaches maxlcp
+//----------------------------------------------------------------------
+template <class StringIterator, class LcpType>
+LcpType
+suffix_lcp(StringIterator a, StringIterator b, StringIterator str_end,
+ LcpType minlcp, LcpType maxlcp)
+{
+ if (a < b) swap(a,b); // make a the shorter suffix
+ if ((str_end-a) > maxlcp) str_end = a + maxlcp;
+ std::pair<StringIterator,StringIterator> mm =
+ std::mismatch(a+minlcp, str_end, b+minlcp);
+ //std::cout << " lcp=" << mm.first-a << "\n";
+ return static_cast<LcpType>(mm.first-a);
+}
+
+
+//----------------------------------------------------------------------
+// class suffix_distributor
+//----------------------------------------------------------------------
+template <class StringIterator, class SuffixIterator,
+ class DCSampler, class RankIterator>
+class suffix_distributor
+{
+private:
+ //typedef typename std::iterator_traits<StringIterator>::difference_type
+ // lcptype;
+ typedef short lcptype;
+ typedef typename
+ std::iterator_traits<SuffixIterator>::difference_type difftype;
+
+
+ StringIterator str, str_end;
+ SuffixIterator pivots, pivots_end;
+ std::vector<lcptype> lcps;
+ const DCSampler& dcsampler;
+ RankIterator dcranks;
+ lcptype maxlcp;
+
+ typedef typename std::vector<lcptype>::iterator lcpiterator;
+
+public:
+
+ suffix_distributor(StringIterator str_, StringIterator str_end_,
+ SuffixIterator pivots_, SuffixIterator pivots_end_,
+ const DCSampler& dcsampler_, RankIterator dcranks_)
+ : str(str_), str_end(str_end_), pivots(pivots_), pivots_end(pivots_end_),
+ lcps(pivots_end-pivots), dcsampler(dcsampler_), dcranks(dcranks_),
+ maxlcp(dcsampler.period())
+ {
+ compute_lcps();
+ }
+
+ void reset(SuffixIterator pivots_, SuffixIterator pivots_end_)
+ {
+ pivots=pivots_;
+ pivots_end=pivots_end_;
+ lcps.resize(pivots_end-pivots);
+
+ compute_lcps();
+ }
+
+
+ difftype nbuckets() const{ return pivots_end-pivots+1; }
+
+ difftype find_bucket(StringIterator suf) const {
+ return binary_search(suf);
+ //return i - pivots;
+ }
+
+ void
+ print() const {
+#if DEBUG > 1
+ std::cout << "distributor memory="
+ << lcps.size()*sizeof(lcptype)
+ << std::endl;
+#if DEBUG > 2
+ std::cout << "pivots and lcps:\n";
+ for (SuffixIterator i = pivots; i != pivots_end; ++i) {
+ std::cout << std::setw(4) << i-pivots
+ << std::setw(4) << int(lcps[i-pivots])
+ << std::setw(9) << *i << " ";
+ std::copy(str+*i, std::min(str_end, str+*i+40),
+ std::ostream_iterator<char>(std::cout,""));
+ std::cout << "\n";
+ }
+ std::cout << "distributor end\n";
+#endif
+#endif
+ }
+
+
+private:
+
+ bool dc_less_or_equal(difftype a, difftype b) const {
+ std::pair<difftype,difftype> rep = dcsampler.pair(a, b);
+ return dcranks[rep.first] <= dcranks[rep.second];
+ }
+
+ //----------------------------------------------------------------------
+ // compute_lcps_bounded
+ //
+ // Compute all the lcp values in a subrange bounded by
+ // actual pivot suffixes.
+ //----------------------------------------------------------------------
+ void
+ compute_lcps_bounded(SuffixIterator left_suf, SuffixIterator right_suf,
+ lcpiterator left_lcp,
+ lcptype minlcp, lcptype maxlcp)
+ {
+ if (left_suf == right_suf) return;
+ difftype midpoint = (right_suf-left_suf)/2;
+ //std::cout << " midpoint of [0," << right_suf-left_suf
+ // << ")=" << midpoint << "\n";
+
+ SuffixIterator mid_suf = left_suf + midpoint;
+ lcpiterator mid_lcp = left_lcp + midpoint;
+
+ lcptype llcp_left = suffix_lcp(str+*(left_suf-1), str+*mid_suf,
+ str_end, minlcp, maxlcp);
+ lcptype llcp_right = suffix_lcp(str+*mid_suf, str+*right_suf,
+ str_end, minlcp, maxlcp);
+ *mid_lcp = llcp_right - llcp_left;
+ //std::cout << *mid_lcp << std::endl;
+
+ compute_lcps_bounded(left_suf, mid_suf, left_lcp, llcp_left, maxlcp);
+ compute_lcps_bounded(mid_suf+1, right_suf, mid_lcp+1, llcp_right, maxlcp);
+ }
+
+
+ //----------------------------------------------------------------------
+ // compute_lcps
+ //
+ // Compute all the lcp values in the full range.
+ //----------------------------------------------------------------------
+ void
+ compute_lcps()
+ {
+ if (pivots == pivots_end) return;
+ difftype centerpoint = (pivots_end - pivots)/2;
+ //std::cout << "midpoint of [0," << (pivots_end - pivots) << ")="
+ // << centerpoint << "\n";
+ lcps[centerpoint] = 0;
+
+ // unbounded ranges on the left
+ difftype rightpoint = centerpoint;
+ while (rightpoint > 0) {
+ difftype midpoint = rightpoint/2;
+ //std::cout << "midpoint of [0," << rightpoint << ")=" << midpoint << "\n";
+ lcptype llcp = suffix_lcp(str+pivots[midpoint],
+ str+pivots[rightpoint],
+ str_end, static_cast<lcptype>(0), maxlcp);
+ lcps[midpoint] = llcp;
+ //std::cout << llcp << std::endl;
+ compute_lcps_bounded(pivots+midpoint+1, pivots+rightpoint,
+ lcps.begin()+midpoint+1, llcp, maxlcp);
+ rightpoint = midpoint;
+ }
+
+ // unbounded ranges on the right
+ SuffixIterator left_suf = pivots + centerpoint + 1;
+ lcpiterator left_lcp = lcps.begin() + centerpoint + 1;
+ while (left_suf != pivots_end) {
+ difftype midpoint = (pivots_end - left_suf)/2;
+ //std::cout << "midpoint of [" << (left_suf-pivots) << "," <<
+ // (pivots_end-pivots) << ")=" << (left_suf-pivots)+midpoint << "\n";
+ lcptype llcp = suffix_lcp(str+left_suf[-1], str+left_suf[midpoint],
+ str_end, static_cast<lcptype>(0), maxlcp);
+ left_lcp[midpoint] = -llcp;
+ //std::cout << -llcp << std::endl;
+ compute_lcps_bounded(left_suf, left_suf+midpoint, left_lcp,
+ llcp, maxlcp);
+ left_suf += midpoint + 1;
+ left_lcp += midpoint + 1;
+ }
+
+ }
+
+
+
+#ifndef BINSEARCH
+
+//----------------------------------------------------------------------
+// binary_search
+//----------------------------------------------------------------------
+difftype
+//SuffixIterator
+binary_search(StringIterator query) const
+{
+ //SuffixIterator left = pivots;
+ //SuffixIterator right = pivots_end;
+ //SuffixIterator mid;
+
+ difftype left = 0;
+ difftype right = pivots_end-pivots;
+ difftype mid;
+
+ lcptype minlcp = 0;
+ lcptype lcp_diff = 0;
+ lcptype lcp_left, lcp_right;
+
+ in_the_middle:
+ while (true) {
+ /*
+ std::cout << "middle minlcp=" << int(minlcp)
+ << " lcp_diff=" << int(lcp_diff)
+ << " mid=" << left + (right-left)/2
+ << "\n";
+ */
+ if (left == right) return left;
+ mid = left + (right-left)/2;
+ if (str+pivots[mid] == query) return mid+1;
+ lcptype lcp_mid = lcps[mid];
+ if (lcp_mid == 0) {
+ lcptype newlcp = suffix_lcp(query, str+pivots[mid], str_end,
+ minlcp, maxlcp);
+ if (newlcp == maxlcp) {
+ lcp_left = lcp_right = minlcp;
+ goto fullmatch;
+ }
+ if (query+newlcp == str_end ||
+ query[newlcp] < str[pivots[mid]+newlcp]) {
+ right = mid;
+ lcp_diff = newlcp - minlcp;
+ if (lcp_diff != 0) goto closer_to_right;
+ } else {
+ left = mid+1;
+ lcp_diff = minlcp - newlcp;
+ if (lcp_diff != 0) goto closer_to_left;
+ }
+ } else {
+ if (lcp_mid < lcp_diff) {
+ left = mid+1;
+ } else {
+ right = mid;
+ }
+ }
+ }
+
+ closer_to_left:
+ while (true) {
+ /*
+ std::cout << "left minlcp=" << int(minlcp)
+ << " lcp_diff=" << int(lcp_diff)
+ << " mid=" << left + (right-left)/2
+ << "\n";
+ */
+ if (left == right) return left;
+ mid = left + (right-left)/2;
+ if (str+pivots[mid] == query) return mid+1;
+ lcptype lcp_mid = lcps[mid];
+ if (lcp_mid > lcp_diff) {
+ if (lcp_mid < 0) {
+ lcp_diff -= lcp_mid;
+ minlcp -= lcp_mid;
+ }
+ right = mid;
+ } else {
+ if (lcp_mid == lcp_diff) {
+ lcptype lcp = minlcp - lcp_diff;
+ lcptype newlcp = suffix_lcp(query, str+pivots[mid], str_end,
+ lcp, maxlcp);
+ if (newlcp == maxlcp) {
+ lcp_left = lcp;
+ lcp_right = minlcp;
+ goto fullmatch;
+ }
+ if (query+newlcp == str_end ||
+ query[newlcp] < str[pivots[mid]+newlcp]) {
+ right = mid;
+ minlcp = lcp;
+ lcp_diff = newlcp - minlcp;
+ if (lcp_diff == 0) goto in_the_middle;
+ else goto closer_to_right;
+ } else {
+ left = mid+1;
+ lcp_diff = minlcp - newlcp;
+ }
+ } else {
+ left = mid+1;
+ }
+ }
+ }
+
+ closer_to_right:
+ while (true) {
+ /*
+ std::cout << "right minlcp=" << int(minlcp)
+ << " lcp_diff=" << int(lcp_diff)
+ << " mid=" << left + (right-left)/2
+ << "\n";
+ */
+ if (left == right) return left;
+ mid = left + (right-left)/2;
+ if (str+pivots[mid] == query) return mid+1;
+ lcptype lcp_mid = lcps[mid];
+ if (lcp_mid < lcp_diff) {
+ if (lcp_mid > 0) {
+ lcp_diff -= lcp_mid;
+ minlcp += lcp_mid;
+ }
+ left = mid+1;
+ } else {
+ if (lcp_mid == lcp_diff) {
+ lcptype lcp = minlcp + lcp_diff;
+ lcptype newlcp = suffix_lcp(query, str+pivots[mid], str_end,
+ lcp, maxlcp);
+ if (newlcp == maxlcp) {
+ lcp_left = minlcp;
+ lcp_right = lcp;
+ goto fullmatch;
+ }
+ if (query+newlcp == str_end ||
+ query[newlcp] < str[pivots[mid]+newlcp]) {
+ right = mid;
+ lcp_diff = newlcp - minlcp;
+ } else {
+ left = mid+1;
+ minlcp = lcp;
+ lcp_diff = minlcp - newlcp;
+ if (lcp_diff == 0) goto in_the_middle;
+ else goto closer_to_left;
+ }
+ }
+ right = mid;
+ }
+ }
+
+ fullmatch:
+ // find the range of fully matching pivots
+ //std::cout << "fullmatch: left\n";
+ {
+ difftype right_tmp = mid;
+ while (left != right_tmp) {
+ difftype mid = left + (right_tmp-left)/2;
+ //std::cout << left << mid << right_tmp << std::endl;
+ difftype lcp_diff = lcps[mid];
+ //std::cout << " " << lcp_left << " " << lcp_diff << std::endl;
+ if (lcp_left + lcp_diff == maxlcp) {
+ right_tmp = mid;
+ } else {
+ if (lcp_diff > 0) { lcp_left += lcp_diff; }
+ left = mid+1;
+ }
+ }
+ }
+ //std::cout << "fullmatch: right\n";
+ {
+ difftype left_tmp = mid+1;
+ while (left_tmp != right) {
+ difftype mid = left_tmp + (right-left_tmp)/2;
+ //std::cout << left_tmp << mid << right << std::endl;
+ difftype lcp_diff = lcps[mid];
+ //std::cout << " " << lcp_right << " " << lcp_diff << std::endl;
+ if (lcp_right - lcp_diff == maxlcp) {
+ left_tmp = mid+1;
+ } else {
+ if (lcp_diff < 0) { lcp_right -= lcp_diff; }
+ right = mid;
+ }
+ }
+ }
+ //std::cout << "fullmatch: dcsearch\n";
+ while (left != right) {
+ difftype mid = left + (right-left)/2;
+ //std::cout << left << mid << right << std::endl;
+ if (dc_less_or_equal(pivots[mid], query-str)) {
+ left = mid+1;
+ } else {
+ right = mid;
+ }
+ }
+ return left;
+
+}
+
+#endif
+
+};
+
+
+#endif // DISTRIBUTE_HPP
--- /dev/null
+/*
+ * doubling.hpp
+ *
+ * Copyright 2003 Max-Planck-Institut fuer Informatik
+ *
+ */
+
+//======================================================================
+// Doubling algorithm for suffix array construction.
+// Follows closely the algorithm described in
+//
+// N.J. Larsson and K. Sadakane. Faster Suffix Sorting.
+// Technical Report LU-CS-TR:99-214, Dept. of Computer Science,
+// Lund University, Sweden, 1999.
+//======================================================================
+
+#ifndef DOUBLING_HPP
+#define DOUBLING_HPP
+
+#include "partition.hpp"
+
+#include <iterator>
+#include <algorithm>
+#include <utility>
+#include <sstream>
+#include <stdexcept>
+
+//----------------------------------------------------------------------
+// functor isa_access
+//
+// Given a suffix array iterator computes the corresponding
+// inverse suffix array entry to be used in comparisons.
+//----------------------------------------------------------------------
+template <typename ISAIterator, typename SAIterator>
+class isa_access
+ : public std::unary_function<SAIterator,
+ typename std::iterator_traits<ISAIterator>::value_type>
+{
+private:
+ typedef typename std::iterator_traits<ISAIterator>::difference_type
+ difference_type;
+ const ISAIterator isa;
+public:
+ isa_access(ISAIterator s, difference_type lcp) : isa(s+lcp) {}
+ typename std::iterator_traits<ISAIterator>::value_type
+ operator() (SAIterator i) const {
+ return isa[*i];
+ }
+};
+
+//----------------------------------------------------------------------
+// function update_group
+//
+// Update a group after doubling.
+// WARNING: cannot handle an empty group
+//----------------------------------------------------------------------
+template <typename SAIterator, typename ISAIterator>
+void update_group(SAIterator beg, SAIterator end,
+ SAIterator sa, ISAIterator isa)
+{
+ typedef typename std::iterator_traits<SAIterator>::value_type
+ pos_type;
+ pos_type groupnumber = std::distance(sa, end) - 1;
+ if (end-beg > 1) {
+ for (SAIterator i = beg; i != end; ++i) {
+ isa[*i] = groupnumber;
+ }
+ } else {
+ isa[*beg] = groupnumber;
+ *beg = -1;
+ }
+}
+
+
+//----------------------------------------------------------------------
+// function selection_partition
+//
+// Performs doubling partitioning for small groups.
+//----------------------------------------------------------------------
+template <typename SAIterator, typename ISAIterator>
+void selection_partition(SAIterator sa, ISAIterator isa,
+ SAIterator beg, SAIterator end,
+ typename std::iterator_traits<SAIterator>::difference_type lcp)
+{
+ typedef typename std::iterator_traits<SAIterator>::value_type
+ pos_type;
+ isa_access<ISAIterator, SAIterator> key(isa, lcp);
+ while (end-beg > 1) {
+ SAIterator min_end = beg+1;
+ pos_type min_key = key(beg);
+ for (SAIterator i = beg+1; i != end; ++i) {
+ if (key(i) < min_key) {
+ std::swap(*beg, *i);
+ min_end = beg+1;
+ min_key = key(beg);
+ } else if (!(min_key < key(i))) {
+ std::swap(*min_end, *i);
+ ++min_end;
+ }
+ }
+ update_group(beg, min_end, sa, isa);
+ beg = min_end;
+ }
+ if (beg != end) update_group(beg, end, sa, isa);
+}
+
+
+//----------------------------------------------------------------------
+// function doubling_partition
+//
+// Given a group of suffixes with a common prefix of length lcp
+// partition it to subgroups according to the next lcp characters.
+// The next lcp characters are represented by a single value
+// in the inverse suffix array.
+//----------------------------------------------------------------------
+template <typename SAIterator, typename ISAIterator>
+void doubling_partition(SAIterator sa, ISAIterator isa,
+ SAIterator beg, SAIterator end,
+ typename std::iterator_traits<SAIterator>::difference_type lcp)
+{
+ typedef typename std::iterator_traits<SAIterator>::value_type
+ pos_type;
+
+ while (end-beg > 10) {
+
+ // ternary partition
+ isa_access<ISAIterator, SAIterator> get_isa_entry(isa, lcp);
+ SAIterator pivot = random_pivot(beg, end, std::less<pos_type>(),
+ get_isa_entry);
+ std::pair<SAIterator, SAIterator> midrange
+ = ternary_partition(beg, end, pivot, std::less<pos_type>(),
+ get_isa_entry);
+
+ // recurse on <-part
+ if (beg != midrange.first) {
+ doubling_partition(sa, isa, beg, midrange.first, lcp);
+ }
+
+ // update =-part
+ update_group(midrange.first, midrange.second, sa, isa);
+
+ // loop on >-part
+ beg = midrange.second;
+ }
+
+ selection_partition(sa, isa, beg, end, lcp);
+}
+
+
+
+//======================================================================
+// function doubling_sort
+//
+// The main doubling algorithm
+//======================================================================
+template <typename SAIterator, typename ISAIterator>
+void
+doubling_sort(SAIterator sa, SAIterator sa_end,
+ ISAIterator isa, ISAIterator isa_end,
+ typename std::iterator_traits<ISAIterator>::difference_type lcp)
+{
+ typedef typename std::iterator_traits<SAIterator>::difference_type
+ difference_type;
+
+ difference_type size = std::distance(sa, sa_end);
+ if (isa_end-isa != size) {
+ std::ostringstream os;
+ os << "doubling_sort:"
+ << " wrong range size";
+ throw std::logic_error(os.str());
+ }
+
+ while (*sa > -size) {
+
+ SAIterator beg = sa;
+ difference_type nrunlen = 0;
+ while (beg != sa_end) {
+ difference_type val = *beg;
+ if (val < 0) {
+ // jump over already sorted part
+ beg -= val;
+ nrunlen += val;
+ } else {
+ // combine preceding sorted parts
+ if (nrunlen < 0) {
+ *(beg+nrunlen) = nrunlen;
+ nrunlen = 0;
+ }
+ // partition unsorted group
+ SAIterator end = sa + isa[val] + 1;
+ doubling_partition(sa, isa, beg, end, lcp);
+ beg = end;
+ }
+ }
+ if (nrunlen < 0) {
+ *(beg+nrunlen) = nrunlen;
+ }
+
+ lcp *= 2;
+ }
+}
+
+
+#endif // DOUBLING_HPP
--- /dev/null
+
+#include <vector>
+#include <iostream>
+#include <iterator>
+
+template <class Iterator, class DCSampler, class RankIterator>
+class kmp_split {
+public:
+
+ kmp_split(Iterator s, Iterator se, Iterator p,
+ const DCSampler& d, RankIterator r)
+ : str(s), str_end(se), suf(str), pivot(p), pivot_end(p+d.period()-1),
+ last(1), last_lcp(0), dcsampler(d), dcranks(r)
+ {
+ preprocess();
+ }
+
+private:
+
+ bool dc_less(Iterator a, Iterator b) const {
+ std::pair<int,int> rep = dcsampler.pair(a-str, b-str);
+ return dcranks[rep.first] <= dcranks[rep.second];
+ }
+
+public:
+
+ bool is_next_less() {
+ // state:
+ // suf == str_end ||
+ // last_lcp == lcp((suf-last), pivot) &&
+ // last_less == (suf-last)+last_lcp == str_end ||
+ // (suf-last)[last_lcp] < pivot[last_lcp]
+
+ bool result = true;
+ if (suf==str_end) return true;
+ if (suf==pivot) {
+ last_lcp = pivot_end-pivot;
+ last_less = false;
+ ++suf; last=1;
+ return false;
+ }
+
+ /*
+ std::cout << " last=" << last
+ << " last_lcp=" << last_lcp
+ << " last+lcp[last]=" << last+lcp[last]
+ << " last_less=" << last_less
+ << " less[last]=" << bool(less[last]);
+ */
+
+ if (last_lcp < last || last_lcp == last+lcp[last]) {
+ int lcp;
+ if (last_lcp < last) lcp = 0;
+ else lcp = last_lcp - last;
+ while (suf+lcp != str_end && pivot+lcp != pivot_end
+ && suf[lcp]==pivot[lcp]) {
+ ++lcp;
+ }
+ /*
+ std::cout << " lcp=" << lcp
+ << " suf+lcp-str=" << suf+lcp-str
+ << " str_end-str=" << str_end-str
+ << " suf+lcp==str=" << (suf+lcp==str);
+ */
+ result = (suf+lcp==str_end)
+ || (pivot+lcp==pivot_end ? dc_less(suf, pivot) : suf[lcp] < pivot[lcp]);
+ //std::cout << " result=" << result << std::endl;
+ last_lcp = lcp;
+ last_less = result;
+ ++suf; last = 1;
+ } else if (last_lcp > last+lcp[last]) {
+ result = !less[last];
+ ++suf; ++last;
+ } else if (last_lcp < last+lcp[last]) {
+ result = last_less;
+ last_lcp -= last;
+ ++suf; last = 1;
+ }
+
+ //std::cout << " result=" << result << std::endl;
+ return result;
+ }
+
+ //81 0 212 cabcbacacabcbacacabcbacacabcbacacabcbaca
+
+private:
+ Iterator str;
+ Iterator str_end;
+ Iterator suf;
+ Iterator pivot;
+ Iterator pivot_end;
+
+ int last;
+ int last_lcp;
+ bool last_less;
+
+ std::vector<int> lcp;
+ std::vector<char> less;
+
+ const DCSampler& dcsampler;
+ RankIterator dcranks;
+
+ void preprocess() {
+
+ int length = pivot_end-pivot;
+
+ int i = 0;
+ int j = 1;
+
+ /*
+ std::vector<int> shift(length+1);
+ shift[0] = -1;
+
+ while (true) {
+ while ( j < length && pivot[i] == pivot[j] ) {
+ shift[j] = shift[i];
+ ++i; ++j;
+ }
+ shift[j] = i;
+ if (j == length) break;
+ do {
+ i = shift[i];
+ } while ( i != -1 && pivot[i] != pivot[j] );
+ if (i==-1) { ++i; ++j; }
+ }
+
+ std::copy (shift.begin(), shift.end(),
+ std::ostream_iterator<int>(std::cout, " "));
+ std::cout << std::endl;
+ */
+
+ lcp.resize(length+1);
+ less.resize(length+1);
+
+ lcp[0] = length;
+ less[0] = 0;
+ i = 0; j = 1;
+
+ while (j <= length) {
+ while ( j < length && pivot[i] == pivot[j] ) {
+ ++i; ++j;
+ }
+ if ( j == length || pivot[i] < pivot[j] ) {
+ lcp[j-i] = i;
+ less[j-i] = 1;
+ } else {
+ lcp[j-i] = i;
+ less[j-i] = 0;
+ }
+ int k = i;
+ do {
+ --i;
+ if (i == -1) {
+ ++i; ++j;
+ break;
+ } else if (lcp[k-i] == i) {
+ break;
+ } else if (lcp[k-i] < i) {
+ lcp[j-i] = lcp[k-i];
+ less[j-i] = less[k-i];
+ } else {
+ lcp[j-i] = i; // i == lcp[j-k]
+ less[j-i] = less[j-k];
+ }
+ } while (true);
+ }
+
+ /*
+ std::copy (lcp.begin(), lcp.end(),
+ std::ostream_iterator<int>(std::cout, " "));
+ std::cout << std::endl;
+ std::copy (less.begin(), less.end(),
+ std::ostream_iterator<int>(std::cout, " "));
+ std::cout << std::endl;
+ */
+ }
+
+
+};
+
+
+/*
+
+gagcgag
+gagcgag 7 0
+ g 0 0
+ ga 1 1
+ g 0 0
+ gag 3 1
+ g 0 0
+ g 1 1
+ 0 1
+
+aagagcgagta
+
+*/
--- /dev/null
+/*
+ * partition.hpp
+ *
+ * Copyright 2003 Max-Planck-Institut fuer Informatik
+ *
+ */
+
+//======================================================================
+// A collection of flexible, generic, STL-like algorithm components
+// for comparison-based distribution (i.e., quicksort-like) sorting.
+//======================================================================
+
+#ifndef PARTITION_HPP
+#define PARTITION_HPP
+
+#include <algorithm>
+#include <utility>
+#include <functional>
+#include <iterator>
+#include <cstdlib> // for rand()
+
+//======================================================================
+// Most of the algorithms here have three versions. For example a sort
+// function can have the following forms:
+// sort(beg, end);
+// sort(beg, end, less);
+// sort(beg, end, less, key);
+// The first two forms are standard STL. The third form adds an
+// argument key, which is a unary function. Where the second
+// form would compare two iterators i and j as less(*i, *j),
+// the third form does less(key(i), key(j)) instead. This allows
+// moving some of the computation related to the comparison from
+// less() into key(). When the same iterator is used in many
+// comparisons (for example the pivot), key() needs to be evaluated
+// only once.
+//
+// For example, to sort objects of type T with a member function
+// size() returning int into increasing order of size() do
+// (TODO: test/expand this, use vector of vectors)
+//
+// sort(beg, end, std::less<unsigned>(), std::mem_fun(&T::size));
+//
+// key() may return a pointer or reference to (a member of) the object.
+// All the algorithms assume that the value returned by key() stays
+// valid only as long as the object is not moved.
+//
+// TYPE REQUIREMENTS
+// For example:
+// template <typename Iterator, typename Compare, typename KeyAccess>
+// void sort(Iterator, Iterator, Compare, KeyAccess);
+// * KeyAccess is a model of Adaptable Unary Function
+// * Iterator is convertible to KeyAccess::argument_type
+// * KeyAccess::result_type is convertible to the argument type
+// of Compare.
+//======================================================================
+
+//----------------------------------------------------------------------
+// functor copy_dereference
+// functor const_dereference
+//
+// Default functors to be used as key().
+// (TODO: which one should be used as the default?)
+// (TODO: is const_dereference::result_type a problem?)
+//----------------------------------------------------------------------
+template <typename Iterator>
+class const_dereference
+ : public std::unary_function<Iterator,
+ const typename std::iterator_traits<Iterator>::value_type &>
+{
+public:
+ const typename std::iterator_traits<Iterator>::value_type &
+ operator() (Iterator i) const { return *i; }
+};
+
+template <typename Iterator>
+class copy_dereference
+ : public std::unary_function<Iterator,
+ typename std::iterator_traits<Iterator>::value_type>
+{
+public:
+ typename std::iterator_traits<Iterator>::value_type
+ operator() (Iterator i) const { return *i; }
+};
+
+template <typename Iterator>
+class default_key : public copy_dereference<Iterator> {};
+
+
+
+//======================================================================
+//
+// Pivot selection
+//
+// (TODO: non-randomized pivot selectors)
+//======================================================================
+
+//----------------------------------------------------------------------
+// function iter_median3
+//
+// Median of three
+//----------------------------------------------------------------------
+template <typename Iterator, typename Compare, typename KeyAccess>
+Iterator
+iter_median3(Iterator a, Iterator b, Iterator c,
+ Compare less, KeyAccess key) {
+ typedef typename KeyAccess::result_type key_type;
+ key_type ka = key(a), kb = key(b), kc = key(c);
+ if (less(ka, kb))
+ if (less(kb, kc)) return b;
+ else if (less(ka, kc)) return c;
+ else return a;
+ else if (less(ka, kc)) return a;
+ else if (less(kb, kc)) return c;
+ else return b;
+}
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+template <typename Iterator, typename Compare>
+Iterator
+iter_median3(Iterator a, Iterator b, Iterator c, Compare less) {
+ return iter_median3(a, b, c, less, default_key<Iterator>());
+}
+
+template <typename Iterator>
+Iterator
+iter_median3(Iterator a, Iterator b, Iterator c) {
+ typedef typename std::iterator_traits<Iterator>::value_type key_type;
+ return iter_median3(a, b, c, std::less<key_type>(),
+ default_key<Iterator>());
+}
+
+
+//----------------------------------------------------------------------
+// function random_pivot
+//
+// Choose pivot of as a median of three random elements
+// (or as a single random element for small ranges)
+//
+// (TODO: better/faster random number generation?)
+//----------------------------------------------------------------------
+template <typename RandomAccessIterator, typename Compare,
+ typename KeyAccess>
+RandomAccessIterator
+random_pivot (RandomAccessIterator beg, RandomAccessIterator end,
+ Compare less, KeyAccess key)
+{
+ static double scale_to_one = 1.0L/(RAND_MAX+1.0);
+
+ typedef typename std::iterator_traits<RandomAccessIterator>::difference_type
+ difference_type;
+ difference_type size = std::distance(beg, end);
+ double scale_to_size = size * scale_to_one;
+ RandomAccessIterator pivot;
+ pivot = beg + static_cast<difference_type>(scale_to_size*std::rand());
+ if (size > 50) {
+ RandomAccessIterator pivot2, pivot3;
+ pivot2 = beg + static_cast<difference_type>(scale_to_size*std::rand());
+ pivot3 = beg + static_cast<difference_type>(scale_to_size*std::rand());
+ pivot = iter_median3(pivot, pivot2, pivot3, less, key);
+ }
+ return pivot;
+}
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+template <typename RandomAccessIterator, typename Compare>
+RandomAccessIterator
+random_pivot(RandomAccessIterator beg, RandomAccessIterator end,
+ Compare less)
+{
+ return random_pivot(beg, end, less,
+ default_key<RandomAccessIterator>());
+}
+
+template <typename RandomAccessIterator>
+RandomAccessIterator
+random_pivot(RandomAccessIterator beg, RandomAccessIterator end)
+{
+ typedef typename std::iterator_traits<RandomAccessIterator>::value_type
+ key_type;
+ return random_pivot(beg, end, std::less<key_type>(),
+ default_key<RandomAccessIterator>());
+}
+
+
+
+
+//======================================================================
+// Partitioning
+//======================================================================
+
+//----------------------------------------------------------------------
+// function binary_partition
+//
+// Partitions the input range [beg,end) into two parts [beg,mid) and
+// [mid,end) with all elements smaller than pivot in [beg, mid) and
+// all elements larger than pivot in [mid,end). Equal elements may
+// be in either subrange. Returns mid.
+// PRECONDITION: pivot is in the range [beg,end)
+//----------------------------------------------------------------------
+template <typename RandomAccessIterator, typename Compare,
+ typename KeyAccess>
+RandomAccessIterator
+binary_partition(RandomAccessIterator beg, RandomAccessIterator end,
+ RandomAccessIterator pivot, Compare less, KeyAccess key)
+{
+ if (beg == --end) return beg;
+
+ // move the pivot to a safe place and setup sentinels
+ std::swap(*beg, *pivot);
+ if (less(key(end), key(beg))) {
+ std::swap(*beg, *end);
+ pivot = end;
+ } else {
+ pivot = beg;
+ }
+
+ // now the pivot won't move anymore
+ typename KeyAccess::result_type pivot_key = key(pivot);
+
+ while (true) {
+ do { ++beg; } while (less(key(beg), pivot_key));
+ do { --end; } while (less(pivot_key, key(end)));
+ if (!(beg < end)) return beg;
+ std::swap(*beg, *end);
+ }
+}
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+template <typename RandomAccessIterator, typename Compare>
+RandomAccessIterator
+binary_partition(RandomAccessIterator beg, RandomAccessIterator end,
+ RandomAccessIterator pivot, Compare less)
+{
+ return binary_partition(beg, end, pivot, less,
+ default_key<RandomAccessIterator>());
+}
+
+template <typename RandomAccessIterator>
+RandomAccessIterator
+binary_partition(RandomAccessIterator beg, RandomAccessIterator end,
+ RandomAccessIterator pivot)
+{
+ typedef typename std::iterator_traits<RandomAccessIterator>::value_type
+ key_type;
+ return binary_partition(beg, end, pivot, std::less<key_type>(),
+ default_key<RandomAccessIterator>());
+}
+
+
+//----------------------------------------------------------------------
+// function ternary_partition
+//
+// Partitions the input range [beg,end) into three parts according to
+// comparisons to the pivot: | less | equal | greater |
+// Returns the middle (equal) range.
+// PRECONDITION: pivot is in the range [beg,end)
+//
+// METHOD
+// The algorithm maintains the following invariant:
+//
+// | equal | less | unknown | greater | equal |
+// ^ ^ ^ ^ ^ ^
+// beg first_less left right last_greater end
+//
+//----------------------------------------------------------------------
+template <typename RandomAccessIterator, typename Compare,
+ typename KeyAccess>
+std::pair<RandomAccessIterator,RandomAccessIterator>
+ternary_partition(RandomAccessIterator beg, RandomAccessIterator end,
+ RandomAccessIterator pivot, Compare less,
+ KeyAccess key)
+{
+ RandomAccessIterator left = beg, right = end-1;
+ RandomAccessIterator first_less = left, last_greater = right;
+
+ // first move pivot to a safe place and setup sentinels
+ std::iter_swap(pivot, left);
+ if (less(key(right), key(left))) {
+ std::iter_swap(left, right);
+ pivot = right;
+ ++left; --right; --last_greater;
+ } else {
+ pivot = left;
+ ++left; ++first_less;
+ }
+
+ // now the pivot won't move anymore
+ typename KeyAccess::result_type pivot_key = key(pivot);
+
+ while(true) {
+ while(less(key(left), pivot_key)) ++left;
+ while (!less(pivot_key, key(left)) && left < right) {
+ std::iter_swap(left++, first_less++);
+ while(less(key(left), pivot_key)) ++left;
+ }
+ while(less(pivot_key, key(right))) --right;
+ while (!less(key(right), pivot_key) && left < right) {
+ std::iter_swap(right--, last_greater--);
+ while(less(pivot_key, key(right))) --right;
+ }
+ if (!(left < right)) break;
+ std::iter_swap(left++, right--);
+ }
+
+ // now the situation is this:
+ //
+ // | equal | less | greater | equal |
+ // ^ ^ ^ ^ ^ ^
+ // beg first_less right left last_greater end
+ //
+ // or this:
+ //
+ // | equal | less |=| greater | equal |
+ // ^ ^ ^ ^ ^
+ // beg first_less left last_greater end
+ // right
+ //
+ // next: swap equal parts to the middle
+
+ typename std::iterator_traits<RandomAccessIterator>::difference_type size;
+ size = std::min(first_less-beg, left-first_less);
+ std::swap_ranges(beg, beg+size, left-size);
+ size = std::min(end-1-last_greater, last_greater-right);
+ std::swap_ranges(right+1, right+1+size, end-size);
+ return std::make_pair(beg+(left-first_less), end-(last_greater-right));
+}
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+template <typename RandomAccessIterator, typename Compare>
+std::pair<RandomAccessIterator,RandomAccessIterator>
+ternary_partition(RandomAccessIterator beg, RandomAccessIterator end,
+ RandomAccessIterator pivot, Compare less)
+{
+ return ternary_partition(beg, end, pivot, less,
+ default_key<RandomAccessIterator>());
+}
+
+template <typename RandomAccessIterator>
+std::pair<RandomAccessIterator,RandomAccessIterator>
+ternary_partition(RandomAccessIterator beg, RandomAccessIterator end,
+ RandomAccessIterator pivot)
+{
+ typedef typename std::iterator_traits<RandomAccessIterator>::value_type
+ key_type;
+ return ternary_partition(beg, end, pivot, std::less<key_type>(),
+ default_key<RandomAccessIterator>());
+}
+
+
+//======================================================================
+// Sorting
+//======================================================================
+
+//----------------------------------------------------------------------
+// function insertion_sort
+//
+//----------------------------------------------------------------------
+template <typename RandomAccessIterator, typename Compare,
+ typename KeyAccess>
+void
+insertion_sort(RandomAccessIterator beg, RandomAccessIterator end,
+ Compare less, KeyAccess key)
+{
+ if (end-beg <= 1) return;
+ for (RandomAccessIterator i = beg+1; i != end; ++i) {
+ typename KeyAccess::result_type i_key = key(i); // i is not moved ...
+ RandomAccessIterator j = i-1;
+ if (!less(i_key, key(j))) continue;
+ for ( ; j != beg; --j) {
+ if (!less(i_key, key(j-1))) break;
+ std::swap(*(j-1), *j);
+ }
+ std::swap(*i, *j); // ... until here
+ }
+}
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+template <typename RandomAccessIterator, typename Compare>
+void
+insertion_sort(RandomAccessIterator beg, RandomAccessIterator end,
+ Compare less)
+{
+ insertion_sort(beg, end, less, default_key<RandomAccessIterator>());
+}
+
+template <typename RandomAccessIterator>
+void
+insertion_sort(RandomAccessIterator beg, RandomAccessIterator end)
+{
+ typedef typename std::iterator_traits<RandomAccessIterator>::value_type
+ key_type;
+ insertion_sort(beg, end, std::less<key_type>(),
+ default_key<RandomAccessIterator>());
+}
+
+
+//----------------------------------------------------------------------
+// function quicksort
+//
+//----------------------------------------------------------------------
+template <typename RandomAccessIterator, typename Compare,
+ typename KeyAccess>
+void
+quicksort(RandomAccessIterator beg, RandomAccessIterator end,
+ Compare less, KeyAccess key)
+{
+ while (end-beg > 15) {
+ RandomAccessIterator pivot = random_pivot(beg, end, less, key);
+ RandomAccessIterator mid = binary_partition(beg, end, pivot, less, key);
+ quicksort(beg, mid, less, key);
+ beg = mid;
+ }
+ insertion_sort(beg, end, less, key);
+}
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+template <typename RandomAccessIterator, typename Compare>
+void
+quicksort(RandomAccessIterator beg, RandomAccessIterator end, Compare less)
+{
+ quicksort(beg, end, less, default_key<RandomAccessIterator>());
+}
+
+template <typename RandomAccessIterator>
+void
+quicksort(RandomAccessIterator beg, RandomAccessIterator end)
+{
+ typedef typename std::iterator_traits<RandomAccessIterator>::value_type
+ key_type;
+ quicksort(beg, end, std::less<key_type>(),
+ default_key<RandomAccessIterator>());
+}
+
+
+#endif // PARTITION_HPP
--- /dev/null
+/*
+ * stringsort.hpp
+ *
+ * Copyright 2003 Max-Planck-Institut fuer Informatik
+ *
+ */
+
+//======================================================================
+// Multikey quicksort for suffixes
+//======================================================================
+
+#ifndef STRINGSORT_HPP
+#define STRINGSORT_HPP
+
+#include "partition.hpp"
+
+#include <algorithm>
+#include <iterator>
+#include <functional>
+#include <vector>
+
+
+//----------------------------------------------------------------------
+// function string_compare
+//
+// Compare two strings until a mismatch but no longer than limit
+// characters. The sign of the result indicates the order of the
+// strings and the absolute value the distance of the mismatch from
+// the limit. Zero means that no mismatch was found within limit
+// characters.
+//
+// WARNING: A check for the end of a string is user's responsibility.
+//----------------------------------------------------------------------
+template <typename StringIterator, typename DifferenceType,
+ typename CharCompare>
+DifferenceType
+string_compare(StringIterator s1, StringIterator s2,
+ DifferenceType limit, CharCompare less) {
+ for ( ; limit > 0; --limit, ++s1, ++s2) {
+ if (less(*s1,*s2)) return -limit;
+ if (less(*s2,*s1)) return limit;
+ }
+ return 0;
+}
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+template <typename StringIterator, typename DifferenceType>
+inline DifferenceType
+string_compare(StringIterator s1, StringIterator s2, DifferenceType limit)
+{
+ typedef typename std::iterator_traits<StringIterator>::value_type
+ char_type;
+ return string_compare(s1, s2, limit, std::less<char_type>());
+}
+
+
+//----------------------------------------------------------------------
+// const int suffix_insertion_sort_threshold_
+//
+// Groups of suffixes with size less or equal to this value are
+// sorted by insertion sort.
+//----------------------------------------------------------------------
+static const int suffix_insertion_sort_threshold_ = 15;
+
+//----------------------------------------------------------------------
+// function suffix_insertion_sort
+//
+// Sort suffixes using insertion sort.
+// A difference from a straightforward insertion sort is that
+// the lengths of longest common prefixes of the already sorted
+// suffixes are stored and used in later insertions.
+//----------------------------------------------------------------------
+template <typename StringIterator, typename SArrayIterator,
+ typename GroupHandler, typename CharCompare>
+void
+suffix_insertion_sort(StringIterator str, StringIterator str_end,
+ SArrayIterator sa, SArrayIterator sa_end,
+ typename std::iterator_traits<StringIterator>::difference_type limit,
+ GroupHandler handle_unsorted_group,
+ CharCompare less)
+{
+ typedef typename std::iterator_traits<StringIterator>::difference_type
+ str_diff_type;
+ typedef typename std::iterator_traits<SArrayIterator>::difference_type
+ sa_diff_type;
+ typedef typename std::iterator_traits<SArrayIterator>::value_type
+ pos_type;
+
+ static std::vector<str_diff_type> lcp(suffix_insertion_sort_threshold_);
+ lcp.resize(sa_end-sa);
+ lcp.back() = -1;
+
+ for (sa_diff_type i = sa_end-sa-2; i >= 0; --i) {
+ pos_type i_pos = sa[i];
+ StringIterator i_str = str + i_pos;
+ StringIterator j_str = str + sa[i+1];
+ str_diff_type i_limit = std::min(limit, str_end - i_str);
+ str_diff_type ij_limit = std::min(i_limit, str_end - j_str);
+ str_diff_type dist = string_compare(i_str, j_str, ij_limit, less);
+ if (dist < 0 || (dist == 0 && i_limit == ij_limit)) {
+ lcp[i] = ij_limit + dist;
+ } else {
+ sa[i] = sa[i+1];
+ str_diff_type i_lcp = ij_limit - dist;
+
+ sa_diff_type j = i+1;
+ for ( ; lcp[j] >= i_lcp; ++j) {
+ if (lcp[j] == i_lcp) {
+ j_str = str+sa[j+1];
+ ij_limit = std::min(i_limit, str_end-j_str);
+ dist = string_compare(i_str+i_lcp, j_str+i_lcp,
+ ij_limit-i_lcp, less);
+ if (dist < 0 || (dist == 0 && i_limit == ij_limit)) {
+ lcp[j] = ij_limit + dist;
+ break;
+ } else {
+ i_lcp = ij_limit - dist;
+ }
+ }
+ sa[j] = sa[j+1];
+ lcp[j-1] = lcp[j];
+ }
+
+ sa[j] = i_pos;
+ lcp[j-1] = i_lcp;
+ }
+ }
+
+ // find and process unsorted groups
+ typename std::vector<pos_type>::iterator beg, end, last = lcp.end()-1;
+ for (beg = lcp.begin(); beg < last; ++beg) {
+ if (*beg == limit) {
+ end = beg;
+ do { ++end; } while (*end == limit);
+ handle_unsorted_group(sa+(beg-lcp.begin()), sa+(end-lcp.begin())+1);
+ beg = end;
+ }
+ }
+}
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+template <typename StringIterator, typename SArrayIterator,
+ typename GroupHandler>
+void
+suffix_insertion_sort(StringIterator str, StringIterator str_end,
+ SArrayIterator sa, SArrayIterator sa_end,
+ typename std::iterator_traits<StringIterator>::difference_type limit,
+ GroupHandler handle_unsorted_group)
+{
+ typedef typename std::iterator_traits<StringIterator>::value_type
+ char_type;
+ suffix_insertion_sort(str, str_end, sa, sa_end, limit,
+ handle_unsorted_group, std::less<char_type>());
+}
+
+
+//----------------------------------------------------------------------
+// functor char_access
+//
+// Compute the character that represents a suffix in comparisons.
+//----------------------------------------------------------------------
+template <typename StringIterator, typename SArrayIterator>
+class char_access
+ : public std::unary_function<SArrayIterator,
+ typename std::iterator_traits<StringIterator>::value_type>
+{
+private:
+ const StringIterator str;
+public:
+ char_access(StringIterator s) : str(s) {}
+
+ typename std::iterator_traits<StringIterator>::value_type
+ operator() (SArrayIterator i) const {
+ return str[*i];
+ }
+};
+
+
+//======================================================================
+// function suffix_mkqsort
+// function suffix_mkqsort_noempty
+//
+// Sort suffixes using multikey quicksort.
+// These two functions call each other to implement multikey quicksort.
+// Either can be used as the initial call. suffix_mkqsort_noempty
+// places more restrictions on the input but can be slightly faster.
+//======================================================================
+
+//----------------------------------------------------------------------
+// function suffix_mkqsort
+//
+// Sort suffixes using multikey quicksort.
+// This is the more general form.
+//----------------------------------------------------------------------
+template <typename StringIterator, typename SArrayIterator,
+ typename GroupHandler, typename CharCompare>
+void
+suffix_mkqsort(StringIterator str, StringIterator str_end,
+ SArrayIterator sa, SArrayIterator sa_end,
+ typename std::iterator_traits<StringIterator>::difference_type limit,
+ GroupHandler handle_unsorted_group,
+ CharCompare less)
+{
+ typedef typename std::iterator_traits<SArrayIterator>::value_type pos_type;
+
+ while (sa_end-sa > suffix_insertion_sort_threshold_ && limit > 0) {
+
+ // check for empty suffix(es)
+ pos_type length = static_cast<pos_type>(str_end-str);
+ sa = std::partition(sa, sa_end,
+ std::bind2nd(std::equal_to<pos_type>(), length));
+
+ // this would check only for one empty suffix
+ //SArrayIterator empty = std::find(sa, sa_end, length);
+ //if (empty != sa_end) {
+ // std::swap(*sa, *empty);
+ // ++sa;
+ //}
+
+ // partition
+ char_access<StringIterator, SArrayIterator> key(str);
+ SArrayIterator pivot = random_pivot(sa, sa_end, less, key);
+ std::pair<SArrayIterator,SArrayIterator> midrange;
+ midrange = ternary_partition(sa, sa_end, pivot, less, key);
+
+ // recurse on <-part
+ if (midrange.first - sa > 1) {
+ suffix_mkqsort_noempty(str, str_end, sa, midrange.first,
+ limit, handle_unsorted_group, less);
+ }
+
+ // recurse on >-part
+ if (sa_end - midrange.second > 1) {
+ suffix_mkqsort_noempty(str, str_end, midrange.second, sa_end,
+ limit, handle_unsorted_group, less);
+ }
+
+ // loop on =-part
+ sa = midrange.first;
+ sa_end = midrange.second;
+
+ // move to the next position in the suffixes
+ ++str;
+ --limit;
+
+ }
+
+ if (sa_end-sa > 1) {
+ if (limit == 0) {
+ handle_unsorted_group(sa, sa_end);
+ } else {
+ suffix_insertion_sort(str, str_end, sa, sa_end, limit,
+ handle_unsorted_group, less);
+ }
+ }
+}
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+template <typename StringIterator, typename SArrayIterator,
+ typename GroupHandler>
+void
+suffix_mkqsort(StringIterator str, StringIterator str_end,
+ SArrayIterator sa, SArrayIterator sa_end,
+ typename std::iterator_traits<StringIterator>::difference_type limit,
+ GroupHandler handle_unsorted_group)
+{
+ typedef typename std::iterator_traits<StringIterator>::value_type
+ char_type;
+ suffix_mkqsort(str, str_end, sa, sa_end, limit,
+ handle_unsorted_group, std::less<char_type>());
+}
+
+
+//----------------------------------------------------------------------
+// function suffix_mkqsort_noempty
+//
+// Sort suffixes using multikey quicksort.
+// Used as a subroutine of suffix_mkqsort but can also be called
+// directly. Places more restrictions on input (see below) but
+// can be slightly faster than suffix_mkqsort.
+//
+// The additional restrictions:
+// PRECONDITION: limit>0 and there are no empty suffixes
+//----------------------------------------------------------------------
+template <typename StringIterator, typename SArrayIterator,
+ typename GroupHandler, typename CharCompare>
+void
+suffix_mkqsort_noempty(StringIterator str, StringIterator str_end,
+ SArrayIterator sa, SArrayIterator sa_end,
+ typename std::iterator_traits<StringIterator>::difference_type limit,
+ GroupHandler handle_unsorted_group,
+ CharCompare less)
+{
+ typedef typename std::iterator_traits<SArrayIterator>::value_type pos_type;
+
+ while (sa_end-sa > suffix_insertion_sort_threshold_) {
+
+ // partition
+ char_access<StringIterator, SArrayIterator> key(str);
+ SArrayIterator pivot = random_pivot(sa, sa_end, less, key);
+ std::pair<SArrayIterator,SArrayIterator> midrange;
+ midrange = ternary_partition(sa, sa_end, pivot, less, key);
+
+ // recurse on <-part
+ if (midrange.first - sa > 1) {
+ suffix_mkqsort_noempty(str, str_end, sa, midrange.first,
+ limit, handle_unsorted_group, less);
+ }
+
+ // recurse on =-part
+ if (midrange.second - midrange.first > 1) {
+ suffix_mkqsort(str+1, str_end, midrange.first, midrange.second,
+ limit-1, handle_unsorted_group, less);
+ }
+
+ // loop on >-part
+ sa = midrange.second;
+ }
+
+ if (sa_end-sa > 1) {
+ suffix_insertion_sort(str, str_end, sa, sa_end, limit,
+ handle_unsorted_group, less);
+ }
+}
+// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+template <typename StringIterator, typename SArrayIterator,
+ typename GroupHandler>
+void
+suffix_mkqsort_noempty(StringIterator str, StringIterator str_end,
+ SArrayIterator sa, SArrayIterator sa_end,
+ typename std::iterator_traits<StringIterator>::difference_type limit,
+ GroupHandler handle_unsorted_group)
+{
+ typedef typename std::iterator_traits<StringIterator>::value_type
+ char_type;
+ suffix_mkqsort_noempty(str, str_end, sa, sa_end, limit,
+ handle_unsorted_group, std::less<char_type>());
+}
+
+#endif // STRINGSORT_HPP