X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;ds=sidebyside;f=dcover%2Fpartition.hpp;fp=dcover%2Fpartition.hpp;h=6b72c948b3f7af5c4af084159de995bbda75f41f;hb=368c3edc500ee74859732705e46b7c346e8a6d65;hp=0000000000000000000000000000000000000000;hpb=7eaf8bbcb18ab4a481905d219ab0b9e8286c75b2;p=SXSI%2FTextCollection.git diff --git a/dcover/partition.hpp b/dcover/partition.hpp new file mode 100644 index 0000000..6b72c94 --- /dev/null +++ b/dcover/partition.hpp @@ -0,0 +1,435 @@ +/* + * partition.hpp + * + * Copyright 2003 Max-Planck-Institut fuer Informatik + * + */ + +//====================================================================== +// A collection of flexible, generic, STL-like algorithm components +// for comparison-based distribution (i.e., quicksort-like) sorting. +//====================================================================== + +#ifndef PARTITION_HPP +#define PARTITION_HPP + +#include +#include +#include +#include +#include // for rand() + +//====================================================================== +// Most of the algorithms here have three versions. For example a sort +// function can have the following forms: +// sort(beg, end); +// sort(beg, end, less); +// sort(beg, end, less, key); +// The first two forms are standard STL. The third form adds an +// argument key, which is a unary function. Where the second +// form would compare two iterators i and j as less(*i, *j), +// the third form does less(key(i), key(j)) instead. This allows +// moving some of the computation related to the comparison from +// less() into key(). When the same iterator is used in many +// comparisons (for example the pivot), key() needs to be evaluated +// only once. +// +// For example, to sort objects of type T with a member function +// size() returning int into increasing order of size() do +// (TODO: test/expand this, use vector of vectors) +// +// sort(beg, end, std::less(), std::mem_fun(&T::size)); +// +// key() may return a pointer or reference to (a member of) the object. +// All the algorithms assume that the value returned by key() stays +// valid only as long as the object is not moved. +// +// TYPE REQUIREMENTS +// For example: +// template +// void sort(Iterator, Iterator, Compare, KeyAccess); +// * KeyAccess is a model of Adaptable Unary Function +// * Iterator is convertible to KeyAccess::argument_type +// * KeyAccess::result_type is convertible to the argument type +// of Compare. +//====================================================================== + +//---------------------------------------------------------------------- +// functor copy_dereference +// functor const_dereference +// +// Default functors to be used as key(). +// (TODO: which one should be used as the default?) +// (TODO: is const_dereference::result_type a problem?) +//---------------------------------------------------------------------- +template +class const_dereference + : public std::unary_function::value_type &> +{ +public: + const typename std::iterator_traits::value_type & + operator() (Iterator i) const { return *i; } +}; + +template +class copy_dereference + : public std::unary_function::value_type> +{ +public: + typename std::iterator_traits::value_type + operator() (Iterator i) const { return *i; } +}; + +template +class default_key : public copy_dereference {}; + + + +//====================================================================== +// +// Pivot selection +// +// (TODO: non-randomized pivot selectors) +//====================================================================== + +//---------------------------------------------------------------------- +// function iter_median3 +// +// Median of three +//---------------------------------------------------------------------- +template +Iterator +iter_median3(Iterator a, Iterator b, Iterator c, + Compare less, KeyAccess key) { + typedef typename KeyAccess::result_type key_type; + key_type ka = key(a), kb = key(b), kc = key(c); + if (less(ka, kb)) + if (less(kb, kc)) return b; + else if (less(ka, kc)) return c; + else return a; + else if (less(ka, kc)) return a; + else if (less(kb, kc)) return c; + else return b; +} +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +template +Iterator +iter_median3(Iterator a, Iterator b, Iterator c, Compare less) { + return iter_median3(a, b, c, less, default_key()); +} + +template +Iterator +iter_median3(Iterator a, Iterator b, Iterator c) { + typedef typename std::iterator_traits::value_type key_type; + return iter_median3(a, b, c, std::less(), + default_key()); +} + + +//---------------------------------------------------------------------- +// function random_pivot +// +// Choose pivot of as a median of three random elements +// (or as a single random element for small ranges) +// +// (TODO: better/faster random number generation?) +//---------------------------------------------------------------------- +template +RandomAccessIterator +random_pivot (RandomAccessIterator beg, RandomAccessIterator end, + Compare less, KeyAccess key) +{ + static double scale_to_one = 1.0L/(RAND_MAX+1.0); + + typedef typename std::iterator_traits::difference_type + difference_type; + difference_type size = std::distance(beg, end); + double scale_to_size = size * scale_to_one; + RandomAccessIterator pivot; + pivot = beg + static_cast(scale_to_size*std::rand()); + if (size > 50) { + RandomAccessIterator pivot2, pivot3; + pivot2 = beg + static_cast(scale_to_size*std::rand()); + pivot3 = beg + static_cast(scale_to_size*std::rand()); + pivot = iter_median3(pivot, pivot2, pivot3, less, key); + } + return pivot; +} +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +template +RandomAccessIterator +random_pivot(RandomAccessIterator beg, RandomAccessIterator end, + Compare less) +{ + return random_pivot(beg, end, less, + default_key()); +} + +template +RandomAccessIterator +random_pivot(RandomAccessIterator beg, RandomAccessIterator end) +{ + typedef typename std::iterator_traits::value_type + key_type; + return random_pivot(beg, end, std::less(), + default_key()); +} + + + + +//====================================================================== +// Partitioning +//====================================================================== + +//---------------------------------------------------------------------- +// function binary_partition +// +// Partitions the input range [beg,end) into two parts [beg,mid) and +// [mid,end) with all elements smaller than pivot in [beg, mid) and +// all elements larger than pivot in [mid,end). Equal elements may +// be in either subrange. Returns mid. +// PRECONDITION: pivot is in the range [beg,end) +//---------------------------------------------------------------------- +template +RandomAccessIterator +binary_partition(RandomAccessIterator beg, RandomAccessIterator end, + RandomAccessIterator pivot, Compare less, KeyAccess key) +{ + if (beg == --end) return beg; + + // move the pivot to a safe place and setup sentinels + std::swap(*beg, *pivot); + if (less(key(end), key(beg))) { + std::swap(*beg, *end); + pivot = end; + } else { + pivot = beg; + } + + // now the pivot won't move anymore + typename KeyAccess::result_type pivot_key = key(pivot); + + while (true) { + do { ++beg; } while (less(key(beg), pivot_key)); + do { --end; } while (less(pivot_key, key(end))); + if (!(beg < end)) return beg; + std::swap(*beg, *end); + } +} +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +template +RandomAccessIterator +binary_partition(RandomAccessIterator beg, RandomAccessIterator end, + RandomAccessIterator pivot, Compare less) +{ + return binary_partition(beg, end, pivot, less, + default_key()); +} + +template +RandomAccessIterator +binary_partition(RandomAccessIterator beg, RandomAccessIterator end, + RandomAccessIterator pivot) +{ + typedef typename std::iterator_traits::value_type + key_type; + return binary_partition(beg, end, pivot, std::less(), + default_key()); +} + + +//---------------------------------------------------------------------- +// function ternary_partition +// +// Partitions the input range [beg,end) into three parts according to +// comparisons to the pivot: | less | equal | greater | +// Returns the middle (equal) range. +// PRECONDITION: pivot is in the range [beg,end) +// +// METHOD +// The algorithm maintains the following invariant: +// +// | equal | less | unknown | greater | equal | +// ^ ^ ^ ^ ^ ^ +// beg first_less left right last_greater end +// +//---------------------------------------------------------------------- +template +std::pair +ternary_partition(RandomAccessIterator beg, RandomAccessIterator end, + RandomAccessIterator pivot, Compare less, + KeyAccess key) +{ + RandomAccessIterator left = beg, right = end-1; + RandomAccessIterator first_less = left, last_greater = right; + + // first move pivot to a safe place and setup sentinels + std::iter_swap(pivot, left); + if (less(key(right), key(left))) { + std::iter_swap(left, right); + pivot = right; + ++left; --right; --last_greater; + } else { + pivot = left; + ++left; ++first_less; + } + + // now the pivot won't move anymore + typename KeyAccess::result_type pivot_key = key(pivot); + + while(true) { + while(less(key(left), pivot_key)) ++left; + while (!less(pivot_key, key(left)) && left < right) { + std::iter_swap(left++, first_less++); + while(less(key(left), pivot_key)) ++left; + } + while(less(pivot_key, key(right))) --right; + while (!less(key(right), pivot_key) && left < right) { + std::iter_swap(right--, last_greater--); + while(less(pivot_key, key(right))) --right; + } + if (!(left < right)) break; + std::iter_swap(left++, right--); + } + + // now the situation is this: + // + // | equal | less | greater | equal | + // ^ ^ ^ ^ ^ ^ + // beg first_less right left last_greater end + // + // or this: + // + // | equal | less |=| greater | equal | + // ^ ^ ^ ^ ^ + // beg first_less left last_greater end + // right + // + // next: swap equal parts to the middle + + typename std::iterator_traits::difference_type size; + size = std::min(first_less-beg, left-first_less); + std::swap_ranges(beg, beg+size, left-size); + size = std::min(end-1-last_greater, last_greater-right); + std::swap_ranges(right+1, right+1+size, end-size); + return std::make_pair(beg+(left-first_less), end-(last_greater-right)); +} +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +template +std::pair +ternary_partition(RandomAccessIterator beg, RandomAccessIterator end, + RandomAccessIterator pivot, Compare less) +{ + return ternary_partition(beg, end, pivot, less, + default_key()); +} + +template +std::pair +ternary_partition(RandomAccessIterator beg, RandomAccessIterator end, + RandomAccessIterator pivot) +{ + typedef typename std::iterator_traits::value_type + key_type; + return ternary_partition(beg, end, pivot, std::less(), + default_key()); +} + + +//====================================================================== +// Sorting +//====================================================================== + +//---------------------------------------------------------------------- +// function insertion_sort +// +//---------------------------------------------------------------------- +template +void +insertion_sort(RandomAccessIterator beg, RandomAccessIterator end, + Compare less, KeyAccess key) +{ + if (end-beg <= 1) return; + for (RandomAccessIterator i = beg+1; i != end; ++i) { + typename KeyAccess::result_type i_key = key(i); // i is not moved ... + RandomAccessIterator j = i-1; + if (!less(i_key, key(j))) continue; + for ( ; j != beg; --j) { + if (!less(i_key, key(j-1))) break; + std::swap(*(j-1), *j); + } + std::swap(*i, *j); // ... until here + } +} +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +template +void +insertion_sort(RandomAccessIterator beg, RandomAccessIterator end, + Compare less) +{ + insertion_sort(beg, end, less, default_key()); +} + +template +void +insertion_sort(RandomAccessIterator beg, RandomAccessIterator end) +{ + typedef typename std::iterator_traits::value_type + key_type; + insertion_sort(beg, end, std::less(), + default_key()); +} + + +//---------------------------------------------------------------------- +// function quicksort +// +//---------------------------------------------------------------------- +template +void +quicksort(RandomAccessIterator beg, RandomAccessIterator end, + Compare less, KeyAccess key) +{ + while (end-beg > 15) { + RandomAccessIterator pivot = random_pivot(beg, end, less, key); + RandomAccessIterator mid = binary_partition(beg, end, pivot, less, key); + quicksort(beg, mid, less, key); + beg = mid; + } + insertion_sort(beg, end, less, key); +} +// - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +template +void +quicksort(RandomAccessIterator beg, RandomAccessIterator end, Compare less) +{ + quicksort(beg, end, less, default_key()); +} + +template +void +quicksort(RandomAccessIterator beg, RandomAccessIterator end) +{ + typedef typename std::iterator_traits::value_type + key_type; + quicksort(beg, end, std::less(), + default_key()); +} + + +#endif // PARTITION_HPP