of this software.*/
/*
- Replaced int -> sint for massive data support and moved into namespace CSA.
+ Moved everything into a struct to support parallel construction.
+ Created another version using sint instead of int to index more than 2 GB at a time.
+ Put everything into a namespace.
- - Jouni Siren 2009-03-16
+ - Jouni Siren 2009-03-20
*/
#include <climits>
{
-static sint *I, /* group array, ultimately suffix array.*/
+#define KEY(p) (V[*(p)+(h)])
+#define SWAP(p, q) (tmp=*(p), *(p)=*(q), *(q)=tmp)
+#define MED3(a, b, c) (KEY(a)<KEY(b) ? (KEY(b)<KEY(c) ? b : KEY(a)<KEY(c) ? c : (a)) : (KEY(b)>KEY(c) ? b : KEY(a)>KEY(c) ? c : (a)))
+
+
+//--------------------------------------------------------------------------
+
+template <class T>
+struct QSufSort
+{
+
+ T *I, /* group array, ultimately suffix array.*/
*V, /* inverse array, ultimately inverse of I.*/
r, /* number of symbols aggregated by transform.*/
h; /* length of already-sorted prefixes.*/
-#define KEY(p) (V[*(p)+(h)])
-#define SWAP(p, q) (tmp=*(p), *(p)=*(q), *(q)=tmp)
-#define MED3(a, b, c) (KEY(a)<KEY(b) ? (KEY(b)<KEY(c) ? b : KEY(a)<KEY(c) ? c : (a)) : (KEY(b)>KEY(c) ? b : KEY(a)>KEY(c) ? c : (a)))
/* Subroutine for select_sort_split and sort_split. Sets group numbers for a
group whose lowest position in I is pl and highest position is pm.*/
-static void update_group(sint *pl, sint *pm)
+void update_group(T *pl, T *pm)
{
- sint g;
+ T g;
g=pm-I; /* group number.*/
V[*pl]=g; /* update group number of first position.*/
/* Quadratic sorting method to use for small subarrays. To be able to update
group numbers consistently, a variant of selection sorting is used.*/
-static void select_sort_split(sint *p, sint n) {
- sint *pa, *pb, *pi, *pn;
- sint f, v, tmp;
+void select_sort_split(T *p, T n) {
+ T *pa, *pb, *pi, *pn;
+ T f, v, tmp;
pa=p; /* pa is start of group being picked out.*/
pn=p+n-1; /* pn is last position of subarray.*/
/* Subroutine for sort_split, algorithm by Bentley & McIlroy.*/
-static sint choose_pivot(sint *p, sint n) {
- sint *pl, *pm, *pn;
- sint s;
+T choose_pivot(T *p, T n) {
+ T *pl, *pm, *pn;
+ T s;
pm=p+(n>>1); /* small arrays, middle element.*/
if (n>7) {
Software -- Practice and Experience 23(11), 1249-1265 (November 1993). This
function is based on Program 7.*/
-static void sort_split(sint *p, sint n)
+void sort_split(T *p, T n)
{
- sint *pa, *pb, *pc, *pd, *pl, *pm, *pn;
- sint f, v, s, t, tmp;
+ T *pa, *pb, *pc, *pd, *pl, *pm, *pn;
+ T f, v, s, t, tmp;
if (n<7) { /* multi-selection sort smallest arrays.*/
select_sort_split(p, n);
Output: x is V and p is I after the initial sorting stage of the refined
suffix sorting algorithm.*/
-
-static void bucketsort(sint *x, sint *p, sint n, sint k)
+
+void bucketsort(T *x, T *p, T n, T k)
{
- sint *pi, i, c, d, g;
+ T *pi, i, c, d, g;
for (pi=p; pi<p+k; ++pi)
*pi=-1; /* mark linked lists empty.*/
new alphabet. If j<=n+1, the alphabet is compacted. The global variable r is
set to the number of old symbols grouped into one. Only x[n] is 0.*/
-static sint transform(sint *x, sint *p, sint n, sint k, sint l, sint q)
+T transform(T *x, T *p, T n, T k, T l, T q)
{
- sint b, c, d, e, i, j, m, s;
- sint *pi, *pj;
+ T b, c, d, e, i, j, m, s;
+ T *pi, *pj;
for (s=0, i=k-l; i; i>>=1)
++s; /* s is number of bits in old symbol.*/
contents of x[n] is disregarded, the n-th symbol being regarded as
end-of-string smaller than all other symbols.*/
-void suffixsort(sint *x, sint *p, sint n, sint k, sint l)
+void suffixsort(T *x, T *p, T n, T k, T l)
{
- sint *pi, *pk;
- sint i, j, s, sl;
+ T *pi, *pk;
+ T i, j, s, sl;
V=x; /* set global values.*/
I=p;
I[V[i]]=i;
}
+}; // QSufSort
+
+//--------------------------------------------------------------------------
+
+
+void suffixsort(int *x, int *p, int n, int k, int l)
+{
+ QSufSort<int> sorter;
+ sorter.suffixsort(x, p, n, k, l);
+}
+
+void massive_suffixsort(sint *x, sint *p, sint n, sint k, sint l)
+{
+ QSufSort<sint> sorter;
+ sorter.suffixsort(x, p, n, k, l);
+}
+
} // namespace CSA