X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=incbwt%2Fqsufsort%2Fqsufsort.c;h=4226ff356ba78777f9a63dbf8ef1dafdae8d2554;hb=HEAD;hp=3cc1cbe1135b5d21dabb99e7f1e1539819dac5eb;hpb=40ddf9aca842bdc081b6350a4ebfe36b066c94c9;p=SXSI%2FTextCollection.git diff --git a/incbwt/qsufsort/qsufsort.c b/incbwt/qsufsort/qsufsort.c index 3cc1cbe..4226ff3 100644 --- a/incbwt/qsufsort/qsufsort.c +++ b/incbwt/qsufsort/qsufsort.c @@ -12,9 +12,11 @@ of this software.*/ /* - Replaced int -> sint for massive data support and moved into namespace CSA. + Moved everything into a struct to support parallel construction. + Created another version using sint instead of int to index more than 2 GB at a time. + Put everything into a namespace. - - Jouni Siren 2009-03-16 + - Jouni Siren 2009-03-20 */ #include @@ -25,21 +27,29 @@ namespace CSA { -static sint *I, /* group array, ultimately suffix array.*/ +#define KEY(p) (V[*(p)+(h)]) +#define SWAP(p, q) (tmp=*(p), *(p)=*(q), *(q)=tmp) +#define MED3(a, b, c) (KEY(a)KEY(c) ? b : KEY(a)>KEY(c) ? c : (a))) + + +//-------------------------------------------------------------------------- + +template +struct QSufSort +{ + + T *I, /* group array, ultimately suffix array.*/ *V, /* inverse array, ultimately inverse of I.*/ r, /* number of symbols aggregated by transform.*/ h; /* length of already-sorted prefixes.*/ -#define KEY(p) (V[*(p)+(h)]) -#define SWAP(p, q) (tmp=*(p), *(p)=*(q), *(q)=tmp) -#define MED3(a, b, c) (KEY(a)KEY(c) ? b : KEY(a)>KEY(c) ? c : (a))) /* Subroutine for select_sort_split and sort_split. Sets group numbers for a group whose lowest position in I is pl and highest position is pm.*/ -static void update_group(sint *pl, sint *pm) +void update_group(T *pl, T *pm) { - sint g; + T g; g=pm-I; /* group number.*/ V[*pl]=g; /* update group number of first position.*/ @@ -54,9 +64,9 @@ static void update_group(sint *pl, sint *pm) /* Quadratic sorting method to use for small subarrays. To be able to update group numbers consistently, a variant of selection sorting is used.*/ -static void select_sort_split(sint *p, sint n) { - sint *pa, *pb, *pi, *pn; - sint f, v, tmp; +void select_sort_split(T *p, T n) { + T *pa, *pb, *pi, *pn; + T f, v, tmp; pa=p; /* pa is start of group being picked out.*/ pn=p+n-1; /* pn is last position of subarray.*/ @@ -81,9 +91,9 @@ static void select_sort_split(sint *p, sint n) { /* Subroutine for sort_split, algorithm by Bentley & McIlroy.*/ -static sint choose_pivot(sint *p, sint n) { - sint *pl, *pm, *pn; - sint s; +T choose_pivot(T *p, T n) { + T *pl, *pm, *pn; + T s; pm=p+(n>>1); /* small arrays, middle element.*/ if (n>7) { @@ -106,10 +116,10 @@ static sint choose_pivot(sint *p, sint n) { Software -- Practice and Experience 23(11), 1249-1265 (November 1993). This function is based on Program 7.*/ -static void sort_split(sint *p, sint n) +void sort_split(T *p, T n) { - sint *pa, *pb, *pc, *pd, *pl, *pm, *pn; - sint f, v, s, t, tmp; + T *pa, *pb, *pc, *pd, *pl, *pm, *pn; + T f, v, s, t, tmp; if (n<7) { /* multi-selection sort smallest arrays.*/ select_sort_split(p, n); @@ -167,10 +177,10 @@ static void sort_split(sint *p, sint n) Output: x is V and p is I after the initial sorting stage of the refined suffix sorting algorithm.*/ - -static void bucketsort(sint *x, sint *p, sint n, sint k) + +void bucketsort(T *x, T *p, T n, T k) { - sint *pi, i, c, d, g; + T *pi, i, c, d, g; for (pi=p; pi>=1) ++s; /* s is number of bits in old symbol.*/ @@ -265,10 +275,10 @@ static sint transform(sint *x, sint *p, sint n, sint k, sint l, sint q) contents of x[n] is disregarded, the n-th symbol being regarded as end-of-string smaller than all other symbols.*/ -void suffixsort(sint *x, sint *p, sint n, sint k, sint l) +void suffixsort(T *x, T *p, T n, T k, T l) { - sint *pi, *pk; - sint i, j, s, sl; + T *pi, *pk; + T i, j, s, sl; V=x; /* set global values.*/ I=p; @@ -311,5 +321,22 @@ void suffixsort(sint *x, sint *p, sint n, sint k, sint l) I[V[i]]=i; } +}; // QSufSort + +//-------------------------------------------------------------------------- + + +void suffixsort(int *x, int *p, int n, int k, int l) +{ + QSufSort sorter; + sorter.suffixsort(x, p, n, k, l); +} + +void massive_suffixsort(sint *x, sint *p, sint n, sint k, sint l) +{ + QSufSort sorter; + sorter.suffixsort(x, p, n, k, l); +} + } // namespace CSA