- for(ulong i=0; i<sampleLength; i++) {
- assert((*positions)[i] < n);
- ulong j = sampled->rank((*positions)[i]);
- if (j==0) j=sampleLength;
- TextPosition textPos = (*tmpSuffix)[i];
- (*suffixDocId)[j-1] = DocIdAtTextPos(textStartPos, textPos);
-
- assert((unsigned)DocIdAtTextPos(textStartPos, textPos) < numberOfTexts);
- assert((*suffixDocId)[j-1] < numberOfTexts);
- // calculate offset from text start:
- (*suffixes)[j-1] = textPos - (*textStartPos)[(*suffixDocId)[j-1]];
+ p=bwtEndPos;
+ textId = numberOfTexts;
+
+ TextStorageBuilder tsbuilder(n);
+
+ /**
+ * Second pass: populate tables suffixes and suffixDocId.
+ */
+ for (ulong i=n-1;i<ulongmax;i--) {
+ x=(i==n-1)?0:i+1;
+
+ if (sampled->access(p)) {
+ ulong j = sampled->rank1(p)-1;
+
+ (*suffixDocId)[j] = DocIdAtTextPos(textStartPos, x);
+
+ // calculate offset from text start:
+ (*suffixes)[j] = x - (*textStartPos)[(*suffixDocId)[j]];
+ }
+
+ uchar c = alphabetrank->access(p, alphabetrank_i_tmp);
+ tsbuilder[i] = c;
+
+ if (c == '\0')
+ {
+ --textId;
+ // LF-mapping from '\0' does not work with this (pseudo) BWT (see Wolfgang's thesis for details).
+ p = textId; // Correct LF-mapping to the last char of the previous text.
+ }
+ else // Now c != '\0', do LF-mapping:
+ p = C[c]+alphabetrank_i_tmp-1;