+
+ /**
+  * Collect the distinct document identifiers of the suffixes at
+  * positions sp..ep (both ends inclusive).
+  *
+  * Starting from each position, the step i = C[c] + rank - 1 is repeated
+  * until the symbol read is the end-marker ('\0') or the position is
+  * flagged in `sampled`. The identifier is then read from `Doc`
+  * (end-marker case) or from `suffixDocId` (sampled case).
+  * Nothing is inserted when sp > ep.
+  *
+  * @param resultSet  receives the identifiers; existing elements are kept
+  * @param sp         first position of the interval
+  * @param ep         last position of the interval (inclusive)
+  */
+ inline void EnumerateDocuments(std::set<DocId> &resultSet, TextPosition sp, TextPosition ep) const
+ {
+     // Unique document identifiers are wanted; std::set drops duplicates.
+     // FIXME use unordered_set?
+     uint symbolRank = 0; // Cache for the rank of the symbol most recently read.
+     for (TextPosition row = sp; row <= ep; ++row)
+     {
+         TextPosition pos = row;
+         uchar symbol = alphabetrank->access(pos, symbolRank);
+         for (;;)
+         {
+             if (symbol == '\0')
+             {
+                 // Rank among the end-markers in BWT
+                 const unsigned endmarkerRank = symbolRank - 1; // alphabetrank->rank(0, pos) - 1
+                 resultSet.insert(Doc->access(endmarkerRank));
+                 break;
+             }
+             if (sampled->access(pos))
+             {
+                 const DocId sampledDoc = (*suffixDocId)[sampled->rank1(pos) - 1];
+                 assert((unsigned)sampledDoc < numberOfTexts);
+                 resultSet.insert(sampledDoc);
+                 break;
+             }
+             // Neither stop condition holds yet: take one more step.
+             pos = C[symbol] + symbolRank - 1; // alphabetrank->rank(symbol, pos) - 1
+             symbol = alphabetrank->access(pos, symbolRank);
+         }
+     }
+ }
+
+ /**
+  * Collect the distinct document identifiers of the suffixes at
+  * positions sp..ep (both ends inclusive), keeping only identifiers
+  * that lie in [begin, end] (both ends inclusive).
+  *
+  * Starting from each position, the step i = C[c] + rank - 1 is repeated
+  * until the symbol read is the end-marker ('\0') or the position is
+  * flagged in `sampled`. The identifier is then read from `Doc`
+  * (end-marker case) or from `suffixDocId` (sampled case).
+  * Nothing is inserted when sp > ep.
+  *
+  * @param resultSet  receives the identifiers; existing elements are kept
+  * @param sp         first position of the interval
+  * @param ep         last position of the interval (inclusive)
+  * @param begin      smallest document identifier to accept
+  * @param end        largest document identifier to accept
+  */
+ inline void EnumerateDocuments(std::set<DocId> &resultSet, TextPosition sp, TextPosition ep, DocId begin, DocId end) const
+ {
+     // Unique document identifiers are wanted; std::set drops duplicates.
+     uint symbolRank = 0; // Cache for the rank of the symbol most recently read.
+     for (TextPosition row = sp; row <= ep; ++row)
+     {
+         TextPosition pos = row;
+         uchar symbol = alphabetrank->access(pos, symbolRank);
+         for (;;)
+         {
+             if (symbol == '\0')
+             {
+                 // Rank among the end-markers in BWT
+                 const unsigned endmarkerRank = symbolRank - 1; // alphabetrank->rank(0, pos) - 1
+                 const DocId owner = Doc->access(endmarkerRank);
+                 if (owner >= begin && owner <= end)
+                 {
+                     resultSet.insert(owner);
+                 }
+                 break;
+             }
+             if (sampled->access(pos))
+             {
+                 const DocId owner = (*suffixDocId)[sampled->rank1(pos) - 1];
+                 assert((unsigned)owner < numberOfTexts);
+                 if (owner >= begin && owner <= end)
+                 {
+                     resultSet.insert(owner);
+                 }
+                 break;
+             }
+             // Neither stop condition holds yet: take one more step.
+             pos = C[symbol] + symbolRank - 1; // alphabetrank->rank(symbol, pos) - 1
+             symbol = alphabetrank->access(pos, symbolRank);
+         }
+     }
+ }
+
+ /**
+  * Append one (document, position) pair to `result` for every suffix at
+  * positions sp..ep (both ends inclusive), in increasing position order.
+  *
+  * Starting from each position, the step i = C[c] + rank - 1 is repeated
+  * until the symbol read is the end-marker ('\0') or the position is
+  * flagged in `sampled`; `dist` counts the steps taken.
+  *  - End-marker case: the document comes from `Doc` and the reported
+  *    position is `dist`.
+  *  - Sampled case: the document comes from `suffixDocId` and the
+  *    reported position is the stored sample from `suffixes` plus `dist`.
+  * Nothing is appended when sp > ep.
+  *
+  * @param result  receives the pairs; existing elements are kept
+  * @param sp      first position of the interval
+  * @param ep      last position of the interval (inclusive)
+  */
+ inline void EnumeratePositions(full_result &result, TextPosition sp, TextPosition ep) const
+ {
+     uint tmp_rank_c = 0; // Cache rank value of c.
+     for (; sp <= ep; ++sp)
+     {
+         TextPosition i = sp;
+         TextPosition dist = 0; // Number of steps taken away from sp.
+         uchar c = alphabetrank->access(i, tmp_rank_c);
+         while (c != '\0' && !sampled->access(i))
+         {
+             i = C[c] + tmp_rank_c - 1; // alphabetrank->rank(c,i)-1;
+             c = alphabetrank->access(i, tmp_rank_c);
+             ++dist;
+         }
+         if (c == '\0')
+         {
+             // Rank among the end-markers in BWT
+             const unsigned endmarkerRank = tmp_rank_c - 1; // alphabetrank->rank(0, i) - 1;
+             const DocId docId = Doc->access(endmarkerRank);
+             result.push_back(std::make_pair(docId, dist));
+         }
+         else
+         {
+             // Both sample arrays are indexed by the same value, so query
+             // rank1 once. The result is at most i (rank1 is presumably a
+             // count of flagged positions up to i), so it fits TextPosition.
+             const TextPosition sampleIdx = sampled->rank1(i) - 1;
+             const DocId docId = (*suffixDocId)[sampleIdx];
+             // Same sanity check as in EnumerateDocuments.
+             assert((unsigned)docId < numberOfTexts);
+             const TextPosition textPos = (*suffixes)[sampleIdx] + dist;
+
+             result.push_back(std::make_pair(docId, textPos));
+         }
+     }
+ }
+
+ /**
+  * Append one (document, position) pair to `result` for every suffix at
+  * positions sp..ep (both ends inclusive) whose document identifier lies
+  * in [begin, end] (both ends inclusive), in increasing position order.
+  *
+  * Starting from each position, the step i = C[c] + rank - 1 is repeated
+  * until the symbol read is the end-marker ('\0') or the position is
+  * flagged in `sampled`; `dist` counts the steps taken.
+  *  - End-marker case: the document comes from `Doc` and the reported
+  *    position is `dist`.
+  *  - Sampled case: the document comes from `suffixDocId` and the
+  *    reported position is the stored sample from `suffixes` plus `dist`.
+  * Nothing is appended when sp > ep.
+  *
+  * @param result  receives the pairs; existing elements are kept
+  * @param sp      first position of the interval
+  * @param ep      last position of the interval (inclusive)
+  * @param begin   smallest document identifier to accept
+  * @param end     largest document identifier to accept
+  */
+ inline void EnumeratePositions(full_result &result, TextPosition sp, TextPosition ep, DocId begin, DocId end) const
+ {
+     uint tmp_rank_c = 0; // Cache rank value of c.
+     for (; sp <= ep; ++sp)
+     {
+         TextPosition i = sp;
+         TextPosition dist = 0; // Number of steps taken away from sp.
+         uchar c = alphabetrank->access(i, tmp_rank_c);
+         while (c != '\0' && !sampled->access(i))
+         {
+             i = C[c] + tmp_rank_c - 1; // alphabetrank->rank(c,i)-1;
+             c = alphabetrank->access(i, tmp_rank_c);
+             ++dist;
+         }
+         if (c == '\0')
+         {
+             // Rank among the end-markers in BWT
+             const unsigned endmarkerRank = tmp_rank_c - 1; // alphabetrank->rank(0, i) - 1;
+             const DocId docId = Doc->access(endmarkerRank);
+             if (docId >= begin && docId <= end)
+                 result.push_back(std::make_pair(docId, dist));
+         }
+         else
+         {
+             // Both sample arrays are indexed by the same value, so query
+             // rank1 once. The result is at most i (rank1 is presumably a
+             // count of flagged positions up to i), so it fits TextPosition.
+             const TextPosition sampleIdx = sampled->rank1(i) - 1;
+             const DocId docId = (*suffixDocId)[sampleIdx];
+             // Same sanity check as in EnumerateDocuments.
+             assert((unsigned)docId < numberOfTexts);
+
+             if (docId >= begin && docId <= end)
+             {
+                 // Read the sampled text position only for documents that
+                 // pass the filter.
+                 const TextPosition textPos = (*suffixes)[sampleIdx] + dist;
+                 result.push_back(std::make_pair(docId, textPos));
+             }
+         }
+     }
+ }
+