+ // Convert std::set to std::vector
+ TextCollection::document_result result(resultSet.begin(), resultSet.end());
+ // Map to doc ID's
+ for (document_result::iterator it = result.begin(); it != result.end(); ++it)
+ *it = emptyTextRank->select0(*it+1);
+ return result;
+}
+
+TextCollection::document_result CSA::Contains(uchar const * pattern, DocId begin, DocId end) const
+{
+ TextPosition m = strlen((char *)pattern);
+ if (m == 0)
+ return TextCollection::document_result();
+
+ TextPosition sp = 0, ep = 0;
+ // Search all occurrences
+ Search(pattern, m, &sp, &ep);
+
+ // We want unique document identifiers, using std::set to collect them
+ std::set<DocId> resultSet;
+
+ ulong sampled_rank_i = 0;
+ // Check each occurrence
+ for (; sp <= ep; ++sp)
+ {
+ TextPosition i = sp;
+ uchar c = alphabetrank->access(i);
+ while (c != '\0' && !sampled->IsBitSet(i, &sampled_rank_i))
+ {
+ i = C[c]+alphabetrank->rank(c,i)-1; // LF-mapping
+ c = alphabetrank->access(i);
+ }
+ if (c == '\0')
+ {
+ // Rank among the end-markers in BWT
+ unsigned endmarkerRank = alphabetrank->rank(0, i) - 1;
+
+ // End-marker that we found belongs to the "preceding" doc in collection:
+ DocId docId = ((*endmarkerDocId)[endmarkerRank] + 1) % numberOfTexts;
+ if (docId >= begin && docId <= end)
+ resultSet.insert(docId);
+ }
+ else
+ {
+ DocId docId = (*suffixDocId)[sampled_rank_i-1]; //sampled->rank(i)-1];
+ assert((unsigned)docId < numberOfTexts);
+ if (docId >= begin && docId <= end)
+ resultSet.insert(docId);
+ }
+ }
+
+ // Convert std::set to std::vector
+ TextCollection::document_result result(resultSet.begin(), resultSet.end());
+ // Map to doc ID's
+ for (document_result::iterator it = result.begin(); it != result.end(); ++it)
+ *it = emptyTextRank->select0(*it+1);