+ {
+ /*************************************************************************
+ * Approximate pattern matching
+ *
+ * Uses suffix filters. Enumerates *all* approximate occurrences (slow)
+ * instead of returning only the best occurrence (which is usually much faster).
+ *
+ * Query format: contains("APM 3 GATTACA")
+ * where
+ * "APM" is the keyword for approximate queries.
+ * "3" is the maximum edit distance allowed.
+ * "GATTACA" is the query word to be aligned.
+ */
+ // Approximate pattern matching branch: handles queries of the form
+ // "APM <k> <word>". Any malformed query falls through to exact matching.
+ if (strncmp((char const *)pattern, "APM ", 4) == 0)
+ {
+     // Maximum edit distance allowed. std::strtol is used instead of
+     // std::atoi because atoi has undefined behaviour when the number does
+     // not fit in an int; strtol saturates at LONG_MIN/LONG_MAX, so an
+     // out-of-range value is reliably caught by the range check below.
+     long const parsed = std::strtol((char const *)pattern + 4, NULL, 10);
+     if (parsed < 0 || parsed >= INT_MAX)
+         goto exact_pattern_matching; // Invalid format
+     int k = (int)parsed;
+
+     // Find the start of the query word (i.e. the second ' '). The field
+     // between the keyword and that space must be non-empty.
+     uchar const * tmp = pattern + 4;
+     while (*tmp != ' ' && *tmp != 0) ++tmp;
+     if (*tmp != ' ' || tmp == pattern + 4)
+         goto exact_pattern_matching; // Invalid format
+
+     IndelQuery iq(this);
+//     std::cerr << "RLCSAWrapper::Contains(): Pattern: " << tmp+1 << ", k = " << k << std::endl;
+     return iq.align(tmp+1, k);
+ }
+
+ /*************************************************************************
+ * Position Specific Scoring Matrix (PSSM) matching
+ * See PssmQuery.h for usage information.
+ */
+ // PSSM branch: handles queries of the form "PSSM <threshold> <pattern>".
+ // Any malformed query falls through to exact matching.
+ //
+ // All 5 characters of the keyword (including the trailing space) must be
+ // compared: with a length of 4 the bare string "PSSM" would match and
+ // pattern + 5 would then point past its terminating NUL.
+ if (strncmp((char const *)pattern, "PSSM ", 5) == 0)
+ {
+     // Parse threshold. Written as !(thr > 0) so that a NaN threshold is
+     // rejected together with zero and negative values before std::log.
+     double thr = std::atof((char const *)pattern + 5);
+     if (!(thr > 0))
+         goto exact_pattern_matching; // Invalid format
+
+     // Find the start of the pattern (i.e. the second ' '). The threshold
+     // field in front of that space must be non-empty.
+     uchar const * tmp = pattern + 5;
+     while (*tmp != ' ' && *tmp != 0) ++tmp;
+     if (*tmp != ' ' || tmp == pattern + 5)
+         goto exact_pattern_matching; // Invalid format
+
+     // The query object is given the natural logarithm of the threshold.
+     PssmQuery pq(this, std::log(thr));
+     //std::cerr << "Pattern: " << tmp+1 << ", log(threshold) = " << std::log(thr) << std::endl;
+     return pq.align(tmp+1, 0);
+ }
+
+ /*************************************************************************
+ * Exact pattern matching
+ */
+ exact_pattern_matching: