From c1ec53b9fa4f6e03d63b1126861f83d5575eeec6 Mon Sep 17 00:00:00 2001
From: nvalimak
Date: Sun, 7 Nov 2010 12:23:57 +0000
Subject: [PATCH 1/1] Added PSSM support for RLCSA

git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/TextCollection@933 3cdefd35-fc62-479d-8e8d-bae585ffb9ca
---
 RLCSAWrapper.h | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/RLCSAWrapper.h b/RLCSAWrapper.h
index 8fd1d68..44f7268 100644
--- a/RLCSAWrapper.h
+++ b/RLCSAWrapper.h
@@ -24,6 +24,7 @@
 
 #include "TextCollection.h"
 #include "IndelQuery.h"
+#include "PssmQuery.h"
 
 #include "incbwt/rlcsa.h"
 
@@ -38,6 +39,7 @@
 #include
 #include
 #include
+#include
 
 namespace SXSI
 {
@@ -169,6 +171,28 @@ public:
             return iq.align(tmp+1, k);
         }
 
+        /*************************************************************************
+         * Position Specific Scoring Matrix (PSSM) matching
+         * See PssmQuery.h for usage information.
+         */
+        if (strncmp((char const *)pattern, "PSSM ", 5) == 0) // All 5 bytes incl. the space: a bare "PSSM" must never reach pattern + 5
+        {
+            // Parse threshold
+            double thr = std::atof((char const *)pattern + 5);
+            if (!(thr > 0)) // Rejects 0 (atof found no number), negatives and NaN before std::log
+                goto exact_pattern_matching; // Invalid format
+
+            // Find the start of the pattern (i.e. the second ' ')
+            uchar const * tmp = pattern + 5;
+            while (*tmp != ' ' && *tmp != 0) ++tmp;
+            if (*tmp != ' ' || tmp == pattern + 5)
+                goto exact_pattern_matching; // Invalid format
+
+            PssmQuery pq(this, std::log(thr));
+            //std::cerr << "Pattern: " << tmp+1 << ", log(threshold) = " << std::log(thr) << std::endl;
+            return pq.align(tmp+1, 0);
+        }
+
         /*************************************************************************
          * Exact pattern matching
          */
-- 
2.17.1