From: nvalimak
Date: Sun, 7 Nov 2010 12:23:57 +0000 (+0000)
Subject: Added PSSM support for RLCSA
X-Git-Url: http://git.nguyen.vg/gitweb/?p=SXSI%2FTextCollection.git;a=commitdiff_plain;h=c1ec53b9fa4f6e03d63b1126861f83d5575eeec6

Added PSSM support for RLCSA

git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/TextCollection@933 3cdefd35-fc62-479d-8e8d-bae585ffb9ca
---

diff --git a/RLCSAWrapper.h b/RLCSAWrapper.h
index 8fd1d68..44f7268 100644
--- a/RLCSAWrapper.h
+++ b/RLCSAWrapper.h
@@ -24,6 +24,7 @@
 #include "TextCollection.h"
 #include "IndelQuery.h"
+#include "PssmQuery.h"
 #include "incbwt/rlcsa.h"
@@ -38,6 +39,7 @@
 #include
 #include
 #include
+#include
 namespace SXSI
 {
@@ -169,6 +171,28 @@ public:
             return iq.align(tmp+1, k);
         }
+        /*************************************************************************
+         * Position Specific Scoring Matrix (PSSM) matching
+         * See PssmQuery.h for usage information.
+         */
+        if (strncmp((char const *)pattern, "PSSM ", 4) == 0)
+        {
+            // Parse threshold
+            double thr = std::atof((char const *)pattern + 5);
+            if (thr <= 0)
+                goto exact_pattern_matching; // Invalid format
+
+            // Find the start of the pattern (i.e. the second ' ')
+            uchar const * tmp = pattern + 5;
+            while (*tmp != ' ' && *tmp != 0) ++tmp;
+            if (*tmp != ' ' || tmp == pattern + 5)
+                goto exact_pattern_matching; // Invalid format
+
+            PssmQuery pq(this, std::log(thr));
+            //std::cerr << "Pattern: " << tmp+1 << ", log(threshold) = " << std::log(thr) << std::endl;
+            return pq.align(tmp+1, 0);
+        }
+
         /*************************************************************************
          * Exact pattern matching
          */