Added PSSM support for RLCSA
author nvalimak <nvalimak@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Sun, 7 Nov 2010 12:23:57 +0000 (12:23 +0000)
committer nvalimak <nvalimak@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Sun, 7 Nov 2010 12:23:57 +0000 (12:23 +0000)
git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/TextCollection@933 3cdefd35-fc62-479d-8e8d-bae585ffb9ca

RLCSAWrapper.h

index 8fd1d68..44f7268 100644 (file)
@@ -24,6 +24,7 @@
 
 #include "TextCollection.h"
 #include "IndelQuery.h"
+#include "PssmQuery.h"
 
 #include "incbwt/rlcsa.h"
 
@@ -38,6 +39,7 @@
 #include <stdexcept>
 #include <set>
 #include <string>
+#include <cmath>
 
 namespace SXSI 
 {
@@ -169,6 +171,28 @@ public:
             return iq.align(tmp+1, k);
         }
 
+        /*************************************************************************
+         * Position Specific Scoring Matrix (PSSM) matching
+         * See PssmQuery.h for usage information.
+         */
+        if (strncmp((char const *)pattern, "PSSM ", 4) == 0)
+        {
+            // Parse threshold
+            double thr = std::atof((char const *)pattern + 5);
+            if (thr <= 0)
+                goto exact_pattern_matching; // Invalid format
+            
+            // Find the start of the pattern (i.e. the second ' ')
+            uchar const * tmp = pattern + 5;
+            while (*tmp != ' ' && *tmp != 0) ++tmp;
+            if (*tmp != ' ' || tmp == pattern + 5)
+                goto exact_pattern_matching; // Invalid format
+            
+            PssmQuery pq(this, std::log(thr));
+            //std::cerr << "Pattern: " << tmp+1 << ", log(threshold) = " << std::log(thr) << std::endl;
+            return pq.align(tmp+1, 0);
+        }
+
         /*************************************************************************
          * Exact pattern matching
          */