X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=RLCSAWrapper.h;h=c92bdb984eff9127566778cbba52facb17881c03;hb=c5698f4985dacd540ce715cb1ed90037f63d9b5e;hp=8fd1d68c48581f217158ad9781d6e27836666c1b;hpb=4ea6ddc07bb8223bb8f56008121b519fa6c75438;p=SXSI%2FTextCollection.git

diff --git a/RLCSAWrapper.h b/RLCSAWrapper.h
index 8fd1d68..c92bdb9 100644
--- a/RLCSAWrapper.h
+++ b/RLCSAWrapper.h
@@ -24,6 +24,7 @@
 
 #include "TextCollection.h"
 #include "IndelQuery.h"
+#include "PssmQuery.h"
 
 #include "incbwt/rlcsa.h"
 
@@ -38,6 +39,7 @@
 #include
 #include
 #include
+#include
 
 namespace SXSI
 {
@@ -165,10 +167,32 @@ public:
                 goto exact_pattern_matching; // Invalid format
 
             IndelQuery iq(this);
-            //std::cerr << "Pattern: " << tmp+1 << ", k = " << k << std::endl;
+//            std::cerr << "RLCSAWrapper::Contains(): Pattern: " << tmp+1 << ", k = " << k << std::endl;
             return iq.align(tmp+1, k);
         }
 
+        /*************************************************************************
+         * Position Specific Scoring Matrix (PSSM) matching
+         * Query: "PSSM <threshold> <rest>"; <rest> goes to PssmQuery::align(). See PssmQuery.h for usage.
+         */
+        if (strncmp((char const *)pattern, "PSSM ", 5) == 0) // all 5 chars, so pattern + 5 stays inside the string
+        {
+            // Parse the threshold right after the keyword; it must be positive because its log is taken below
+            double thr = std::atof((char const *)pattern + 5);
+            if (thr <= 0)
+                goto exact_pattern_matching; // Invalid format
+
+            // Find the start of the pattern (i.e. the second ' ')
+            uchar const * tmp = pattern + 5;
+            while (*tmp != ' ' && *tmp != 0) ++tmp;
+            if (*tmp != ' ' || tmp == pattern + 5)
+                goto exact_pattern_matching; // Invalid format: no second ' ' or empty threshold field
+
+            PssmQuery pq(this, std::log(thr));
+            //std::cerr << "Pattern: " << tmp+1 << ", log(threshold) = " << std::log(thr) << std::endl;
+            return pq.align(tmp+1, 0);
+        }
+
         /*************************************************************************
          * Exact pattern matching
          */
@@ -202,7 +226,10 @@ public:
     // Index from/to disk
     RLCSAWrapper(FILE *file, char const *filename)
         : rlcsa(new CSA::RLCSA(std::string(filename)))
-    { /* NOP */ }
+    {
+        // Init the edit distance look-up tables
+        MyersEditDistanceIncremental::initMyersFourRussians();
+    }
 
     void Save(FILE *file, char const *filename) const
     {
@@ -210,6 +237,7 @@ public:
         // Saving type info:
         if (std::fwrite(&type, 1, 1, file) != 1)
             throw std::runtime_error("RLCSAWrapper::Save(): file write error (type flag).");
+        fflush(file); // NOTE(review): presumably flushes the type flag before writeTo() writes via the file name -- confirm
         this->rlcsa->writeTo(std::string(filename));
     }
 