4 * Aligment using suffix filters.
10 #include "ResultSet.h"
11 #include "EditDistance.h"
21 class IndelQuery : public Query
24 IndelQuery(TextCollection const *tc_)
25 : Query(tc_), checked(0), splitPos(0), ed(0)
27 checked = new ResultSet(tc->getLength());
32 delete checked; checked = 0;
33 delete [] splitPos; splitPos = 0;
43 uchar *reversepat = new uchar[patlen+1];
44 for (unsigned i = klimit+1; i > 1; --i)
46 if (debug) std::cout << "new try: " << splitPos[i] << ", split = " << i << std::endl;
48 for (unsigned j = 0; j < splitPos[i-1]; j++)
49 reversepat[j] = pat[splitPos[i-1]-j-1];
50 reversepat[splitPos[i-1]] = 0;
51 ed = new MyersEditDistanceIncremental(reversepat, splitPos[i-1], i-1);
61 if (debug) std::cout << "new try: " << splitPos[1] << ", split = 1" << std::endl;
62 ed = 0; // Not used, exact search only for the first block
68 void exactMatch(const unsigned split)
70 unsigned i = splitPos[split];
71 const unsigned border = splitPos[split-1];
72 if (debug) std::cerr << "Exact match: i = " << i << ", border = " << border << std::endl;
74 while (i != border && pushChar(pat[i-1]))
77 if (debug) std::cerr << "matched upto: i = " << i << std::endl;
86 while (!smin.empty()) smin.pop();
87 while (!smax.empty()) smax.pop();
92 void nextStep(const unsigned split)
94 // Maximum edit distance for the current filter is split-1
95 if (ed->dist() <= split-1) checkMatch();
97 if (ed->end()) return;
99 // ALPHABET has been defined in Query
100 for (const char *cp = ALPHABET; cp < ALPHABET + ALPHABET_SIZE; ++cp) {
102 if (!pushChar(c)) continue;
103 const unsigned colmin = ed->pushChar(c);
105 // Edit distance is for reversed pattern, compute prefix lenght:
106 const unsigned prefixl = ed->length - ed->getSuffixLength();
107 // How many errors are we allowing on a prefix of given length?
109 while (splitPos[i] < prefixl)
112 assert(i < klimit+1);
113 assert(split-1 > i-1);
114 const unsigned maxk = (split-1) - (i-1);
117 std::cerr << "c = " << c << ", split = " << split << ", maxk = " << maxk << ", colmin = " << colmin
118 << ", prefixl = " << prefixl << ", dist = " << ed->dist() << std::endl;
120 if (colmin <= maxk) nextStep(split);
122 //if (debug) std::cerr << "pop c = " << c << std::endl;
128 inline void checkMatch(void)
130 // if (debug) std::cerr << "min,max: " << smin.top() << ", " << smax.top() << std::endl;
131 ulong min = smin.top();
132 ulong max = smax.top();
133 MyersEditDistance med(patlen, klimit);
134 for (ulong i = min; i <= max; i++) {
135 if (!checked->get(i))
138 med.init((uchar const *)pat);
139 validateCandidate(med, i);
144 void validateCandidate(MyersEditDistance &med, ulong sapos)
146 uchar *candidate = tc->getSuffix(sapos, patlen+klimit);
147 unsigned l = strlen((char*)candidate);
154 std::pair<unsigned, unsigned> dist(0, patlen);
156 { // MyersEditDistance class does not support klimit == 0
158 while (i < patlen && pat[i] == candidate[i])
163 return; // There is at least one mismatch (and klimit == 0)
166 else // Using prefixDist() when klimit > 0
167 dist = med.prefixDist(candidate, l);
169 if (dist.first <= klimit)
175 inline void newMatch(ulong sapos)
177 TextCollection::DocId di = tc->getDoc(sapos);
182 void constructPartition(void)
185 splitPos = new unsigned[klimit+2];
195 unsigned psize = patlen/(klimit+2);
198 std::cerr << "error: First psize == 0, for patlen = " << patlen << ", klimit = " << klimit << std::endl;
202 splitPos[1] = 2*psize;
204 // Last klimit are of size:
205 psize = (patlen-splitPos[1])/klimit;
208 std::cerr << "error: psize == 0, for patlen = " << patlen << ", klimit = " << klimit << std::endl;
212 for (unsigned i = 2; i <= klimit; i++)
213 splitPos[i]=splitPos[i-1]+psize;
214 splitPos[klimit+1]=patlen;
216 if (splitPos[klimit] >= patlen)
218 std::cerr << "error: splitPos[" << klimit << "] == " << splitPos[klimit] << " >= patlen, for patlen = " << patlen << ", klimit = " << klimit << std::endl;
224 std::cerr << "splitpos: ";
225 for (unsigned i= 0;i<=klimit+1; i++)
226 std::cerr << splitPos[i] << ", ";
227 std::cerr << std::endl;
234 MyersEditDistanceIncremental *ed;
237 // No copy constructor or assignment
238 IndelQuery(IndelQuery const&);
239 IndelQuery& operator = (IndelQuery const&);
244 #endif // _IndelQuery_H_