--- /dev/null
+#include "EditDistance.h"
+
+namespace SXSI
+{
+
+signed char MyersEditDistanceIncremental::myers_total[256 * 256];
+signed char MyersEditDistanceIncremental::myers_optimum[256 * 256];
+uchar MyersEditDistanceIncremental::myers_best_pos[256 * 256];
+
+}
--- /dev/null
+#ifndef _SXSI_EditDistance_h_
+#define _SXSI_EditDistance_h_
+
+/**
+ * TODO
+ * [ ] clean up code
+ */
+
+#include <utility>
+#include <cstring>
+#include "Tools.h"
+
+namespace SXSI
+{
+
+class EditDistance
+{
+public:
+ uchar const *pat;
+ unsigned patlen;
+ unsigned maxlen;
+ unsigned **dp;
+ unsigned p;
+
+ bool end()
+ {
+ if (p < maxlen)
+ return false;
+ return true;
+ }
+
+ EditDistance(uchar const *pattern, unsigned len, unsigned maxk) {
+ pat = pattern;
+ patlen = len;
+ maxlen = len + maxk;
+ p = 0;
+ dp = new unsigned*[patlen + 1];
+ for (unsigned i = 0; i <= patlen; i++) {
+ dp[i] = new unsigned[maxlen+1];
+ dp[i][0] = i;
+ }
+ }
+
+ ~EditDistance() {
+ for (unsigned i = 0; i <= patlen; i++)
+ delete [] dp[i];
+ delete [] dp;
+ }
+
+ std::pair<unsigned, unsigned> prefixDist(uchar const *cand, unsigned candlen) {
+ unsigned mindist = patlen+candlen;
+ unsigned minlen = patlen+candlen;
+ unsigned *col = new unsigned[patlen+1];
+ unsigned *ncol = new unsigned[patlen+1];
+ for (unsigned i = 0; i <= patlen; i++)
+ col[i] = i;
+ for (unsigned j = 1; j <= candlen; j++) {
+ ncol[0] = j;
+ for (unsigned i = 1; i <= patlen; i++) {
+ unsigned match = (cand[j-1] == pat[i-1])? col[i - 1] : (col[i - 1] + 1);
+ ncol[i] = std::min(col[i] + 1, ncol[i - 1] + 1);
+ ncol[i] = std::min(ncol[i],match);
+ }
+ if (ncol[patlen]<mindist)
+ {
+ mindist = ncol[patlen];
+ minlen = j;
+ }
+ for (unsigned i = 0; i <= patlen; i++)
+ col[i] = ncol[i];
+ }
+ delete [] col;
+ delete [] ncol;
+ return std::make_pair(mindist,minlen);
+ }
+
+
+ unsigned pushChar(uchar c) {
+ unsigned mindist = patlen;
+ p++;
+ if (p>maxlen)
+ std::cout << "out of bounds in DP computation, p = " << p << ", max = " << maxlen << std::endl;
+ dp[0][p] = p;
+ for (unsigned i = 1; i <= patlen; i++) {
+ unsigned match = (c == pat[i-1])? dp[i - 1][p-1] : (dp[i - 1][p-1] + 1);
+ dp[i][p] = std::min(dp[i][p-1] + 1, dp[i - 1][p] + 1);
+ dp[i][p] = std::min(dp[i][p],match);
+ if (dp[i][p]<mindist)
+ mindist = dp[i][p];
+ }
+ return mindist;
+ }
+
+ void popChar() {
+ --p;
+ return;
+ }
+
+ unsigned dist() {
+ return dp[patlen][p];
+ }
+
+ unsigned getSuffixLength()
+ {
+ unsigned mindist = dp[patlen][p];
+ unsigned minlen = patlen;
+ for (unsigned i = 0; i < patlen; ++i)
+ if (dp[i][p] < mindist)
+ {
+ mindist = dp[i][p];
+ minlen = i;
+ }
+ return minlen;
+ }
+
+};
+
+
+class MyersEditDistanceIncremental
+{
+ public:
+
+ MyersEditDistanceIncremental(const uchar *pattern, unsigned _length, unsigned _max_errors) :
+ length(_length), max_errors(_max_errors), min_row(~0u)
+ {
+ this->max_length = this->length + this->max_errors;
+ this->blocks = (this->length + W - 1) / W;
+ this->last_block_length = this->length % W;
+ if(this->last_block_length == 0) { this->last_block_length = W; }
+ this->last_block_mask = ~0lu >> (this->blocks * W - this->length);
+
+ this->initializeCharVectors(pattern);
+ this->initializePlusMinusVectors();
+ }
+
+ ~MyersEditDistanceIncremental()
+ {
+ for(unsigned c = 0; c < 256; c++)
+ {
+ delete[] this->char_vectors[c]; this->char_vectors[c] = 0;
+ }
+ delete[] this->char_vectors; this->char_vectors = 0;
+
+ for(unsigned i = 0; i <= this->max_length; i++)
+ {
+ delete[] this->plus_vectors[i]; this->plus_vectors[i] = 0;
+ delete[] this->minus_vectors[i]; this->minus_vectors[i] = 0;
+ }
+ delete[] this->plus_vectors; this->plus_vectors = 0;
+ delete[] this->minus_vectors; this->minus_vectors = 0;
+
+ delete[] this->score; this->score = 0;
+ }
+
+ unsigned pushChar(unsigned c)
+ {
+ if(c >= 256)
+ {
+ std::cerr << "MyersEditDistance: Invalid character value " << c << "!" << std::endl;
+ return ~0u;
+ }
+ if(this->pos >= this->max_length)
+ {
+ std::cerr << "MyersEditDistance: Cannot pushChar(c) past limit!" << std::endl;
+ std::cerr << "MyersEditDistance: Current text position " << this-> pos << ", maximum " << this->max_length << std::endl;
+ return ~0u;
+ }
+
+ this->pos++;
+ ulong c_vec, diagonal, h_plus = 0, h_minus = 0, plus, minus, temp;
+ ulong d_overflow = 0, hp_overflow = 1, hm_overflow = 0;
+
+ /*
+ Update rules from http://www.cs.uta.fi/~helmu/pubs/phd.pdf (Figure 25).
+ */
+ for(unsigned i = 0; i < this->blocks; i++)
+ {
+ c_vec = this->char_vectors[c][i];
+ plus = this->plus_vectors[this->pos - 1][i];
+ minus = this->minus_vectors[this->pos - 1][i];
+
+ diagonal = c_vec & plus;
+ temp = diagonal + plus + d_overflow;
+ d_overflow = ((temp < diagonal) || (temp == diagonal && d_overflow == 1) ? 1 : 0);
+ diagonal = (temp ^ plus) | c_vec | minus;
+
+ h_plus = minus | ~(diagonal | plus);
+ h_minus = diagonal & plus;
+
+ this->plus_vectors[this->pos][i] =
+ (h_minus << 1) | hm_overflow | ~(diagonal | (h_plus << 1) | hp_overflow);
+ this->minus_vectors[this->pos][i] = diagonal & ((h_plus << 1) | hp_overflow);
+
+ hp_overflow = (h_plus & HIGH_BIT ? 1 : 0);
+ hm_overflow = (h_minus & HIGH_BIT ? 1 : 0);
+ }
+ this->plus_vectors[this->pos][this->blocks - 1] &= this->last_block_mask;
+ this->minus_vectors[this->pos][this->blocks - 1] &= this->last_block_mask;
+
+ this->score[this->pos] = this->score[this->pos - 1];
+ if(h_plus & (1lu << (this->length % W - 1)))
+ {
+ this->score[this->pos]++;
+ }
+ else if(h_minus & (1lu << (this->length % W - 1)))
+ {
+ this->score[this->pos]--;
+ }
+
+ // Find the minimal value in the current column.
+ int minimum = this->score[this->pos], cur = minimum;
+ min_row = this->length;
+ for(unsigned i = this->blocks; i > 0;)
+ {
+ i--;
+ plus = this->plus_vectors[this->pos][i];
+ minus = this->minus_vectors[this->pos][i];
+
+ for(unsigned j = W; j > 0;)
+ {
+ j -= CHAR_BIT;
+ unsigned a = (plus >> j) & 0xFF, b = (minus >> j) & 0xFF;
+ if (minimum > cur + myers_optimum[256 * a + b])
+ {
+ minimum = cur + myers_optimum[256 * a + b];
+ min_row = i*W + j + myers_best_pos[256 * a + b];
+ }
+ cur += myers_total[256 * a + b];
+ }
+ }
+
+ return minimum;
+ }
+
+ unsigned dist() {
+ return this->score[this->pos];
+ }
+
+ void popChar()
+ {
+ if(this->pos == 0)
+ {
+ std::cerr << "MyersEditDistance: Cannot popChar() from text position 0!" << std::endl;
+ return;
+ }
+
+ this->pos--;
+ }
+
+ inline unsigned getSuffixLength()
+ {
+ return min_row;
+ }
+
+/*
+ // FIXME implement
+ std::pair<unsigned, unsigned> prefixDist(uchar const *text, unsigned n)
+*/
+
+ bool end()
+ {
+ if (pos < max_length)
+ return false;
+ return true;
+ }
+
+static void initMyersFourRussians()
+{
+// myers_total = new signed char[256 * 256];
+// myers_optimum = new signed char[256 * 256];
+// myers_best_pos = new uchar[256 * 256];
+
+ unsigned bp = 8;
+ for(unsigned i = 0; i < 256; i++)
+ {
+ for(unsigned j = 0; j < 256; j++)
+ {
+ int cur = 0, best = 0;
+ for(unsigned k = CHAR_BIT; k > 0;)
+ {
+ k--;
+ if(i & (1 << k)) { cur--; }
+ if(j & (1 << k)) { cur++; }
+ if(cur < best)
+ {
+ best = cur;
+ bp = k;
+ }
+ }
+
+ myers_total[256 * i + j] = cur;
+ myers_optimum[256 * i + j] = best;
+ myers_best_pos[256 * i + j] = bp;
+ }
+ }
+}
+
+ unsigned length;
+private:
+ static signed char myers_total[];
+ static signed char myers_optimum[];
+ static uchar myers_best_pos[];
+
+
+ const static ulong HIGH_BIT = 1lu << (W - 1);
+
+ ulong **char_vectors;
+ unsigned max_errors;
+ unsigned max_length;
+ unsigned min_row;
+
+ unsigned blocks; // Number of blocks required for the pattern
+ unsigned last_block_length; // Number of bits in the final block
+ ulong last_block_mask;
+
+ ulong **plus_vectors;
+ ulong **minus_vectors;
+ unsigned* score;
+
+ unsigned pos; // We have matched pos characters of text
+
+
+ void initializeCharVectors(uchar const *pattern)
+ {
+ this->char_vectors = new ulong*[256];
+ for(unsigned c = 0; c < 256; c++)
+ {
+ this->char_vectors[c] = new ulong[this->blocks];
+ std::memset(this->char_vectors[c], 0, this->blocks * sizeof(ulong));
+ }
+
+ for(unsigned i = 0; i < this->length; i++)
+ {
+ this->char_vectors[pattern[i]][i / W] |= (1lu << (i % W));
+ }
+ }
+
+ void initializePlusMinusVectors()
+ {
+ this->plus_vectors = new ulong*[this->max_length + 1];
+ this->minus_vectors = new ulong*[this->max_length + 1];
+ this->score = new unsigned[this->max_length + 1];
+
+ for(unsigned i = 0; i <= this->max_length; i++)
+ {
+ this->plus_vectors[i] = new ulong[this->blocks];
+ this->minus_vectors[i] = new ulong[this->blocks];
+ }
+
+ this->initialStep();
+ }
+
+ void initialStep()
+ {
+ for(unsigned i = 0; i < this->blocks; i++)
+ {
+ this->plus_vectors[0][i] = ~0lu;
+ this->minus_vectors[0][i] = 0;
+ }
+ this->plus_vectors[0][this->blocks - 1] = this->last_block_mask;
+
+ this->score[0] = this->length;
+ this->pos = 0;
+ }
+
+
+
+};
+
+
+
+
+ class MyersEditDistance
+ {
+ public:
+ ulong **Peq;
+ bool S[256];
+ unsigned k;
+ unsigned m;
+ unsigned blockCount;
+ unsigned block;
+ unsigned lastBlockLength;
+ ulong *Pv;
+ ulong *Mv;
+ ulong *Score;
+ ulong twopowmpos;
+ ulong twopowwpos;
+
+ void init(uchar const *P)
+ {
+ ulong i;
+ ulong j;
+ ulong l;
+ for(i = 0;i< 256;i++) {
+ S[i] = false;
+ for (l=0;l <= ((m-1) / W); l++)
+ Peq[l][i] = 0;
+ }
+ for (l=0;l <= ((m-1) / W); l++)
+ for (j = 1; j <= W; j++) {
+ if (W*l+j > m)
+ break;
+ Peq[l][P[W*l+j-1]] = Peq[l][P[W*l+j-1]] | (1lu << (j-1));
+ if (j+W*l <= k+1)
+ S[(unsigned)P[W*l+j-1]] = true;
+ }
+ for (l=0; l < blockCount; l++) {
+ Mv[l] = 0lu;
+ Score[l] = (l+1lu)*W;
+ Pv[l] = ~0lu;
+ }
+ Mv[blockCount] = 0;
+ Score[blockCount] = m;
+ Pv[blockCount] = (~0lu) >> ((blockCount+1lu)*W - m);
+ }
+
+ MyersEditDistance(unsigned _m, unsigned maxk)
+ : Peq(0), k(maxk), m(_m)
+ {
+ ulong l;
+ Peq = new ulong *[(m-1) / W + 1]; // (unsigned **)malloc((((m-1) / w)+1)*sizeof(unsigned*));
+ for (l=0;l <= ((m-1) / W); l++)
+ Peq[l] = new ulong[256]; //(unsigned *)malloc(256*sizeof(unsigned));
+
+ blockCount= ((m-1) / W);
+ block = (k-1) / W;
+ lastBlockLength = m % W;
+ if (lastBlockLength == 0)
+ lastBlockLength = W;
+ Pv = new ulong[blockCount+1]; // (unsigned *)malloc((blockCount+1)*sizeof(unsigned));
+ Mv = new ulong[blockCount+1]; // (unsigned *)malloc((blockCount+1)*sizeof(unsigned));
+ Score = new ulong[blockCount+1];//(unsigned *)malloc((blockCount+1)*sizeof(unsigned));
+ twopowmpos = 1lu << (lastBlockLength-1);
+ twopowwpos = 1lu << (W-1);
+ }
+
+ ~MyersEditDistance()
+ {
+ delete [] Pv; Pv = 0;
+ delete [] Mv; Mv = 0;
+ delete [] Score; Score = 0;
+ unsigned l;
+ for (l=0;l <= ((m-1) / W); l++)
+ {
+ delete [] Peq[l];
+ Peq[l] = 0;
+ }
+ delete [] Peq; Peq = 0;
+ }
+
+ std::pair<unsigned, unsigned> prefixDist(uchar const *text, unsigned n)
+ {
+ unsigned mindist = m+n;
+ unsigned minlen = m+n;
+ // int count=0;
+ //int blockAverage=0;
+ ulong Eq, Xv, Xh, Ph, Mh;
+ ulong Phendbit;
+ ulong Mhendbit;
+ ulong temp;
+ ulong j;
+ ulong l;
+ ulong overk=1;
+ block = (k-1) / W;
+ for(j = 1; j <= n; j++) {
+ Phendbit = 1lu; //0;
+ Mhendbit = 0lu;
+ for (l=0; l <= blockCount; l++) {
+ Eq = Peq[l][(unsigned char)text[j-1]];
+ Xv = Eq | Mv[l];
+ Xh = ((((Eq | Mhendbit)& Pv[l]) + Pv[l]) ^ Pv[l]) | (Eq | Mhendbit);
+ Ph = Mv[l] | ~ (Xh | Pv[l]);
+ Mh = Pv[l] & Xh;
+ temp = l < blockCount ? twopowwpos : twopowmpos;
+ if (Ph & temp)
+ Score[l] += 1;
+ else if (Mh & temp)
+ Score[l] -= 1;
+ temp = (Ph >> (W-1));
+ Ph <<= 1;
+ Ph |= Phendbit;
+ Phendbit = temp;
+ temp = (Mh >> (W-1));
+ Mh <<= 1;
+ Mh |= Mhendbit;
+ Mhendbit = temp;
+ Pv[l] = (Mh) | ~ (Xv | Ph);
+ Mv[l] = Ph & Xv;
+ if (block == l+1 &&
+ ((block == blockCount &&
+ (Score[l] > k+lastBlockLength ||
+ (Score[l] > k && overk != 0 && j - overk >= lastBlockLength))) ||
+ (block != blockCount &&
+ (Score[l] > k+W ||
+ (Score[l] > k && overk != 0 && j - overk >= W))))) {
+ // lohkon Score kasvanut niin isoksi, ettei seuraavaa kannata laskea
+ overk = 0;
+ block = l;
+ break;
+ }
+ else if (block == l+1 && overk == 0 && Score[l] > k)
+ // Talletetaan ensimmäinen diagonaali jolla Score > k. Näin tiedetään
+ // jatkossa koska seuraavan lohkon laskeminen on turhaa.
+ overk = j;
+ else if (block == l+1 && Score[l] <= k)
+ // Diagonaalilla Score <= k ==> Nollataan muuttuja overk.
+ overk = 0;
+ else if (block == l && block != blockCount && Score[l] <= k) {
+ // Score <= k ==> jatketaan seuraavaan lohkoon. Koska seuraavaa lohkoa
+ // ei ole laskettu edellisellä sarakkeella, pitää sen arvot päivittää.
+ Score[l+1] = k+1lu;
+ Pv[l+1] = 0lu;
+ Mv[l+1] = 0lu;
+ overk = 0lu;
+ block = l+1;
+ }
+ else if (block == l)
+ // Seuraavaan lohkoon ei kannata siirtyä, kun Score > k.
+ break;
+ }
+// blockAverage += block+1;
+ // if (block == blockCount && Score[blockCount] <= k) {
+ if (block == blockCount && Score[blockCount] <= mindist)
+ {
+ mindist = Score[blockCount];
+ minlen = j;
+
+ //printf("Min %u at len %u\n", mindist, minlen);
+ // count++;
+ }
+ }
+// return count;
+ return std::make_pair(mindist,minlen);
+ /*ShowMessage("Esiintymiä löytyi " + IntToStr(laskuri) + "kpl. " +
+ "Aikaa meni " + IntToStr(msec) + "msec");
+ ShowMessage("Keskimääräinen lohkojen määrä " +
+ FormatFloat("0.00",(float)blockAverage / n));*/
+ }
+
+};
+
+
+} // Namespace
+
+#endif
--- /dev/null
+/**
+ * k-errors queries
+ *
+ * Aligment using suffix filters.
+ */
+
+#ifndef _IndelQuery_H_
+#define _IndelQuery_H_
+
+#include "ResultSet.h"
+#include "EditDistance.h"
+#include "Query.h"
+
+#include <string>
+#include <cstdlib>
+#include <cstring>
+
+namespace SXSI
+{
+
+class IndelQuery : public Query
+{
+public:
+ IndelQuery(TextCollection const *tc_)
+ : Query(tc_), checked(0), splitPos(0), ed(0)
+ {
+ checked = new ResultSet(tc->getLength());
+ }
+
+ virtual ~IndelQuery()
+ {
+ delete checked; checked = 0;
+ delete [] splitPos; splitPos = 0;
+ delete ed; ed = 0;
+ }
+
+protected:
+ void firstStep(void)
+ {
+ checked->clear();
+ constructPartition();
+
+ uchar *reversepat = new uchar[patlen+1];
+ for (unsigned i = klimit+1; i > 1; --i)
+ {
+ if (debug) std::cout << "new try: " << splitPos[i] << ", split = " << i << std::endl;
+
+ for (unsigned j = 0; j < splitPos[i-1]; j++)
+ reversepat[j] = pat[splitPos[i-1]-j-1];
+ reversepat[splitPos[i-1]] = 0;
+ ed = new MyersEditDistanceIncremental(reversepat, splitPos[i-1], i-1);
+
+ exactMatch(i);
+
+ delete ed;
+ ed = 0;
+ }
+ delete [] reversepat;
+ reversepat = 0;
+
+ if (debug) std::cout << "new try: " << splitPos[1] << ", split = 1" << std::endl;
+ ed = 0; // Not used, exact search only for the first block
+ exactMatch(1);
+ }
+
+private:
+
+ void exactMatch(const unsigned split)
+ {
+ unsigned i = splitPos[split];
+ const unsigned border = splitPos[split-1];
+ if (debug) std::cerr << "Exact match: i = " << i << ", border = " << border << std::endl;
+
+ while (i != border && pushChar(pat[i-1]))
+ --i;
+
+ if (debug) std::cerr << "matched upto: i = " << i << std::endl;
+ if (i == 0)
+ checkMatch();
+ else
+ if (i == border)
+ nextStep(split);
+
+ // Rewind stack
+ match.clear();
+ while (!smin.empty()) smin.pop();
+ while (!smax.empty()) smax.pop();
+ smin.push(0);
+ smax.push(textlen-1);
+ }
+
+ void nextStep(const unsigned split)
+ {
+ // Maximum edit distance for the current filter is split-1
+ if (ed->dist() <= split-1) checkMatch();
+
+ if (ed->end()) return;
+
+ // ALPHABET has been defined in Query
+ for (const char *cp = ALPHABET; cp < ALPHABET + ALPHABET_SIZE; ++cp) {
+ char c = *cp;
+ if (!pushChar(c)) continue;
+ const unsigned colmin = ed->pushChar(c);
+
+ // Edit distance is for reversed pattern, compute prefix lenght:
+ const unsigned prefixl = ed->length - ed->getSuffixLength();
+ // How many errors are we allowing on a prefix of given length?
+ unsigned i = 1;
+ while (splitPos[i] < prefixl)
+ ++i;
+
+ assert(i < klimit+1);
+ assert(split-1 > i-1);
+ const unsigned maxk = (split-1) - (i-1);
+
+ if (debug)
+ std::cerr << "c = " << c << ", split = " << split << ", maxk = " << maxk << ", colmin = " << colmin
+ << ", prefixl = " << prefixl << ", dist = " << ed->dist() << std::endl;
+
+ if (colmin <= maxk) nextStep(split);
+
+ //if (debug) std::cerr << "pop c = " << c << std::endl;
+ ed->popChar();
+ popChar();
+ }
+ }
+
+ inline void checkMatch(void)
+ {
+// if (debug) std::cerr << "min,max: " << smin.top() << ", " << smax.top() << std::endl;
+ ulong min = smin.top();
+ ulong max = smax.top();
+ MyersEditDistance med(patlen, klimit);
+ for (ulong i = min; i <= max; i++) {
+ if (!checked->get(i))
+ {
+ checked->set(i);
+ med.init((uchar const *)pat);
+ validateCandidate(med, i);
+ }
+ }
+ }
+
+ void validateCandidate(MyersEditDistance &med, ulong sapos)
+ {
+ uchar *candidate = tc->getSuffix(sapos, patlen+klimit);
+ unsigned l = strlen((char*)candidate);
+ if (l < patlen)
+ {
+ delete [] candidate;
+ return;
+ }
+
+ std::pair<unsigned, unsigned> dist(0, patlen);
+ if (klimit == 0)
+ { // MyersEditDistance class does not support klimit == 0
+ unsigned i = 0;
+ while (i < patlen && pat[i] == candidate[i])
+ ++i;
+ if (i < patlen)
+ {
+ delete [] candidate;
+ return; // There is at least one mismatch (and klimit == 0)
+ }
+ }
+ else // Using prefixDist() when klimit > 0
+ dist = med.prefixDist(candidate, l);
+
+ if (dist.first <= klimit)
+ newMatch(sapos);
+ delete [] candidate;
+ }
+
+
+ inline void newMatch(ulong sapos)
+ {
+ TextCollection::DocId di = tc->getDoc(sapos);
+ result.insert(di);
+ }
+
+
+ void constructPartition(void)
+ {
+ delete [] splitPos;
+ splitPos = new unsigned[klimit+2];
+
+ if (klimit == 0)
+ {
+ splitPos[0] = 0;
+ splitPos[1]=patlen;
+ return;
+ }
+
+ // First piece size
+ unsigned psize = patlen/(klimit+2);
+ if (psize == 0)
+ {
+ std::cerr << "error: First psize == 0, for patlen = " << patlen << ", klimit = " << klimit << std::endl;
+ std::exit(1);
+ }
+ splitPos[0] = 0;
+ splitPos[1] = 2*psize;
+
+ // Last klimit are of size:
+ psize = (patlen-splitPos[1])/klimit;
+ if (psize == 0)
+ {
+ std::cerr << "error: psize == 0, for patlen = " << patlen << ", klimit = " << klimit << std::endl;
+ std::exit(1);
+ }
+
+ for (unsigned i = 2; i <= klimit; i++)
+ splitPos[i]=splitPos[i-1]+psize;
+ splitPos[klimit+1]=patlen;
+
+ if (splitPos[klimit] >= patlen)
+ {
+ std::cerr << "error: splitPos[" << klimit << "] == " << splitPos[klimit] << " >= patlen, for patlen = " << patlen << ", klimit = " << klimit << std::endl;
+ std::exit(1);
+ }
+
+ if (debug)
+ {
+ std::cerr << "splitpos: ";
+ for (unsigned i= 0;i<=klimit+1; i++)
+ std::cerr << splitPos[i] << ", ";
+ std::cerr << std::endl;
+ }
+ }
+
+
+ ResultSet *checked;
+ unsigned *splitPos;
+ MyersEditDistanceIncremental *ed;
+
+ IndelQuery();
+ // No copy constructor or assignment
+ IndelQuery(IndelQuery const&);
+ IndelQuery& operator = (IndelQuery const&);
+};
+
+} // namespace
+
+#endif // _IndelQuery_H_
--- /dev/null
+#include "Query.h"
+#include <cstring>
+
+namespace SXSI
+{
+
+const char Query::ALPHABET_DNA[] = {'A', 'C', 'G', 'T', 'N'};
+
+
+Query::Query(TextCollection const *tc_)
+ : pat(0), patlen(0), tc(tc_), debug(false), klimit(0),
+ textlen(tc->getLength()), ALPHABET(ALPHABET_DNA)
+{ }
+
+
+/*void reverse()
+{
+ uchar c;
+ for (int i = 0; i < patlen / 2; ++i) {
+ c = pat[i];
+ pat[i] = pat[patlen - i - 1];
+ pat[patlen - i - 1] = c;
+ }
+}
+
+void revcmp()
+{
+ reverse();
+ for (int i = 0; i < patlen; ++i)
+ switch (pat[i])
+ {
+ case('A'): pat[i] = 'T'; break;
+ case('C'): pat[i] = 'G'; break;
+ case('G'): pat[i] = 'C'; break;
+ case('T'): pat[i] = 'A'; break;
+ case('a'): pat[i] = 't'; break;
+ case('c'): pat[i] = 'g'; break;
+ case('g'): pat[i] = 'c'; break;
+ case('a'): pat[i] = 't'; break;
+ case('N'): pat[i] = 'N'; break;
+ case('n'): pat[i] = 'n'; break;
+ default:
+ std::cerr << "Query::align(): invalid alphabet ("<< pat[i] << ") given!" << std::endl;
+ std::exit(1);
+ break;
+ }
+ }*/
+
+TextCollection::document_result Query::align(uchar const *p, unsigned k)
+{
+ result.clear();
+ this->pat = p;
+ this->patlen = std::strlen((char const *)p);
+ this->klimit = k;
+
+ while (!smin.empty()) smin.pop();
+ while (!smax.empty()) smax.pop();
+ smin.push(0);
+ smax.push(textlen-1);
+ match.clear();
+
+ firstStep();
+
+ // Search reverse complemented?
+/* {
+ revcmp();
+ this->pat = p->c_str();
+ while (!smin.empty()) smin.pop();
+ while (!smax.empty()) smax.pop();
+ smin.push(0);
+ smax.push(textlen-1);
+ match.clear();
+
+ firstStep();
+
+ revcmp(); // Restore orig string
+ }*/
+
+ TextCollection::document_result dr(result.begin(), result.end());
+ return dr;
+}
+
+} // namespace
--- /dev/null
+/**
+ * Interface for alignment queries
+ */
+
+#ifndef _Query_H_
+#define _Query_H_
+
+#include "TextCollection.h"
+
+#include <stack>
+#include <vector>
+#include <set>
+
+namespace SXSI
+{
+
+/**
+ * Base class for queries
+ */
+class Query
+{
+public:
+ TextCollection::document_result align(uchar const *p, unsigned k);
+
+ void setDebug(bool b)
+ { this->debug = b; }
+
+ virtual ~Query()
+ { }
+
+protected:
+ virtual void firstStep() = 0;
+
+ inline bool pushChar(char c) {
+ ulong nmin = tc->LF(c, smin.top()-1);
+ ulong nmax = tc->LF(c, smax.top())-1;
+ if (nmin > nmax) return false;
+ smin.push(nmin);
+ smax.push(nmax);
+ match.push_back(c);
+ return true;
+ }
+
+ inline void popChar() {
+ smin.pop();
+ smax.pop();
+ match.pop_back();
+ }
+
+ inline int min(int a, int b) {
+ if (a < b) return a; else return b;
+ }
+
+ inline int min(int a, int b, int c) {
+ if (a < b) if (a < c) return a; else return c;
+ else if (b < c) return b; else return c;
+ }
+
+ Query(TextCollection const *tc_);
+
+ uchar const *pat;
+ unsigned patlen;
+ TextCollection const *tc;
+ bool debug;
+ unsigned klimit;
+ ulong textlen;
+
+ const char *ALPHABET;
+ static const unsigned ALPHABET_SIZE = 5;
+ static const char ALPHABET_DNA[];
+
+ std::stack<ulong> smin;
+ std::stack<ulong> smax;
+ std::vector<uchar> match;
+ std::set<TextCollection::DocId> result;
+
+private:
+ Query();
+ // No copy constructor or assignment
+ Query(Query const&);
+ Query& operator = (Query const&);
+};
+
+} // Namespace
+
+#endif // _Query_H_
#define _RLCSAWrapper_H_
#include "TextCollection.h"
+#include "IndelQuery.h"
#include "incbwt/rlcsa.h"
# define W 32
#endif
+#include <stdexcept>
#include <set>
#include <string>
public:
RLCSAWrapper(const CSA::RLCSA* index)
: rlcsa(index)
- { /* NOP */ }
+ {
+ // Init the edit distance look-up tables
+ MyersEditDistanceIncremental::initMyersFourRussians();
+ }
~RLCSAWrapper()
{
document_result Prefix(uchar const *) const { unsupported(); return document_result(); };
document_result Suffix(uchar const *) const { unsupported(); return document_result(); };
document_result Equal(uchar const *) const { unsupported(); return document_result(); };
+
document_result Contains(uchar const *pattern) const
- {
- //std::pair<TextPosition,TextPosition> p = backwards(pattern);
+ {
+ /*************************************************************************
+ * Approximate pattern matching
+ *
+ * Using suffix filters. Has to enumerate *all* approx. occurrences (sloooow...)
+ * instead of just returning the best occurrence (which is usually much faster).
+ *
+ * Query format: contains("APM 3 GATTACA")
+ * where
+ * "APM" is the keyword for approximate queries.
+ * "3" is the maximum edit distance allowed.
+ * "GATTACA" is the query word to be aligned.
+ */
+ if (strncmp((char const *)pattern, "APM ", 4) == 0)
+ {
+ // Edit distance allowed.
+ int k = std::atoi((char const *)pattern + 4);
+ if (k < 0 || k == INT_MAX || k == INT_MIN)
+ goto exact_pattern_matching; // Invalid format
+
+ // Find the start of the pattern (i.e. the second ' ')
+ uchar const * tmp = pattern + 4;
+ while (*tmp != ' ' && *tmp != 0) ++tmp;
+ if (*tmp != ' ' || tmp == pattern + 4)
+ goto exact_pattern_matching; // Invalid format
+
+ IndelQuery iq(this);
+ //std::cerr << "Pattern: " << tmp+1 << ", k = " << k << std::endl;
+ return iq.align(tmp+1, k);
+ }
+
+ /*************************************************************************
+ * Exact pattern matching
+ */
+ exact_pattern_matching:
CSA::pair_type p = rlcsa->count(std::string((char const *)pattern));
if (p.first > p.second)
return document_result();
this->rlcsa->writeTo(std::string(filename));
}
+ TextCollection::TextPosition getLength() const
+ {
+ return this->rlcsa->getSize() + this->rlcsa->getNumberOfSequences();
+ }
+
+ inline TextCollection::TextPosition LF(uchar c, TextPosition i) const
+ {
+ ++i;
+ if(i < this->rlcsa->getNumberOfSequences())
+ return rlcsa->C(c);
+ return rlcsa->LF(i - this->rlcsa->getNumberOfSequences(), c);
+ }
+
+ uchar* getSuffix(TextPosition pos, unsigned l) const
+ {
+ ++pos;
+ uchar* text = new uchar[l + 1];
+
+ if(l == 0 || pos < this->rlcsa->getNumberOfSequences())
+ {
+ text[0] = 0;
+ return text;
+ }
+ pos -= this->rlcsa->getNumberOfSequences();
+
+ unsigned n = rlcsa->displayFromPosition(pos, l, text);
+ text[n] = 0;
+ return text;
+ }
+
+ DocId getDoc(TextPosition i) const
+ {
+ if(i < this->rlcsa->getNumberOfSequences())
+ return i;
+
+ CSA::pair_type pt = rlcsa->getRelativePosition(this->rlcsa->locate(i - this->rlcsa->getNumberOfSequences()));
+ return pt.first;
+ }
+
private:
const CSA::RLCSA* rlcsa;
std::cerr << "\nFirst symb = " << (char)c << std::endl;
- TextPosition sp = rlcsa->C(c);
- TextPosition ep = rlcsa->C(c+1)-1;
+ TextPosition sp = LF(c, -1);
+ TextPosition ep = LF(c, getLength()-1) - 1;
printf("i = %lu, c = %c, sp = %lu, ep = %lu\n", i, pattern[i], sp, ep);
while (sp<=ep && i>=1)
{
c = (int)pattern[--i];
- sp = LF(c, sp);
- ep = LF(c, ep+1)-1;
+ sp = LF(c, sp-1);
+ ep = LF(c, ep)-1;
printf("new: c = %c, sp = %lu, ep = %lu\n", pattern[i], sp, ep);
}
- uchar* found = rlcsa->display(sp, std::strlen((char const *)pattern), 5, i);
- std::cerr << "found: " << found << " (" << i << std::endl;
-
return std::make_pair(sp, ep);
}*/
-
- inline TextCollection::TextPosition
- LF(uchar c, TextPosition i) const
- {
- if(i == (TextPosition)-1 || i < this->rlcsa->getNumberOfSequences())
- { return this->rlcsa->C(c); }
- return this->rlcsa->LF(i - this->rlcsa->getNumberOfSequences(), c);
- }
-
-
+
void unsupported() const
{
std::cerr << std::endl << "-------------------------------------------------------------\n"
--- /dev/null
+\r
+#include <iostream> \r
+#include "ResultSet.h"\r
+\r
+namespace SXSI\r
+{\r
+\r
+#define W (8*sizeof(ulong))\r
+\r
+#if __WORDSIZE == 64\r
+#define logW 6lu // 64bit system, code checks = lg(W)-1\r
+#else\r
+#define logW 5lu // 32bit system, code checks = lg(W)-1\r
+#endif\r
+\r
+#define divW(p) ((p)>>logW)\r
+#define modW(p) ((p)&(W-1))\r
+\r
+#define setBit(arr,pos) ((arr)[divW(pos)] |= 1lu<<modW(pos))\r
+#define clearBit(arr,pos) ((arr)[divW(pos)] &= ~(1lu<<modW(pos)))\r
+#define getBit(arr,pos) ((arr)[divW(pos)] & (1lu<<modW(pos))) // 0 or !=0\r
+\r
+\r
+ulong ResultSet::lg (ulong n)\r
+{ \r
+ ulong answ=0;\r
+ while (n) { n>>=1lu; answ++; }\r
+ return answ;\r
+}\r
+\r
+ResultSet::ResultSet(ulong n)\r
+{ \r
+ if (logW != lg(W)-1)\r
+ { \r
+ std::cerr << "Error, redefine logW as " << lg(W)-1 << " and recompile\n";\r
+// exit(1);\r
+ }\r
+ this->n = 2*n-1;\r
+ this->lgn = lg(n);\r
+ this->tree = new ulong[(this->n+W-1)/W];\r
+ if (!this->tree)\r
+ {\r
+ std::cerr << "Malloc failed at ResultSet class." << std::endl;\r
+ // exit(1);\r
+ }\r
+ clearBit(this->tree,0); // clear all\r
+}\r
+\r
+ResultSet::~ResultSet()\r
+{ \r
+ delete [] tree;\r
+}\r
+\r
+// to map 1..n to the bottom of the tree when n is not a power of 2\r
+ulong ResultSet::conv (ulong p, ulong n, ulong lgn)\r
+\r
+ { ulong t = n+1-(1lu<<lgn);\r
+ if (p < t) return p;\r
+ return (p<<1lu)-t;\r
+ }\r
+\r
+ulong ResultSet::unconv (ulong p, ulong n, ulong lgn)\r
+\r
+ { ulong t = n+1-(1lu<<lgn);\r
+ if (p < t) return p;\r
+ return (p+t)>>1lu;\r
+ }\r
+\r
+bool ResultSet::get (ulong p) // returns 0 or 1\r
+\r
+ { \r
+ ulong pos = 0;\r
+ ulong pot = this->lgn;\r
+ p = conv(p,n,pot);\r
+ do { if (!getBit(this->tree,pos)) return false;\r
+ pot--;\r
+ pos = (pos<<1lu)+1+(p>>pot);\r
+ p &= ~(1lu<<pot);\r
+ }\r
+ while (pos < n);\r
+ return true;\r
+ }\r
+\r
+void ResultSet::set (ulong p)\r
+\r
+ { \r
+ ulong pos = 0;\r
+ ulong npos;\r
+ ulong pot = this->lgn;\r
+ p = conv(p,n,pot);\r
+ do { npos = (pos<<1lu)+1;\r
+ if (!getBit(this->tree,pos))\r
+ { setBit(this->tree,pos);\r
+ if (npos < n) \r
+ { clearBit(this->tree,npos);\r
+ clearBit(this->tree,npos+1);\r
+ }\r
+ }\r
+ pot--;\r
+ pos = npos+(p>>pot);\r
+ p &= ~(1lu<<pot);\r
+ }\r
+ while (pos < n);\r
+ }\r
+\r
+ // returns final value of bit at pos\r
+ \r
+ulong ResultSet::clearRangeLeft (ulong p1, ulong n, ulong pos, ulong pot)\r
+ { ulong npos;\r
+ ulong bit;\r
+ if (!getBit(tree,pos)) return 0; // range already zeroed\r
+ p1 &= ~(1lu<<pot);\r
+ if (p1 == 0) // full range to clear\r
+ { clearBit(tree,pos); return 0; }\r
+ // p1 != 0, there must be children\r
+ pot--;\r
+ npos = (pos<<1lu)+1;\r
+ if ((p1>>pot) == 0) // go left, clear right\r
+ { clearBit(tree,npos+1);\r
+ bit = clearRangeLeft(p1,n,npos,pot);\r
+ } \r
+ else // go right\r
+ { bit = clearRangeLeft(p1,n,npos+1,pot);\r
+ if (!bit) bit = getBit(tree,npos);\r
+ }\r
+ if (!bit) clearBit(tree,pos);\r
+ return bit;\r
+ }\r
+\r
+ulong ResultSet::clearRangeRight (ulong p2, ulong n, ulong pos, ulong pot)\r
+\r
+ { ulong npos;\r
+ ulong bit;\r
+ if (!getBit(tree,pos)) return 0; // range already zeroed\r
+ p2 &= ~(1lu<<pot);\r
+ if (p2 == 0) return 1; // empty range to clear, and bit is 1 for sure\r
+ // p2 != 0, there must be children\r
+ pot--;\r
+ npos = (pos<<1lu)+1;\r
+ if ((p2>>pot) == 1) // go right, clear left\r
+ { clearBit(tree,npos);\r
+ bit = clearRangeRight(p2,n,npos+1,pot);\r
+ } \r
+ else // go left\r
+ { bit = clearRangeRight(p2,n,npos,pot);\r
+ if (!bit) bit = getBit(tree,npos+1);\r
+ }\r
+ if (!bit) clearBit(tree,pos);\r
+ return bit;\r
+ }\r
+\r
+ulong ResultSet::clearBoth (ulong n, ulong p1, ulong p2, ulong pos, ulong pot)\r
+\r
+ { ulong npos,npos1,npos2;\r
+ ulong bit;\r
+ if (!getBit(tree,pos)) return 0; // range is already zeroed\r
+ npos = (pos<<1lu)+1;\r
+ // children must exist while the path is unique, as p1<p2\r
+ pot--;\r
+ npos1 = npos+(p1>>pot);\r
+ npos2 = npos+(p2>>pot);\r
+ if (npos1 == npos2) // we're inside npos1=npos2\r
+ { bit = clearBoth (n,p1&~(1lu<<pot),p2&~(1lu<<pot),npos1,pot);\r
+ bit |= getBit(tree,npos+1-(p1>>pot)); // the other\r
+ }\r
+ else // p1 and p2 take different paths here\r
+ { bit = clearRangeLeft(p1,n,npos1,pot); \r
+ bit |= clearRangeRight(p2,n,npos2,pot);\r
+ }\r
+ if (!bit) clearBit(tree,pos);\r
+ return bit;\r
+ }\r
+\r
+void ResultSet::clearRange (ulong p1, ulong p2)\r
+\r
+ { if ((p2+1)<<1lu > this->n) \r
+ clearRangeLeft(conv(p1,this->n,this->lgn),this->n,0,this->lgn);\r
+ else clearBoth(n,conv(p1,n,this->lgn),conv(p2+1,n,lgn),0,lgn);\r
+ }\r
+\r
+ulong ResultSet::nextSmallest (ulong n, ulong pos, ulong pot)\r
+\r
+ { ulong p = 0;\r
+ while (1)\r
+ { pot--;\r
+ pos = (pos<<1lu)+1;\r
+ if (pos >= n) return p;\r
+ if (!getBit(tree,pos)) { pos++; p |= (1lu<<pot); }\r
+ }\r
+ }\r
+\r
+ulong ResultSet::nextLarger (ulong n, ulong p, ulong pos, ulong pot)\r
+\r
+ { ulong answ;\r
+ if (!getBit(tree,pos)) return ~0lu; // no answer\r
+ pot--;\r
+ pos = (pos<<1lu)+1;\r
+ if (pos >= n) return 0; // when n is not a power of 2, missing leaves\r
+ if ((p>>pot) == 0) // p goes left\r
+ { answ = nextLarger(n,p&~(1lu<<pot),pos,pot);\r
+ if (answ != ~0lu) return answ;\r
+ if (!getBit(tree,pos+1)) return ~0lu; // no answer\r
+ return (1lu<<pot) | nextSmallest(n,pos+1,pot);\r
+ }\r
+ else \r
+ { answ = nextLarger(n,p&~(1lu<<pot),pos+1,pot);\r
+ if (answ != ~0lu) return (1lu<<pot) | answ;\r
+ return ~0lu;\r
+ }\r
+ }\r
+\r
+\r
+void ResultSet::clear()\r
+{\r
+ clearBit(this->tree,0);\r
+}\r
+\r
+ulong ResultSet::nextResult (ulong p) // returns pos of next(p) or -1 if none\r
+\r
+ { ulong answ;\r
+ if (((p+1)<<1lu) > this->n) return ~0lu; // next(last), p+1 out of bounds\r
+ answ = nextLarger(this->n,conv(p+1,this->n,this->lgn),0,this->lgn);\r
+ if (answ == ~0lu) return ~0lu;\r
+ return unconv(answ,this->n,this->lgn);\r
+ }\r
+\r
+} // Namespace\r
--- /dev/null
+// Dynamic result set class
+// Originally by Gonzalo Navarro
+
+#ifndef _RESULTSET_H_
+#define _RESULTSET_H_
+
+namespace SXSI
+{
+
+#ifndef ulong
+typedef unsigned long ulong;
+#endif
+
+class ResultSet {
+private:
+ ulong n, lgn;
+ ulong *tree;
+ ulong lg(ulong);
+
+ ulong conv (ulong p, ulong n, ulong lgn);
+ ulong unconv (ulong p, ulong n, ulong lgn);
+ ulong clearRangeLeft (ulong p1, ulong n, ulong pos, ulong pot);
+ ulong clearRangeRight (ulong p2, ulong n, ulong pos, ulong pot);
+ ulong clearBoth (ulong n, ulong p1, ulong p2, ulong pos, ulong pot);
+
+ ulong nextSmallest (ulong n, ulong pos, ulong pot);
+ ulong nextLarger (ulong n, ulong p, ulong pos, ulong pot);
+
+ void prnspace (ulong k);
+public:
+ // creates empty results data structure for n nodes numbered 0..n-1
+ ResultSet(ulong n);
+ ~ResultSet();
+
+ // returns 0/1 telling whether result p is not/is present in R
+ bool get(ulong p);
+
+ // inserts result p into R
+ void set(ulong p);
+
+ // clears all results p1..p2 in R
+ void clearRange (ulong p1, ulong p2);
+ // clears all results
+ void clear();
+
+ // returns pos of next(p) in R, or -1 if none
+ ulong nextResult (ulong p);
+
+};
+
+}
+
+#endif // _RESULTSET_H_
virtual full_result FullKMismatches(uchar const *, unsigned) const = 0;
virtual full_result FullKErrors(uchar const *, unsigned) const = 0;
+
+ virtual TextPosition getLength() const
+ {
+ std::cerr << "TextCollection::getLength() is unsupported! Use RLCSA instead." << std::endl;
+ std::exit(2);
+ return 0;
+ }
+
+ virtual TextPosition LF(uchar c, TextPosition i) const
+ {
+ std::cerr << "TextCollection::LF() is unsupported! Use RLCSA instead." << std::endl;
+ std::exit(2);
+ return 0;
+ }
+
+ virtual uchar* getSuffix(TextPosition pos, unsigned l) const
+ {
+ std::cerr << "TextCollection::getSuffix() is unsupported! Use RLCSA instead." << std::endl;
+ std::exit(2);
+ return 0;
+ }
+
+ virtual DocId getDoc(TextPosition i) const
+ {
+ std::cerr << "TextCollection::getDoc() is unsupported! Use RLCSA instead." << std::endl;
+ std::exit(2);
+ return 0;
+ }
+
+
protected:
// Protected constructor; use TextCollectionBuilder
TextCollection() { };
BitRank.o: BitRank.cpp BitRank.h BlockArray.h Tools.h
+EditDistance.o: EditDistance.cpp EditDistance.h Tools.h
FMIndex.o: FMIndex.cpp FMIndex.h incbwt/bits/deltavector.h \
incbwt/bits/bitvector.h incbwt/bits/../misc/definitions.h \
incbwt/bits/bitbuffer.h BitRank.h BlockArray.h Tools.h TextCollection.h \
TextStorage.h ArrayDoc.h
FMIndexBuilder.o: FMIndexBuilder.cpp incbwt/rlcsa_builder.h \
- incbwt/rlcsa.h incbwt/bits/vectors.h incbwt/bits/deltavector.h \
- incbwt/bits/bitvector.h incbwt/bits/../misc/definitions.h \
- incbwt/bits/bitbuffer.h incbwt/bits/rlevector.h incbwt/sasamples.h \
- incbwt/misc/definitions.h incbwt/bits/bitbuffer.h \
- incbwt/bits/deltavector.h incbwt/lcpsamples.h incbwt/bits/array.h \
- incbwt/misc/utils.h incbwt/misc/definitions.h incbwt/misc/parameters.h \
- incbwt/bits/deltavector.h FMIndexBuilder.h TextCollectionBuilder.h \
- TextCollection.h Tools.h TextStorage.h FMIndex.h BitRank.h BlockArray.h \
- ArrayDoc.h
+ incbwt/rlcsa.h incbwt/bits/deltavector.h incbwt/bits/bitvector.h \
+ incbwt/bits/../misc/definitions.h incbwt/bits/bitbuffer.h \
+ incbwt/bits/rlevector.h incbwt/bits/nibblevector.h incbwt/sasamples.h \
+ incbwt/misc/definitions.h incbwt/bits/bitbuffer.h incbwt/lcpsamples.h \
+ incbwt/bits/array.h incbwt/misc/utils.h incbwt/misc/definitions.h \
+ incbwt/misc/parameters.h incbwt/bits/deltavector.h FMIndexBuilder.h \
+ TextCollectionBuilder.h TextCollection.h Tools.h TextStorage.h FMIndex.h \
+ BitRank.h BlockArray.h ArrayDoc.h
HeapProfiler.o: HeapProfiler.cpp HeapProfiler.h
+Query.o: Query.cpp Query.h TextCollection.h Tools.h
RLCSABuilder.o: RLCSABuilder.cpp incbwt/rlcsa_builder.h incbwt/rlcsa.h \
- incbwt/bits/vectors.h incbwt/bits/deltavector.h incbwt/bits/bitvector.h \
+ incbwt/bits/deltavector.h incbwt/bits/bitvector.h \
incbwt/bits/../misc/definitions.h incbwt/bits/bitbuffer.h \
- incbwt/bits/rlevector.h incbwt/sasamples.h incbwt/misc/definitions.h \
- incbwt/bits/bitbuffer.h incbwt/bits/deltavector.h incbwt/lcpsamples.h \
+ incbwt/bits/rlevector.h incbwt/bits/nibblevector.h incbwt/sasamples.h \
+ incbwt/misc/definitions.h incbwt/bits/bitbuffer.h incbwt/lcpsamples.h \
incbwt/bits/array.h incbwt/misc/utils.h incbwt/misc/definitions.h \
incbwt/misc/parameters.h RLCSABuilder.h TextCollectionBuilder.h \
TextCollection.h Tools.h TextStorage.h incbwt/bits/deltavector.h \
- RLCSAWrapper.h incbwt/rlcsa.h
+ RLCSAWrapper.h IndelQuery.h ResultSet.h EditDistance.h Query.h \
+ incbwt/rlcsa.h
+ResultSet.o: ResultSet.cpp ResultSet.h
TextCollection.o: TextCollection.cpp TextCollection.h Tools.h FMIndex.h \
incbwt/bits/deltavector.h incbwt/bits/bitvector.h \
incbwt/bits/../misc/definitions.h incbwt/bits/bitbuffer.h BitRank.h \
BlockArray.h TextStorage.h ArrayDoc.h SWCSAWrapper.h \
swcsa/utils/defValues.h swcsa/utils/valstring.h swcsa/interface.h \
- RLCSAWrapper.h incbwt/rlcsa.h incbwt/bits/vectors.h \
- incbwt/bits/deltavector.h incbwt/bits/rlevector.h incbwt/sasamples.h \
- incbwt/misc/definitions.h incbwt/bits/bitbuffer.h \
- incbwt/bits/deltavector.h incbwt/lcpsamples.h incbwt/bits/array.h \
+ RLCSAWrapper.h IndelQuery.h ResultSet.h EditDistance.h Query.h \
+ incbwt/rlcsa.h incbwt/bits/deltavector.h incbwt/bits/rlevector.h \
+ incbwt/bits/nibblevector.h incbwt/sasamples.h incbwt/misc/definitions.h \
+ incbwt/bits/bitbuffer.h incbwt/lcpsamples.h incbwt/bits/array.h \
incbwt/misc/utils.h incbwt/misc/definitions.h incbwt/misc/parameters.h
TextCollectionBuilder.o: TextCollectionBuilder.cpp \
TextCollectionBuilder.h TextCollection.h Tools.h TextStorage.h \
build_plcp.o: build_plcp.cpp rlcsa.h bits/deltavector.h bits/bitvector.h \
bits/../misc/definitions.h bits/bitbuffer.h bits/rlevector.h \
bits/nibblevector.h sasamples.h misc/definitions.h bits/bitbuffer.h \
- lcpsamples.h bits/array.h bits/vectors.h misc/utils.h misc/definitions.h \
+ lcpsamples.h bits/array.h misc/utils.h misc/definitions.h \
misc/parameters.h
build_rlcsa.o: build_rlcsa.cpp rlcsa_builder.h rlcsa.h bits/deltavector.h \
bits/bitvector.h bits/../misc/definitions.h bits/bitbuffer.h \
bits/rlevector.h bits/nibblevector.h sasamples.h misc/definitions.h \
- bits/bitbuffer.h lcpsamples.h bits/array.h bits/vectors.h misc/utils.h \
+ bits/bitbuffer.h lcpsamples.h bits/array.h misc/utils.h \
misc/definitions.h misc/parameters.h
display_test.o: display_test.cpp rlcsa.h bits/deltavector.h \
bits/bitvector.h bits/../misc/definitions.h bits/bitbuffer.h \
bits/rlevector.h bits/nibblevector.h sasamples.h misc/definitions.h \
- bits/bitbuffer.h lcpsamples.h bits/array.h bits/vectors.h misc/utils.h \
+ bits/bitbuffer.h lcpsamples.h bits/array.h misc/utils.h \
misc/definitions.h misc/parameters.h
extract_sequence.o: extract_sequence.cpp rlcsa.h bits/deltavector.h \
bits/bitvector.h bits/../misc/definitions.h bits/bitbuffer.h \
bits/rlevector.h bits/nibblevector.h sasamples.h misc/definitions.h \
- bits/bitbuffer.h lcpsamples.h bits/array.h bits/vectors.h misc/utils.h \
+ bits/bitbuffer.h lcpsamples.h bits/array.h misc/utils.h \
misc/definitions.h misc/parameters.h
lcp_test.o: lcp_test.cpp rlcsa.h bits/deltavector.h bits/bitvector.h \
bits/../misc/definitions.h bits/bitbuffer.h bits/rlevector.h \
bits/nibblevector.h sasamples.h misc/definitions.h bits/bitbuffer.h \
- lcpsamples.h bits/array.h bits/vectors.h misc/utils.h misc/definitions.h \
+ lcpsamples.h bits/array.h misc/utils.h misc/definitions.h \
misc/parameters.h
-lcpsamples.o: lcpsamples.cpp lcpsamples.h bits/array.h \
- bits/../misc/definitions.h bits/bitbuffer.h bits/vectors.h misc/utils.h \
+lcpsamples.o: lcpsamples.cpp lcpsamples.h bits/deltavector.h \
+ bits/bitvector.h bits/../misc/definitions.h bits/bitbuffer.h \
+ bits/rlevector.h bits/nibblevector.h bits/array.h misc/utils.h \
misc/definitions.h
locate_test.o: locate_test.cpp rlcsa.h bits/deltavector.h \
bits/bitvector.h bits/../misc/definitions.h bits/bitbuffer.h \
bits/rlevector.h bits/nibblevector.h sasamples.h misc/definitions.h \
- bits/bitbuffer.h lcpsamples.h bits/array.h bits/vectors.h misc/utils.h \
+ bits/bitbuffer.h lcpsamples.h bits/array.h misc/utils.h \
misc/definitions.h misc/parameters.h
parallel_build.o: parallel_build.cpp rlcsa_builder.h rlcsa.h \
bits/deltavector.h bits/bitvector.h bits/../misc/definitions.h \
bits/bitbuffer.h bits/rlevector.h bits/nibblevector.h sasamples.h \
misc/definitions.h bits/bitbuffer.h lcpsamples.h bits/array.h \
- bits/vectors.h misc/utils.h misc/definitions.h misc/parameters.h
+ misc/utils.h misc/definitions.h misc/parameters.h
read_bwt.o: read_bwt.cpp rlcsa.h bits/deltavector.h bits/bitvector.h \
bits/../misc/definitions.h bits/bitbuffer.h bits/rlevector.h \
bits/nibblevector.h sasamples.h misc/definitions.h bits/bitbuffer.h \
- lcpsamples.h bits/array.h bits/vectors.h misc/utils.h misc/definitions.h \
+ lcpsamples.h bits/array.h misc/utils.h misc/definitions.h \
misc/parameters.h
rlcsa.o: rlcsa.cpp rlcsa.h bits/deltavector.h bits/bitvector.h \
bits/../misc/definitions.h bits/bitbuffer.h bits/rlevector.h \
bits/nibblevector.h sasamples.h misc/definitions.h bits/bitbuffer.h \
- lcpsamples.h bits/array.h bits/vectors.h misc/utils.h misc/definitions.h \
- misc/parameters.h qsufsort/qsufsort.h qsufsort/../misc/definitions.h
+ lcpsamples.h bits/array.h misc/utils.h misc/definitions.h \
+ misc/parameters.h qsufsort/qsufsort.h qsufsort/../misc/definitions.h \
+ bits/vectors.h
rlcsa_builder.o: rlcsa_builder.cpp rlcsa_builder.h rlcsa.h \
bits/deltavector.h bits/bitvector.h bits/../misc/definitions.h \
bits/bitbuffer.h bits/rlevector.h bits/nibblevector.h sasamples.h \
misc/definitions.h bits/bitbuffer.h lcpsamples.h bits/array.h \
- bits/vectors.h misc/utils.h misc/definitions.h misc/parameters.h
+ misc/utils.h misc/definitions.h misc/parameters.h
rlcsa_grep.o: rlcsa_grep.cpp rlcsa.h bits/deltavector.h bits/bitvector.h \
bits/../misc/definitions.h bits/bitbuffer.h bits/rlevector.h \
bits/nibblevector.h sasamples.h misc/definitions.h bits/bitbuffer.h \
- lcpsamples.h bits/array.h bits/vectors.h misc/utils.h misc/definitions.h \
+ lcpsamples.h bits/array.h misc/utils.h misc/definitions.h \
misc/parameters.h
rlcsa_test.o: rlcsa_test.cpp rlcsa.h bits/deltavector.h bits/bitvector.h \
bits/../misc/definitions.h bits/bitbuffer.h bits/rlevector.h \
bits/nibblevector.h sasamples.h misc/definitions.h bits/bitbuffer.h \
- lcpsamples.h bits/array.h bits/vectors.h misc/utils.h misc/definitions.h \
+ lcpsamples.h bits/array.h misc/utils.h misc/definitions.h \
misc/parameters.h
sample_lcp.o: sample_lcp.cpp rlcsa.h bits/deltavector.h bits/bitvector.h \
bits/../misc/definitions.h bits/bitbuffer.h bits/rlevector.h \
bits/nibblevector.h sasamples.h misc/definitions.h bits/bitbuffer.h \
- lcpsamples.h bits/array.h bits/vectors.h misc/utils.h misc/definitions.h \
+ lcpsamples.h bits/array.h misc/utils.h misc/definitions.h \
misc/parameters.h
sasamples.o: sasamples.cpp sasamples.h misc/definitions.h \
bits/bitbuffer.h bits/../misc/definitions.h bits/deltavector.h \
bits/bitvector.h bits/bitbuffer.h misc/utils.h misc/definitions.h
+text_generator.o: text_generator.cpp rlcsa.h bits/deltavector.h \
+ bits/bitvector.h bits/../misc/definitions.h bits/bitbuffer.h \
+ bits/rlevector.h bits/nibblevector.h sasamples.h misc/definitions.h \
+ bits/bitbuffer.h lcpsamples.h bits/array.h misc/utils.h \
+ misc/definitions.h misc/parameters.h
array.o: bits/array.cpp bits/array.h bits/../misc/definitions.h \
bits/bitbuffer.h
bitvector.o: bits/bitvector.cpp bits/bitvector.h \
dcover_obs = dcover/difference_cover.o
TextCollection_obs = TextCollection.o TextCollectionBuilder.o FMIndexBuilder.o RLCSABuilder.o FMIndex.o Tools.o \
- TextStorage.o ${LIBRLCSA} ${LIBCDSA} ${LIBLZTRIE} ${LIBSWCSA}
+ TextStorage.o Query.o EditDistance.o ResultSet.o ${LIBRLCSA} ${LIBCDSA} ${LIBLZTRIE} ${LIBSWCSA}
TCDebug_obs = bittree.o rbtree.o dynFMI.o
all: testTextCollection