{
// Save file version info
-const uchar TCImplementation::versionFlag = 6;
+const uchar TCImplementation::versionFlag = 7;
/**
* Constructor inits an empty dynamic FM-index.
* Samplerate defaults to TEXTCOLLECTION_DEFAULT_SAMPLERATE.
*/
TCImplementation::TCImplementation(uchar * bwt, ulong length, unsigned samplerate_,
- unsigned numberOfTexts_, ulong maxTextLength_, ulong numberOfSamples_)
+ unsigned numberOfTexts_, ulong maxTextLength_, ulong numberOfSamples_, char tsType)
: n(length), samplerate(samplerate_), alphabetrank(0), sampled(0), suffixes(0),
suffixDocId(0), numberOfTexts(numberOfTexts_), maxTextLength(maxTextLength_), Doc(0)
{
makewavelet(bwt); // Deletes bwt!
bwt = 0;
-
+
// Make sampling tables
- maketables(numberOfSamples_);
+ maketables(numberOfSamples_, tsType);
}
bool TCImplementation::EmptyText(DocId k) const
}
-TextCollection::document_result TCImplementation::Kmismaches(uchar const * pattern, unsigned k) const
+TextCollection::document_result TCImplementation::KMismaches(uchar const * pattern, unsigned k) const
{
TextPosition m = strlen((char *)pattern);
if (m == 0)
return result;
}
-TextCollection::document_result TCImplementation::Kerrors(uchar const * pattern, unsigned k) const
+TextCollection::document_result TCImplementation::KErrors(uchar const * pattern, unsigned k) const
{
TextPosition m = strlen((char *)pattern);
if (m == 0)
return result;
}
-TextCollection::full_result TCImplementation::FullKmismatches(uchar const * pattern, unsigned k) const
+TextCollection::full_result TCImplementation::FullKMismatches(uchar const * pattern, unsigned k) const
{
TextPosition m = strlen((char *)pattern);
if (m == 0)
return result;
}
-TextCollection::full_result TCImplementation::FullKerrors(uchar const * pattern, unsigned k) const
+TextCollection::full_result TCImplementation::FullKErrors(uchar const * pattern, unsigned k) const
{
TextPosition m = strlen((char *)pattern);
if (m == 0)
throw std::runtime_error("TCImplementation::Load(): file read error (maxTextLength).");
Doc = static_sequence::load(file);
- textStorage = new TextStorage(file);
+ textStorage = TextStorage::Load(file);
// FIXME Construct data structures with new samplerate
//maketables();
//first call kerrors(pattern,1,n,m+k,k,d,m), where d[i]=i
ulong TCImplementation::kerrors(suffix_range_vector &result, uchar const *pattern, ulong sp, ulong ep, ulong j, unsigned k, ulong const *d, ulong m) const
{
+ ulong sum=0;
if (d[m]<=k) // range of suffixes with at most k-errors found
{
if (sp<=ep)
result.push_back(std::make_pair(sp, ep));
- return (sp<=ep)?ep-sp+1:0ul;
+ sum += (sp<=ep)?ep-sp+1:0ul;
}
if (sp>ep || j==0)
- return 0;
+ return sum;
ulong *dnew = new ulong[m+1];
int c;
ulong spnew;
ulong p,lowerbound;
ulong epnew;
- ulong sum=0;
vector<int> chars = alphabetrank->accessAll(sp, ep);
for (vector<int>::iterator it = chars.begin(); it != chars.end(); ++it)
{
#endif
}
-void TCImplementation::maketables(ulong sampleLength)
+void TCImplementation::maketables(ulong sampleLength, char tsType)
{
// Calculate BWT end-marker position (of last inserted text)
{
HeapProfiler::ResetMaxHeapConsumption();
#endif
- textStorage = tsbuilder.InitTextStorage();
+ textStorage = tsbuilder.InitTextStorage(tsType);
#ifdef DEBUG_MEMUSAGE
std::cerr << "heap usage after tsbuilder init: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " / " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes, " << HeapProfiler::GetHeapConsumption() << " / " << HeapProfiler::GetMaxHeapConsumption() << std::endl;
suffixes = new BlockArray(sampleLength, Tools::CeilLog2(maxTextLength));
suffixDocId = new BlockArray(sampleLength, Tools::CeilLog2(numberOfTexts));
-#ifdef DEBUG_MEMUSAGE
- std::cerr << "heap usage after sampled arrays: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " / " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes, " << HeapProfiler::GetHeapConsumption() << " / " << HeapProfiler::GetMaxHeapConsumption() << std::endl;
- HeapProfiler::ResetMaxHeapConsumption();
-#endif
-
x = n - 2;
posOfSuccEndmarker = n-1;
for(ulong i=0; i<sampleLength; i++) {
while ((posOfSuccEndmarker - x) % samplerate != 0)
{
--x;
- if (tsbuilder[x] == '\0')
+ assert(x != ~0lu);
+ if (textStorage->IsEndmarker(x))
posOfSuccEndmarker = x--;
}
assert((*positions)[i] < n);
// calculate offset from text start:
(*suffixes)[j-1] = textPos - textStorage->TextStartPos((*suffixDocId)[j-1]);
--x;
- if (tsbuilder[x] == '\0')
+ if (x != ~0lu && textStorage->IsEndmarker(x))
posOfSuccEndmarker = x--;
}
delete positions;
+#ifdef DEBUG_MEMUSAGE
+ std::cerr << "heap usage after sampled arrays: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " / " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes, " << HeapProfiler::GetHeapConsumption() << " / " << HeapProfiler::GetMaxHeapConsumption() << std::endl;
+ HeapProfiler::ResetMaxHeapConsumption();
+#endif
+
/**
* Second pass: check tables
*/