*****************************************************************************/
#include "TCImplementation.h"
+//#define DEBUG_MEMUSAGE
#ifdef DEBUG_MEMUSAGE
#include "HeapProfiler.h" // FIXME remove
#endif
{
// Save file version info
-const uchar TCImplementation::versionFlag = 4;
+const uchar TCImplementation::versionFlag = 6;
/**
* Constructor inits an empty dynamic FM-index.
return false; // Empty texts are not indexed
}
-uchar* TCImplementation::GetText(DocId k) const
+uchar * TCImplementation::GetText(DocId k) const
{
assert(k < (DocId)numberOfTexts);
- TextPosition i = k;
+
+ return textStorage->GetText(k);
+/* TextPosition i = k;
string result;
// Reserve average string length to avoid reallocs
res[i] = '\0';
for (ulong j = 0; j < i; ++j)
res[i-j-1] = result[j];
- return res;
+ return res;*/
}
/*
* Not supported
throw std::runtime_error("TCImplementation::Save(): file write error (maxTextLength).");
Doc->save(file);
+ textStorage->Save(file);
fflush(file);
}
throw std::runtime_error("TCImplementation::Load(): file read error (maxTextLength).");
Doc = static_sequence::load(file);
+ textStorage = new TextStorage(file);
// FIXME Construct data structures with new samplerate
//maketables();
delete suffixes;
delete suffixDocId;
delete Doc;
+ delete textStorage;
}
void TCImplementation::makewavelet(uchar *bwt)
p=bwtEndPos;
textId = numberOfTexts;
+ TextStorageBuilder tsbuilder(n);
+
/**
* Second pass: populate tables suffixes and suffixDocId.
*/
}
uchar c = alphabetrank->access(p, alphabetrank_i_tmp);
+ tsbuilder[i] = c;
+
if (c == '\0')
{
--textId;
p = C[c]+alphabetrank_i_tmp-1;
}
assert(textId == 0);
-
delete textStartPos;
+ textStorage = tsbuilder.InitTextStorage();
+
#ifdef DEBUG_MEMUSAGE
std::cerr << "max heap usage before Doc: " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
HeapProfiler::ResetMaxHeapConsumption();
#endif
#undef bitset
+#include "TextStorage.h"
+
+
namespace SXSI
{
~TCImplementation();
bool EmptyText(DocId) const;
- uchar* GetText(DocId) const;
+
+ /**
+ * Returns a pointer to the original text.
+ *
+ * Do *not* try to free the array.
+ * (However, this implementation is subject to change.)
+ *
+ * See TextStorage.h for details.
+ */
+ uchar * GetText(DocId) const;
+
/**
- * Next method is not supported:
- * Supporting GetText for some substring [i,j]
- * would require more space.
+ * Returns a pointer to the beginning of texts i, i+1, ..., j.
+ * Texts are separated by a '\0' byte.
+ *
+ * Do *not* try to free the array.
+ * (However, this implementation is subject to change.)
+ *
+ * See TextStorage.h for details.
+ */
+ uchar * GetText(DocId i, DocId j) const;
+
+ /**
+ * Returns a substring of given text ID.
+ *
+ * FIXME This may be reimplemented via TextStorage.
*/
// uchar* GetText(DocId, TextPosition, TextPosition) const;
static_sequence * alphabetrank;
// Sample structures for texts longer than samplerate
- static_bitsequence * sampled; // FIXME Replace with RRR02
+ static_bitsequence * sampled;
BlockArray *suffixes;
BlockArray *suffixDocId;
// Array of document id's in the order of end-markers in BWT
static_sequence *Doc;
- // Following are not part of the public API
+ // Text storage for fast extraction
+ TextStorage * textStorage;
+
+ // Following methods are not part of the public API
uchar * BWT(uchar *);
void makewavelet(uchar *);
void maketables();