using std::make_pair;
using std::map;
-using SXSI::TextCollection;
+namespace SXSI
+{
// Save file version info
const uchar CSA::versionFlag = 2;
throw std::runtime_error("CSA::MakeStatic(): Data structure is already static (dynFMI == 0).");
#endif
- // Bitvector of empty texts
+ // Bitvector of empty texts, FIXME Remove?
{
//std::cout << std::endl << "texts: " << numberOfTexts << ", all texts " << numberOfAllTexts << ", size : " << emptyTextId.size() <<std::endl;
ulong *tempB = new ulong[numberOfAllTexts/W + 1];
emptyTextRank = new BSGAP(tempB, numberOfAllTexts, true);
}
-/* std::cout << "texts: ";
- for (ulong i = 0; i < n; i++)
- if (texts[i] > 50)
- std::cout << texts[i];
- else
- std::cout << (int)texts[i];
- std::cout << std::endl;*/
uchar *bwt = new uchar[n];
{
- // FIXME More succinct solution needed
- unsigned* maptexts = new unsigned[n+1];
+ // More succinct solution via StringIterator, see below.
+/* unsigned* maptexts = new unsigned[n+1];
// Map text chars to [0..255+numberOfTexts]
unsigned count = 0;
for (ulong i = 0; i < n; ++i)
maptexts[i] = ++count; // endmarkers \in [1..numberOfTexts]
else {
uchar c = (uchar)texts[i];
- maptexts[i] = (unsigned)c + numberOfTexts + 1;
+ maptexts[i] = (unsigned)c + numberOfTexts;
}
-
maptexts[n] = '\0';
- texts.erase();
- texts.reserve(0); // Release capacity
assert(count == numberOfTexts);
- bwtEndPos = (ulong)compute_bwt(&maptexts[0], &maptexts[n],
- &bwt[0], numberOfTexts + 1);
+ std::cout << "maptext: ";
+ for (ulong i = 0; i <= n; ++i)
+ std::cout << maptexts[i] << ", ";
+ std::cout << std::endl;*/
+
+ // Mark endmarker positions into bitvector
+ ulong * endmarkers = new ulong[n/W+1];
+ for (ulong i = 0; i < n/W+1; ++i)
+ endmarkers[i] = 0;
+ for (ulong i = 0; i < n; ++i)
+ if (texts[i] == 0)
+ Tools::SetField(endmarkers, 1, i, 1);
+ BitRank* br = new BitRank(endmarkers, n, true);
+ assert(numberOfTexts == br->rank(n-1));
+
+ // Build iterators, FIXME clean up iterator construction.
+ StringIterator itBegin((uchar const *)texts.c_str(), (uchar const *)texts.c_str(), n, numberOfTexts, br);
+ StringIterator itEnd((uchar const *)texts.c_str() + n,(uchar const *)texts.c_str(), n, numberOfTexts, br);
+
+ bwtEndPos = (ulong)compute_bwt(itBegin, itEnd, //&maptexts[0], &maptexts[n],
+ &bwt[0], numberOfTexts);
bwt[--bwtEndPos] = '\0';
- delete [] maptexts;
+ texts.erase();
+ texts.reserve(0); // Release capacity
+ delete br; // bitvector of endmarkers
} // End of bw transform
#ifdef CSA_TEST_BWT
{
uchar *bwtTest = dynFMI->getBWT();
-/* printf("123456789012345678901234567890123456789\n");
+ printf("123456789012345678901234567890123456789\n");
for (TextPosition i = 0; i < n && i < 100; i ++)
if (bwt[i] < 50)
printf("%d", (int)bwt[i]);
printf("%d", (int)bwtTest[i]);
else
printf("%c", bwtTest[i]);
- printf("\n");*/
+ printf("\n");
// Sanity check
assert(numberOfTexts == dynFMI->getCollectionSize());
// delete [] bwt;
//alphabetrank = new RLWaveletTree(bwt, n); // Deletes bwt!
- // FIXME: static_sequence_wvtree accepts only uint arrays
- uint *text = new uint[n];
- for (i = 0; i < n; ++i) // Silly
- text[i] = bwt[i];
- delete [] bwt;
- bwt = 0;
-
alphabet_mapper * am = new alphabet_mapper_none();
static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(16); // FIXME samplerate?
- wt_coder * wtc = new wt_coder_huff(text,n,am);
- alphabetrank = new static_sequence_wvtree(text,n,wtc,bmb,am);
+ wt_coder * wtc = new wt_coder_huff(bwt,n,am);
+ alphabetrank = new static_sequence_wvtree(bwt,n,wtc,bmb,am);
delete bmb;
- delete [] text;
- text = 0;
+ delete [] bwt;
}
void CSA::maketables()
return x | (bit << pos);
}
+} // namespace SXSI
+