From 96755a8cb6d7f1bf688115ef2ff677290c54523e Mon Sep 17 00:00:00 2001 From: nvalimak Date: Mon, 9 Mar 2009 11:31:35 +0000 Subject: [PATCH] Fixed MakeStatic() git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/TextCollection@224 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- CSA.cpp | 68 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 37 insertions(+), 31 deletions(-) diff --git a/CSA.cpp b/CSA.cpp index 5b2b190..a190b48 100644 --- a/CSA.cpp +++ b/CSA.cpp @@ -40,7 +40,8 @@ using std::pair; using std::make_pair; using std::map; -using SXSI::TextCollection; +namespace SXSI +{ // Save file version info const uchar CSA::versionFlag = 2; @@ -204,7 +205,7 @@ void CSA::MakeStatic() throw std::runtime_error("CSA::MakeStatic(): Data structure is already static (dynFMI == 0)."); #endif - // Bitvector of empty texts + // Bitvector of empty texts, FIXME Remove? { //std::cout << std::endl << "texts: " << numberOfTexts << ", all texts " << numberOfAllTexts << ", size : " << emptyTextId.size() < 50) - std::cout << texts[i]; - else - std::cout << (int)texts[i]; - std::cout << std::endl;*/ uchar *bwt = new uchar[n]; { - // FIXME More succinct solution needed - unsigned* maptexts = new unsigned[n+1]; + // More succinct solution via StringIterator, see below. +/* unsigned* maptexts = new unsigned[n+1]; // Map text chars to [0..255+numberOfTexts] unsigned count = 0; for (ulong i = 0; i < n; ++i) @@ -234,25 +228,43 @@ void CSA::MakeStatic() maptexts[i] = ++count; // endmarkers \in [1..numberOfTexts] else { uchar c = (uchar)texts[i]; - maptexts[i] = (unsigned)c + numberOfTexts + 1; + maptexts[i] = (unsigned)c + numberOfTexts; } - maptexts[n] = '\0'; - texts.erase(); - texts.reserve(0); // Release capacity assert(count == numberOfTexts); - bwtEndPos = (ulong)compute_bwt(&maptexts[0], &maptexts[n], - &bwt[0], numberOfTexts + 1); + std::cout << "maptext: "; + for (ulong i = 0; i <= n; ++i) + std::cout << maptexts[i] << ", "; + std::cout << std::endl;*/ + + // Mark endmarker positions into bitvector + ulong * endmarkers = new ulong[n/W+1]; + for (ulong i = 0; i < n/W+1; ++i) + endmarkers[i] = 0; + for (ulong i = 0; i < n; ++i) + if (texts[i] == 0) + Tools::SetField(endmarkers, 1, i, 1); + BitRank* br = new BitRank(endmarkers, n, true); + assert(numberOfTexts == br->rank(n-1)); + + // Build iterators, FIXME clean up iterator construction. + StringIterator itBegin((uchar const *)texts.c_str(), (uchar const *)texts.c_str(), n, numberOfTexts, br); + StringIterator itEnd((uchar const *)texts.c_str() + n,(uchar const *)texts.c_str(), n, numberOfTexts, br); + + bwtEndPos = (ulong)compute_bwt(itBegin, itEnd, //&maptexts[0], &maptexts[n], + &bwt[0], numberOfTexts); bwt[--bwtEndPos] = '\0'; - delete [] maptexts; + texts.erase(); + texts.reserve(0); // Release capacity + delete br; // bitvector of endmarkers } // End of bw transform #ifdef CSA_TEST_BWT { uchar *bwtTest = dynFMI->getBWT(); -/* printf("123456789012345678901234567890123456789\n"); + printf("123456789012345678901234567890123456789\n"); for (TextPosition i = 0; i < n && i < 100; i ++) if (bwt[i] < 50) printf("%d", (int)bwt[i]); @@ -264,7 +276,7 @@ void CSA::MakeStatic() printf("%d", (int)bwtTest[i]); else printf("%c", bwtTest[i]); - printf("\n");*/ + printf("\n"); // Sanity check assert(numberOfTexts == dynFMI->getCollectionSize()); @@ -931,20 +943,12 @@ void CSA::makewavelet(uchar *bwt) // delete [] bwt; //alphabetrank = new RLWaveletTree(bwt, n); // Deletes bwt! - // FIXME: static_sequence_wvtree accepts only uint arrays - uint *text = new uint[n]; - for (i = 0; i < n; ++i) // Silly - text[i] = bwt[i]; - delete [] bwt; - bwt = 0; - alphabet_mapper * am = new alphabet_mapper_none(); static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(16); // FIXME samplerate? - wt_coder * wtc = new wt_coder_huff(text,n,am); - alphabetrank = new static_sequence_wvtree(text,n,wtc,bmb,am); + wt_coder * wtc = new wt_coder_huff(bwt,n,am); + alphabetrank = new static_sequence_wvtree(bwt,n,wtc,bmb,am); delete bmb; - delete [] text; - text = 0; + delete [] bwt; } void CSA::maketables() @@ -1154,3 +1158,5 @@ unsigned CSA::node::SetBit(unsigned x, unsigned pos, unsigned bit) { return x | (bit << pos); } +} // namespace SXSI + -- 2.17.1