-#include "incbwt/rlcsa_builder.h"
#include "TextCollectionBuilder.h"
-
-// Un-comment next line to run a comparison of resulting BWT
-//#define TCB_TEST_BWT
-
-#ifdef TCB_TEST_BWT
-#include "dynFMI.h"
-#endif
-
-#include "TCImplementation.h"
+#include "FMIndexBuilder.h"
+#include "SWCSABuilder.h"
+#include "RLCSABuilder.h"
namespace SXSI
{
-
-struct TCBuilderRep
-{
- unsigned samplerate;
- CSA::RLCSABuilder * sa;
-
- ulong n;
- // Total number of texts in the collection
- unsigned numberOfTexts;
- // Length of the longest text
- ulong maxTextLength;
- ulong numberOfSamples;
-
-#ifdef TCB_TEST_BWT
- DynFMI *dynFMI;
-#endif
-};
-
-/**
- * Init text collection
- *
- */
-TextCollectionBuilder::TextCollectionBuilder(unsigned samplerate, ulong estimatedInputLength)
- : p_(new struct TCBuilderRep())
-{
- p_->n = 0;
- p_->samplerate = samplerate;
- p_->numberOfTexts = 0;
- p_->numberOfSamples = 0;
-
- // Current params: 8 bytes, no samples, buffer size n/10 bytes.
- p_->sa = new CSA::RLCSABuilder(8, 0, estimatedInputLength/10);
- assert(p_->sa->isOk());
-
-#ifdef TCB_TEST_BWT
- uchar temp[256];
- for (unsigned i = 0; i < 255; ++i)
- temp[i] = i+1;
- temp[255] = 0;
- p_->dynFMI = new DynFMI(temp, 1, 255, false);
-#endif
-}
-
-TextCollectionBuilder::~TextCollectionBuilder()
-{
-#ifdef TCB_TEST_BWT
- delete p_->dynFMI;
-#endif
-
- delete p_->sa;
- delete p_;
-}
-
-void TextCollectionBuilder::InsertText(uchar const * text)
-{
- TextCollection::TextPosition m = std::strlen((char *)text) + 1;
- if (m > p_->maxTextLength)
- p_->maxTextLength = m; // Store length of the longest text seen so far.
-
- if (m > 1)
- {
- p_->n += m;
- p_->numberOfTexts ++;
- p_->numberOfSamples += (m-1)/p_->samplerate;
-
-#ifdef TCB_TEST_BWT
- p_->dynFMI->addText(text, m);
-#endif
- p_->sa->insertSequence((char*)text, m-1, 0);
- assert(p_->sa->isOk());
- }
- else
- {
- // FIXME indexing empty texts
- std::cerr << "TextCollectionBuilder::InsertText() error: can not index empty texts!" << std::endl;
- exit(1);
- }
-}
-
-
-TextCollection * TextCollectionBuilder::InitTextCollection()
+/**
+ * Factory for concrete text collection builders.
+ *
+ * Selects the builder implementation from the given index type:
+ *  - index_type_default: FMIndexBuilder(samplerate, estimatedInputLength)
+ *  - index_type_swcsa:   SWCSABuilder(samplerate); estimatedInputLength is not used
+ *  - index_type_rlcsa:   RLCSABuilder(samplerate, estimatedInputLength)
+ *
+ * @param samplerate           Forwarded to the constructor of the selected builder.
+ * @param type                 Selects which builder implementation is constructed.
+ * @param estimatedInputLength Forwarded to the FMIndex and RLCSA builders only.
+ * @return Pointer to a builder allocated with new. Presumably the caller
+ *         owns it and must delete it -- TODO confirm against callers.
+ *
+ * If type matches none of the enumerators above, an error message is
+ * written to std::cerr and the process terminates via std::exit(2);
+ * the function does not return in that case.
+ */
+TextCollectionBuilder* TextCollectionBuilder::create(unsigned samplerate,
+ index_type_t type,
+ ulong estimatedInputLength)
{
- uchar * bwt = 0;
- CSA::usint length = 0;
- if (p_->numberOfTexts == 0)
- {
- p_->numberOfTexts ++; // Add one empty text
- bwt = new uchar[2];
- bwt[0] = '\0';
- bwt[1] = '\0';
- length = 1;
- p_->maxTextLength = 1;
- }
- else
+ // No default label on purpose: each case returns directly, and leaving
+ // it out lets the compiler warn when a new enumerator is not handled.
+ switch (type)
{
- bwt = (uchar *)p_->sa->getBWT(length);
- delete p_->sa;
- p_->sa = 0;
-
- assert(length == p_->n);
-
-#ifdef TCB_TEST_BWT
- {
- uchar *bwtTest = p_->dynFMI->getBWT();
- printf("123456789012345678901234567890123456789\n");
- for (ulong i = 0; i < p_->n && i < 100; i ++)
- if (bwt[i] < 50)
- printf("%d", (int)bwt[i]);
- else
- printf("%c", bwt[i]);
- printf("\n");
- for (ulong i = 0; i < p_->n && i < 100; i ++)
- if (bwtTest[i] < 50)
- printf("%d", (int)bwtTest[i]);
- else
- printf("%c", bwtTest[i]);
- printf("\n");
-
- // Sanity check
- assert(p_->numberOfTexts == p_->dynFMI->getCollectionSize());
-
- delete p_->dynFMI;
- p_->dynFMI = 0;
- for (ulong i = 0; i < p_->n; ++i)
- if (bwt[i] != bwtTest[i])
- {
- std::cout << "i = " << i << ", bwt = " << (unsigned)bwt[i] << ", "
- << (unsigned)bwtTest[i] << std::endl;
- assert(0);
- }
- delete [] bwtTest;
- }
-#endif // TCB_TEST_BWT
+ case index_type_default:
+ return new FMIndexBuilder(samplerate, estimatedInputLength);
+ case index_type_swcsa:
+ // The SWCSA builder takes the sample rate only.
+ return new SWCSABuilder(samplerate);
+ case index_type_rlcsa:
+ return new RLCSABuilder(samplerate, estimatedInputLength);
}
-
- TextCollection *result = new TCImplementation(bwt, (ulong)length,
- p_->samplerate, p_->numberOfTexts, p_->maxTextLength, p_->numberOfSamples);
- return result;
+ // Reached only when type matched none of the cases above.
+ std::cerr << "TextCollectionBuilder::create(): unknown type given: expecting enum value, type = " << type << std::endl;
+ std::exit(2);
}
-
-} // namespace SXSI
+} // Namespace SXSI