--- /dev/null
+/******************************************************************************
+ * Copyright (C) 2009 by Niko Valimaki <nvalimak@cs.helsinki.fi> *
+ * *
+ * This program is free software; you can redistribute it and/or modify *
+ * it under the terms of the GNU Lesser General Public License as published *
+ * by the Free Software Foundation; either version 2 of the License, or *
+ * (at your option) any later version. *
+ * *
+ * This program is distributed in the hope that it will be useful, *
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of *
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
+ * GNU Lesser General Public License for more details. *
+ * *
+ * You should have received a copy of the GNU Lesser General Public License *
+ * along with this program; if not, write to the *
+ * Free Software Foundation, Inc., *
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
+ ******************************************************************************/
+
+#ifndef _SXSI_SWCSABuilder_h_
+#define _SXSI_SWCSABuilder_h_
+
+#include "TextCollectionBuilder.h"
+#include "TextStorage.h"
+#include "Tools.h" // Defines ulong and uchar.
+#include "SWCSAWrapper.h"
+
+#include <cassert> // Defines assert.
+#include <cstdlib> // Defines std::exit.
+#include <cstring> // Defines std::strlen, added by Kim
+#include <iostream> // Defines std::cerr and std::endl.
+#include <string>
+#include <utility> // Defines std::pair.
+
+namespace SXSI
+{
+ /**
+  * Builder that accumulates texts and constructs a SWCSA-backed
+  * TextCollection through SWCSAWrapper.
+  *
+  * Every inserted text is appended, together with its terminating 0-byte,
+  * to one concatenated buffer. InitTextCollection() copies that buffer into
+  * a raw array and passes it to SWCSAWrapper.
+  *
+  * Restrictions compared to the default (FMIndex) text collection:
+  *  - all texts must be indexed (index == true),
+  *  - only TextStorage::TYPE_PLAIN_TEXT is supported.
+  *
+  * Violating a restriction prints a diagnostic to std::cerr and terminates
+  * the process (assert in debug builds, std::exit(1) otherwise).
+  */
+ class SWCSABuilder : public TextCollectionBuilder
+ {
+ public:
+     /**
+      * Create an empty builder.
+      *
+      * @param sampler Sample rate, forwarded unchanged to SWCSAWrapper.
+      */
+     SWCSABuilder(unsigned sampler)
+         : text(), samplerate(sampler), numberOfTexts(0)
+     { /* NOP */ }
+
+     virtual ~SWCSABuilder()
+     { /* NOP */ }
+
+     /**
+      * Insert text
+      *
+      * Must be a non-empty, zero-terminated string from alphabet [1,255].
+      * The i'th text insertion gets an identifier value i-1.
+      * In other words, document identifiers start from 0.
+      *
+      * All texts must be inserted into the index!
+      * The default (FMIndex) text collection supports non-indexed texts.
+      *
+      * @param t     Zero-terminated text; must not be null or empty.
+      * @param index Must be true; non-indexed texts are not supported.
+      *
+      * On a null or empty text, or when index is false, a diagnostic is
+      * written to std::cerr and the process is terminated.
+      */
+     virtual void InsertText(uchar const *t, bool index = true)
+     {
+         // std::strlen on a null pointer is undefined behaviour; fail loudly instead.
+         if (t == 0)
+         {
+             std::cerr << "SWCSABuilder::InsertText(): Text pointer must not be null!" << std::endl;
+             std::exit(1);
+         }
+
+         // Measure the text once; the length is needed for both the
+         // emptiness check and the append below.
+         char const *chars = reinterpret_cast<char const *>(t);
+         std::size_t const length = std::strlen(chars);
+         if (length == 0)
+         {
+             std::cerr << "SWCSABuilder::InsertText(): Can not index empty texts!" << std::endl;
+             std::exit(1);
+         }
+
+         if (!index)
+         {
+             std::cerr << "SWCSABuilder::InsertText(): The implementation of SWCSA does not support non-indexed texts"
+                       << std::endl << "Use the default (FMIndex) text collection instead." << std::endl;
+             // Debug builds still trap here, but only after the diagnostic
+             // above has been flushed; release builds fall through to exit.
+             assert(index);
+             std::exit(1);
+         }
+
+         text.append(chars, length + 1); // +1 for 0-byte.
+         ++numberOfTexts;
+     }
+
+     /**
+      * Build the text collection
+      *
+      * Copies the concatenated texts into a freshly allocated array and
+      * constructs a SWCSAWrapper from it. Afterwards the builder is left
+      * empty (no buffered text, text count reset to 0).
+      *
+      * @param type Must be TextStorage::TYPE_PLAIN_TEXT; any other value
+      *             prints a diagnostic and terminates the process.
+      * @return Newly allocated SWCSAWrapper; the caller owns the pointer.
+      */
+     virtual TextCollection * InitTextCollection(char type = TextStorage::TYPE_PLAIN_TEXT)
+     {
+         if (type != TextStorage::TYPE_PLAIN_TEXT)
+         {
+             std::cerr << "SWCSABuilder::InitTextCollection(): The implementation of SWCSA supports only TextStorage::TYPE_PLAIN_TEXT"
+                       << std::endl << "Use the default (FMIndex) text collection instead." << std::endl;
+             // Trap in debug builds only after the diagnostic has been printed.
+             assert(type == TextStorage::TYPE_PLAIN_TEXT);
+             std::exit(1);
+         }
+
+         ulong n = text.size();
+         uchar *t = new uchar[n]; // FIXME uses temporarily too much space
+         ulong l = text.copy(reinterpret_cast<char *>(t), n);
+         if (l != n)
+         {
+             std::cerr << "SWCSABuilder::InitTextCollection(): copy failed!" << std::endl;
+             std::exit(1);
+         }
+
+         // clear() leaves the string's capacity untouched, which would keep the
+         // whole concatenated text allocated during index construction.
+         // Swapping with an empty temporary hands the storage to the temporary,
+         // whose destructor releases it.
+         std::string().swap(text);
+
+         // Reset the counter together with the buffer so the builder state
+         // stays consistent.
+         unsigned const count = numberOfTexts;
+         numberOfTexts = 0;
+
+         return new SWCSAWrapper(t, n, samplerate, count); // This will delete [] t.
+     }
+
+ private:
+     SWCSABuilder();
+     std::string text;       // Concatenation of all inserted texts, each followed by a 0-byte.
+     unsigned samplerate;    // Passed through to SWCSAWrapper.
+     unsigned numberOfTexts; // Number of texts currently held in the buffer.
+
+     // No copy constructor or assignment
+     SWCSABuilder(SWCSABuilder const&);
+     SWCSABuilder& operator = (SWCSABuilder const&);
+ };
+}
+#endif