Added SWCSA
[SXSI/TextCollection.git] / SWCSABuilder.h
diff --git a/SWCSABuilder.h b/SWCSABuilder.h
new file mode 100644 (file)
index 0000000..e60bf02
--- /dev/null
@@ -0,0 +1,118 @@
+/******************************************************************************
+ *   Copyright (C) 2009 by Niko Valimaki <nvalimak@cs.helsinki.fi>            *
+ *                                                                            *
+ *   This program is free software; you can redistribute it and/or modify     *
+ *   it under the terms of the GNU Lesser General Public License as published *
+ *   by the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                      *
+ *                                                                            *
+ *   This program is distributed in the hope that it will be useful,          *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of           *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            *
+ *   GNU Lesser General Public License for more details.                      *
+ *                                                                            *
+ *   You should have received a copy of the GNU Lesser General Public License *
+ *   along with this program; if not, write to the                            *
+ *   Free Software Foundation, Inc.,                                          *
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.                *
+ ******************************************************************************/ 
+
+#ifndef _SXSI_SWCSABuilder_h_
+#define _SXSI_SWCSABuilder_h_
+
+#include "TextCollectionBuilder.h"
+#include "TextStorage.h"
+#include "Tools.h" // Defines ulong and uchar.
+#include "SWCSAWrapper.h"
+
+#include <string>
+#include <utility> // Defines std::pair.
+#include <cstring> // Defines std::strlen, added by Kim
+
+namespace SXSI
+{
+    /**
+     * Build an SWCSA-backed instance of the TextCollection class: texts are
+     * buffered in memory until InitTextCollection() builds the SWCSAWrapper.
+     */
+    class SWCSABuilder : public TextCollectionBuilder
+    {
+    public:
+        SWCSABuilder(unsigned sampler)
+            : text(""), samplerate(sampler), numberOfTexts(0)
+        { /* NOP */ }
+
+        virtual ~SWCSABuilder()
+        { /* NOP */ }
+
+        /**
+         * Insert text
+         *
+         * Must be a non-empty, zero-terminated string from alphabet [1,255].
+         * The i'th insertion gets identifier i-1, i.e. identifiers start from 0.
+         *
+         * All texts must be inserted into the index (index must be true)!
+         * The default (FMIndex) text collection supports non-indexed texts.
+         * Invalid input prints an error and terminates the program.
+         */
+        virtual void InsertText(uchar const *t, bool index = true)
+        {
+            std::size_t const len = (t == 0) ? 0 : std::strlen((char const *) t);
+            if (len == 0) // Also rejects a null pointer.
+            {
+                std::cerr << "SWCSABuilder::InsertText(): Can not index empty texts!" << std::endl;
+                std::exit(1);
+            }
+            assert(index);
+            if (!index)
+            {
+                std::cerr << "SWCSABuilder::InsertText(): The implementation of SWCSA does not support non-indexed texts"
+                          << std::endl << "Use the default (FMIndex) text collection instead." << std::endl;
+                std::exit(1);
+            }
+            text.append((char const *) t, len + 1); // +1 for 0-byte.
+            ++ numberOfTexts;
+        }
+
+        /**
+         * Build and return a new SWCSAWrapper over all texts inserted so far.
+         * The builder is left empty afterwards.
+         *
+         * Only TextStorage::TYPE_PLAIN_TEXT is supported; any other type
+         * terminates the program (assert if enabled, exit(1) otherwise).
+         */
+        virtual TextCollection * InitTextCollection(char type = TextStorage::TYPE_PLAIN_TEXT)
+        {
+            assert(type == TextStorage::TYPE_PLAIN_TEXT);
+            if (type != TextStorage::TYPE_PLAIN_TEXT)
+            {
+                std::cerr << "SWCSABuilder::InitTextCollection(): The implementation of SWCSA supports only TextStorage::TYPE_PLAIN_TEXT"
+                          << std::endl << "Use the default (FMIndex) text collection instead." << std::endl;
+                std::exit(1);
+            }
+            ulong n = text.size();
+            uchar *t = new uchar[n]; // FIXME the text is held twice until the swap below
+            ulong l = text.copy((char *)t, n);
+            if (l != n)
+            {
+                std::cerr << "SWCSABuilder::InitTextCollection(): copy failed!" << std::endl;
+                std::exit(1);
+            }
+            std::string().swap(text); // Frees the buffer; clear() may keep its capacity.
+            unsigned const count = numberOfTexts;
+            numberOfTexts = 0; // Keep the counter consistent with the emptied buffer.
+            return new SWCSAWrapper(t, n, samplerate, count); // This will delete [] t.
+        }
+
+    private:
+        SWCSABuilder(); // Private: a sample rate must always be supplied.
+        std::string text; // All inserted texts, each followed by its 0-byte.
+        unsigned samplerate; // Passed through to SWCSAWrapper.
+        unsigned numberOfTexts; // Number of texts currently held in 'text'.
+
+        // No copy constructor or assignment
+        SWCSABuilder(SWCSABuilder const&);
+        SWCSABuilder& operator = (SWCSABuilder const&);
+    };
+}
+#endif