Added RLCSA index option
[SXSI/TextCollection.git] / RLCSABuilder.h
diff --git a/RLCSABuilder.h b/RLCSABuilder.h
new file mode 100644 (file)
index 0000000..dd474b4
--- /dev/null
@@ -0,0 +1,82 @@
+/******************************************************************************
+ *   Copyright (C) 2009 by Niko Valimaki <nvalimak@cs.helsinki.fi>            *
+ *                                                                            *
+ *   This program is free software; you can redistribute it and/or modify     *
+ *   it under the terms of the GNU Lesser General Public License as published *
+ *   by the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                      *
+ *                                                                            *
+ *   This program is distributed in the hope that it will be useful,          *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of           *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            *
+ *   GNU Lesser General Public License for more details.                      *
+ *                                                                            *
+ *   You should have received a copy of the GNU Lesser General Public License *
+ *   along with this program; if not, write to the                            *
+ *   Free Software Foundation, Inc.,                                          *
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.                *
+ ******************************************************************************/ 
+
+#ifndef _SXSI_RLCSABuilder_h_
+#define _SXSI_RLCSABuilder_h_
+
+#include "TextCollectionBuilder.h"
+#include "TextStorage.h"
+#include "Tools.h" // Defines ulong and uchar.
+
+#include <string>
+#include <vector>
+#include <utility> // Defines std::pair.
+#include <cstring> // Defines std::strlen, added by Kim
+
+namespace SXSI
+{
+    struct TCBuilderRep; // Pimpl: opaque implementation type, only forward-declared here (not defined in this header).
+    
+    /**
+     * Builder for a TextCollection; the RLCSA implementation is hidden
+     * behind the Pimpl pointer p_.
+     *
+     * Usage, as documented on the members below: add texts with
+     * InsertText(), then call InitTextCollection() to obtain the
+     * collection.
+     *
+     * The public constructor takes a sample rate and an estimated input
+     * length. NOTE(review): the units and exact meaning of both values
+     * are not visible in this header -- presumably the index sampling
+     * rate and the expected total length of the inserted texts; confirm
+     * against the implementation.
+     *
+     * Instances can not be default-constructed, copied or assigned from
+     * outside the class: those members are declared private.
+     */
+    class RLCSABuilder : public TextCollectionBuilder
+    {
+    public:
+        RLCSABuilder(unsigned samplerate, ulong estimatedInputLength);
+
+        virtual ~RLCSABuilder();
+        
+        /** 
+         * Insert text
+         *
+         * The first (unnamed) parameter is the text to insert. It
+         * must be a zero-terminated string from alphabet [1,255].
+         * Can not be called after InitTextCollection() has made the
+         * collection static.
+         * The i'th text insertion gets an identifier value i-1.
+         * In other words, document identifiers start from 0.
+         *
+         * The second parameter (index) tells if the text will also be
+         * added to the index. If false, the text is added only to the
+         * TextCollection and can not be searched for.
+         *
+         * NOTE(review): the default argument sits on a virtual function;
+         * C++ picks default arguments from the static type of the call,
+         * so any default declared in TextCollectionBuilder should match
+         * this one -- confirm.
+         */
+        virtual void InsertText(uchar const *, bool index = true);
+
+        /**
+         * Initialize the text collection ("make static")
+         *
+         * Convert to a static collection and return it.
+         * New texts can not be inserted after this operation.
+         *
+         * The type parameter selects the TextStorage type. It defaults
+         * to TYPE_PLAIN_TEXT, another possible type is TYPE_LZ_INDEX.
+         *
+         * Returns a pointer to the resulting TextCollection.
+         * NOTE(review): this header does not say who owns the returned
+         * object -- confirm against the implementation.
+         */
+        virtual TextCollection * InitTextCollection(char type = TextStorage::TYPE_PLAIN_TEXT);
+        
+    private:
+        RLCSABuilder(); // Private: outside code must use the two-argument constructor.
+        
+        // Pimpl: pointer to the opaque TCBuilderRep, keeping RLCSA details out of this header.
+        struct TCBuilderRep * p_;
+
+        // Copying is disabled: copy constructor and copy assignment are declared private.
+        RLCSABuilder(RLCSABuilder const&);
+        RLCSABuilder& operator = (RLCSABuilder const&);
+    };
+}
+#endif