1 /******************************************************************************
2 * Copyright (C) 2009 by Niko Valimaki <nvalimak@cs.helsinki.fi> *
4 * This program is free software; you can redistribute it and/or modify *
5 * it under the terms of the GNU Lesser General Public License as published *
6 * by the Free Software Foundation; either version 2 of the License, or *
7 * (at your option) any later version. *
9 * This program is distributed in the hope that it will be useful, *
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
12 * GNU Lesser General Public License for more details. *
14 * You should have received a copy of the GNU Lesser General Public License *
15 * along with this program; if not, write to the *
16 * Free Software Foundation, Inc., *
17 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
18 ******************************************************************************/
20 #ifndef _SXSI_SWCSABuilder_h_
21 #define _SXSI_SWCSABuilder_h_
#include "TextCollectionBuilder.h"
#include "TextStorage.h"
#include "Tools.h" // Defines ulong and uchar.
#include "SWCSAWrapper.h"

#include <cassert>  // Defines assert.
#include <cstdlib>  // Defines std::exit.
#include <cstring>  // Defines std::strlen, added by Kim
#include <iostream> // Defines std::cerr and std::endl.
#include <string>   // Defines std::string.
#include <utility>  // Defines std::pair.
35 * Build an instance of the TextCollection class.
37 class SWCSABuilder : public TextCollectionBuilder
40 SWCSABuilder(unsigned sampler)
41 : text(""), samplerate(sampler), numberOfTexts(0)
44 virtual ~SWCSABuilder()
50 * Must be a zero-terminated string from alphabet [1,255].
51 * Can not be called after makeStatic().
52 * The i'th text insertion gets an identifier value i-1.
53 * In other words, document identifiers start from 0.
55 * All texts must be inserted into the index!
56 * The default (FMIndex) text collection supports non-indexed texts.
58 virtual void InsertText(uchar const *t, bool index = true)
60 if (strlen((char const *) t) == 0)
62 std::cerr << "SWCSABuilder::InsertText(): Can not index empty texts!" << std::endl;
68 std::cerr << "SWCSABuilder::InsertText(): The implementation of SWCSA does not support non-indexed texts"
69 << std::endl << "Use the default (FMIndex) text collection instead." << std::endl;
72 text.append((char const *) t, strlen((char const *) t) + 1); // +1 for 0-byte.
79 * Convert to a static collection.
80 * New texts can not be inserted after this operation.
84 virtual TextCollection * InitTextCollection(char type = TextStorage::TYPE_PLAIN_TEXT)
86 assert(type == TextStorage::TYPE_PLAIN_TEXT);
87 if (type != TextStorage::TYPE_PLAIN_TEXT)
89 std::cerr << "SWCSABuilder::InitTextCollection(): The implementation of SWCSA supports only TextStorage::TYPE_PLAIN_TEXT"
90 << std::endl << "Use the default (FMIndex) text collection instead." << std::endl;
94 ulong n = text.size();
95 uchar *t = new uchar[n]; // FIXME uses temporarily too much space
96 ulong l = text.copy((char *)t, n);
99 std::cerr << "SWCSABuilder::InitTextCollection(): copy failed!" << std::endl;
103 return new SWCSAWrapper(t, n, samplerate, numberOfTexts); // This will delete [] t.
111 unsigned numberOfTexts;
113 // No copy constructor or assignment
114 SWCSABuilder(SWCSABuilder const&);
115 SWCSABuilder& operator = (SWCSABuilder const&);