1 /******************************************************************************
2 * Copyright (C) 2009 Niko Välimäki *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU Lesser General Public License as published *
7 * by the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU Lesser General Public License for more details. *
15 * You should have received a copy of the GNU Lesser General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 *****************************************************************************/
21 #ifndef _TextStorage_H_
22 #define _TextStorage_H_
24 #include "TextCollection.h"
27 #include "incbwt/bits/deltavector.h"
28 // Re-define word size to ulong:
43 class TextStorageBuilder;
44 class TextStoragePlainText;
45 class TextStorageLzIndex;
48 * Text collection that supports fast extraction.
49 * Defines an abstact interface class.
50 * See subclasses TextStorageLzIndex and TextStoragePlainText
57 const static char TYPE_PLAIN_TEXT = 0;
58 const static char TYPE_LZ_INDEX = 1;
60 // Call DeleteText() for each pointer returned by GetText()
61 // to avoid possible memory leaks.
62 virtual uchar * GetText(TextCollection::DocId docId) const = 0;
63 virtual uchar * GetText(TextCollection::DocId i, TextCollection::DocId j) const = 0;
64 virtual void DeleteText(uchar *) const = 0;
66 static TextStorage * Load(FILE *file);
67 virtual void Save(FILE *file) const = 0;
69 virtual ~TextStorage()
77 TextCollection::DocId DocIdAtTextPos(TextCollection::TextPosition i) const
80 return offit_->rank(i)-1;
83 TextCollection::TextPosition TextStartPos(TextCollection::DocId i) const
85 assert(i < (TextCollection::DocId)numberOfTexts_);
86 return offit_->select(i);
89 bool IsEndmarker(TextCollection::TextPosition i) const
94 return offit_->isSet(i+1);
100 typedef TextCollection::TextPosition TextPosition;
101 // Block size in DeltaVector
102 const static CSA::usint DV_BLOCK_SIZE = 32;
104 TextStorage(uchar const * text, TextPosition n)
105 : n_(n), offsets_(0), offit_(0), numberOfTexts_(0)
107 // Delta encoded bitvector of text offsets.
108 CSA::DeltaEncoder encoder(DV_BLOCK_SIZE);
109 encoder.setBit(0); // Start of the first text.
111 // Read offsets by finding text end positions:
112 for (TextPosition i = 0; i < n_ - 1; ++i)
117 offsets_ = new CSA::DeltaVector(encoder, n_);
118 offit_ = new CSA::DeltaVector::Iterator(*(offsets_));
119 numberOfTexts_ = offit_->rank(n_ - 1);
122 TextStorage(std::FILE *);
123 void Save(FILE *file, char type) const;
126 CSA::DeltaVector *offsets_;
127 CSA::DeltaVector::Iterator *offit_;
128 TextPosition numberOfTexts_;
131 /******************************************************************
132 * Plain text collection.
134 class TextStoragePlainText : public TextStorage
137 TextStoragePlainText(uchar *text, TextPosition n)
138 : TextStorage(text, n), text_(text)
141 TextStoragePlainText(FILE *file)
142 : TextStorage(file), text_(0)
144 text_ = new uchar[n_];
145 if (std::fread(this->text_, sizeof(uchar), n_, file) != n_)
146 throw std::runtime_error("TextStorage::Load(): file read error (text_).");
149 void Save(FILE *file) const
151 TextStorage::Save(file, TYPE_PLAIN_TEXT);
153 if (std::fwrite(this->text_, sizeof(uchar), n_, file) != n_)
154 throw std::runtime_error("TextStorage::Save(): file write error (text_).");
157 ~TextStoragePlainText()
164 uchar * GetText(TextCollection::DocId docId) const
166 assert(docId < (TextCollection::DocId)numberOfTexts_);
168 TextPosition offset = offit_->select(docId);
169 return &text_[offset];
172 uchar * GetText(TextCollection::DocId i, TextCollection::DocId j) const
174 assert(i < (TextCollection::DocId)numberOfTexts_);
175 assert(j < (TextCollection::DocId)numberOfTexts_);
177 TextPosition offset = offit_->select(i);
178 return &text_[offset];
181 // No operation, since text is a pointer to this->text_
182 void DeleteText(uchar *text) const
187 }; // class TextStorage
190 /******************************************************************
191 * LZ-index text collection.
193 struct LzTriePimpl; // Using Pimpl idiom to hide LzTrie implementation.
195 class TextStorageLzIndex : public TextStorage
198 TextStorageLzIndex(uchar *text, TextPosition n);
199 TextStorageLzIndex(FILE *file);
200 void Save(FILE *file) const;
201 ~TextStorageLzIndex();
202 uchar * GetText(TextCollection::DocId docId) const;
203 uchar * GetText(TextCollection::DocId i, TextCollection::DocId j) const;
205 // Free the space malloc'ed in lztrie::extract()
206 void DeleteText(uchar *text) const
212 struct LzTriePimpl * p_;
213 }; // class TextStorageLzIndex
217 * Builder for TextStorage class
219 class TextStorageBuilder
223 typedef TextCollection::TextPosition TextPosition;
225 // Build up simple uchar array
226 explicit TextStorageBuilder(TextPosition n)
227 : n_(n), text_(new uchar [n]), freeText(true)
230 ~TextStorageBuilder()
238 // Write access to text[]
239 uchar& operator[] (TextPosition i)
245 // Type defaults to plain text.
246 TextStorage * InitTextStorage(char type = TextStorage::TYPE_PLAIN_TEXT)
248 freeText = false; // Passing text to TextStorage.
251 case (TextStorage::TYPE_PLAIN_TEXT):
252 return new TextStoragePlainText(text_, n_);
253 case (TextStorage::TYPE_LZ_INDEX):
254 return new TextStorageLzIndex(text_, n_);
256 std::cerr << "TextStorageBuilder: Unknown type given!" << std::endl;
265 }; // class TextStorageBuilder
269 #endif // #ifndef _TextStorage_H_