1 /******************************************************************************
2 * Copyright (C) 2009 Niko Välimäki *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU Lesser General Public License as published *
7 * by the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU Lesser General Public License for more details. *
15 * You should have received a copy of the GNU Lesser General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 *****************************************************************************/
21 #ifndef _TextStorage_H_
22 #define _TextStorage_H_
24 #include "incbwt/bits/deltavector.h"
30 * Text collection that supports fast extraction.
// Local alias for TextCollection::TextPosition; used in this class for the
// text length parameter, text offsets and the text count.
36 typedef TextCollection::TextPosition TextPosition;
37 // Block size in DeltaVector
// This value is handed to the CSA::DeltaEncoder that builds the text-offset
// bitvector (see the encoder construction further down in this class).
38 const static CSA::usint DV_BLOCK_SIZE = 16;
// Construct from an existing buffer.
//   text - pointer to the text bytes; only the pointer is stored, no copy is
//          made by the initialiser list.
//   n    - stored as n_ (used elsewhere in this class as the buffer length).
// offsets_ and numberOfTexts_ start out as 0.
// NOTE(review): the constructor body is not visible in this excerpt —
// presumably the offset bitvector is built there; confirm.
// NOTE(review): TextStorageBuilder::InitTextStorage() comments this call as
// "Passing text to TextStorage", so this object appears to take ownership of
// `text` — confirm against the destructor.
41 TextStorage(uchar *text, TextPosition n)
42 : n_(n), text_(text), offsets_(0), numberOfTexts_(0)
// Construct by reading from an already opened FILE.
// Layout read: one TextPosition (raw bytes, stored into n_) followed by n_
// bytes of text. Throws std::runtime_error when either fread returns a short
// count. Note the messages say "TextStorage::Load()" although this is a
// constructor.
// NOTE(review): reading TextPosition as raw bytes presumably makes the file
// format depend on the host's sizeof(TextPosition) and byte order — confirm.
47 TextStorage(FILE *file)
48 : n_(0), text_(0), offsets_(0), numberOfTexts_(0)
50 if (std::fread(&(this->n_), sizeof(TextPosition), 1, file) != 1)
51 throw std::runtime_error("TextStorage::Load(): file read error (n_).");
// Buffer is sized by the length that was just read from the file.
53 text_ = new uchar[n_];
54 if (std::fread(this->text_, sizeof(uchar), n_, file) != n_)
// NOTE(review): text_ was allocated with new[] just above; an exception leaving
// a constructor does not run the destructor, so unless a handler outside this
// excerpt frees it, the buffer looks leaked on this path — confirm.
55 throw std::runtime_error("TextStorage::Load(): file read error (text_).");
// Serialisation (the enclosing signature is not visible in this excerpt; the
// error messages name it TextStorage::Save()).
// Writes n_ as one raw TextPosition, then the n_ bytes of text_, to `file` —
// the same order in which the FILE* constructor reads them back.
// Throws std::runtime_error when either fwrite returns a short count.
62 if (std::fwrite(&(this->n_), sizeof(TextPosition), 1, file) != 1)
63 throw std::runtime_error("TextStorage::Save(): file write error (n_).");
65 if (std::fwrite(this->text_, sizeof(uchar), n_, file) != n_)
66 throw std::runtime_error("TextStorage::Save(): file write error (text_).");
// Returns a pointer into the internal text_ buffer (not a copy) at the offset
// given by offsets_->select(docId).
//   docId - must be < numberOfTexts_; this is enforced by assert only.
// NOTE(review): presumably the returned pointer is the first byte of text
// number docId, since bit 0 of the offset vector is set as "Start of the
// first text" — confirm against the offset-building loop.
78 uchar * GetText(TextCollection::DocId docId)
80 assert(docId < numberOfTexts_);
// Position in text_ reported by the offset bitvector for this docId.
82 TextPosition offset = offsets_->select(docId);
83 return &text_[offset];
// Builds offsets_ and numberOfTexts_ (the enclosing function signature and the
// loop body are not visible in this excerpt).
91 // Delta encoded bitvector of text offsets.
92 CSA::DeltaEncoder encoder(DV_BLOCK_SIZE);
93 encoder.setBit(0); // Start of the first text.
95 // Read offsets by finding text end positions:
// NOTE(review): if TextPosition is an unsigned type, n_ - 1 wraps around when
// n_ == 0 and the bound becomes huge — confirm an empty text can never reach
// this code.
96 for (TextPosition i = 0; i < n_ - 1; ++i)
// The finished encoder and n_ are handed to a heap-allocated DeltaVector.
100 offsets_ = new CSA::DeltaVector(encoder, n_);
// Text count is taken from rank() at the last position, n_ - 1.
101 numberOfTexts_ = offsets_->rank(n_ - 1);
// Raw text bytes: either the pointer given to the (uchar*, TextPosition)
// constructor or a new[] buffer filled by the FILE* constructor.
105 uchar *text_; // FIXME Replace with a succinct representation.
// Bitvector of text offsets built from a CSA::DeltaEncoder; queried with
// select() in GetText() and rank() when counting texts.
106 CSA::DeltaVector *offsets_;
// Assigned from offsets_->rank(n_ - 1); upper bound asserted on docId in GetText().
107 TextPosition numberOfTexts_;
108 }; // class TextStorage
112 * Builder for TextStorage class
// Collects text into a plain heap-allocated uchar array and then hands that
// array to a newly created TextStorage via InitTextStorage().
114 class TextStorageBuilder
118 typedef TextCollection::TextPosition TextPosition;
120 // Build up simple uchar array
// Allocates n bytes with new[] and records n; freeText starts out true.
121 explicit TextStorageBuilder(TextPosition n)
122 : n_(n), text_(new uchar [n]), freeText(true)
// NOTE(review): destructor body is not visible here — presumably it releases
// text_ only while freeText is still true; confirm.
125 ~TextStorageBuilder()
133 // Write access to text[]
// NOTE(review): body not visible; whether i is range-checked cannot be told from here.
134 uchar& operator[] (TextPosition i)
// Returns a TextStorage allocated with new, constructed over this builder's
// text_ and n_. freeText is cleared first, per the original comment because
// the buffer is being passed on.
// NOTE(review): the builder keeps its text_ pointer after this call, so later
// operator[] writes would touch the buffer now held by the TextStorage, and a
// second call would give the same buffer to two TextStorage objects — confirm
// this is meant to be called exactly once, last.
140 TextStorage * InitTextStorage()
142 freeText = false; // Passing text to TextStorage.
143 return new TextStorage(text_, n_);
// Buffer of n_ bytes allocated in the constructor.
148 uchar *text_; // FIXME Replace with a succinct representation.
150 }; // class TextStorageBuilder
154 #endif // #ifndef _TextStorage_H_