1 /******************************************************************************
2 * Copyright (C) 2009 Niko Välimäki *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU Lesser General Public License as published *
7 * by the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU Lesser General Public License for more details. *
15 * You should have received a copy of the GNU Lesser General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 *****************************************************************************/
21 #ifndef _TextStorage_H_
22 #define _TextStorage_H_
24 #include "incbwt/bits/deltavector.h"
30 * Text collection that supports fast extraction.
// Local alias so the position type can be written unqualified in the
// declarations of this class.
36 typedef TextCollection::TextPosition TextPosition;
37 // Block size in DeltaVector
// This value is passed to the CSA::DeltaEncoder constructor when the
// text-offset bitvector is built.
38 const static CSA::usint DV_BLOCK_SIZE = 32;
// Constructs a storage around a caller-supplied buffer.
//   text - pointer stored directly in text_ (no copy is made by the
//          initializer list)
//   n    - stored in n_; presumably the number of bytes in `text` — confirm
// offsets_ and numberOfTexts_ are initialized to 0 here.
// NOTE(review): the constructor body is not visible in this excerpt;
// presumably it builds the offsets index — confirm against the full file.
40 TextStorage(uchar *text, TextPosition n)
41 : n_(n), text_(text), offsets_(0), numberOfTexts_(0)
// Constructs a storage by reading from an already-open stream.
// Expected layout: one raw TextPosition (the length n_) followed by n_
// bytes of text, i.e. the same layout the fwrite calls in this file emit.
// Throws std::runtime_error when either fread returns fewer items than
// requested.
46 TextStorage(FILE *file)
47 : n_(0), text_(0), offsets_(0), numberOfTexts_(0)
// The length is read as the in-memory representation of TextPosition.
49 if (std::fread(&(this->n_), sizeof(TextPosition), 1, file) != 1)
50 throw std::runtime_error("TextStorage::Load(): file read error (n_).");
// Allocate a buffer of n_ bytes to receive the text.
52 text_ = new uchar[n_];
// NOTE(review): if this read fails, the exception leaves the constructor,
// so the destructor does not run and nothing visible in this excerpt
// releases the buffer allocated just above.
53 if (std::fread(this->text_, sizeof(uchar), n_, file) != n_)
54 throw std::runtime_error("TextStorage::Load(): file read error (text_).");
// Serialization body: writes n_ as one raw TextPosition, then the n_ bytes
// of text_. This is the layout the FILE* constructor reads back.
// Throws std::runtime_error when either fwrite writes fewer items than
// requested.
// NOTE(review): the enclosing function signature is not visible in this
// excerpt; the error strings suggest it is TextStorage::Save — confirm.
61 if (std::fwrite(&(this->n_), sizeof(TextPosition), 1, file) != 1)
62 throw std::runtime_error("TextStorage::Save(): file write error (n_).");
64 if (std::fwrite(this->text_, sizeof(uchar), n_, file) != n_)
65 throw std::runtime_error("TextStorage::Save(): file write error (text_).");
77 uchar * GetText(TextCollection::DocId docId)
79 assert(docId < (TextCollection::DocId)numberOfTexts_);
81 TextPosition offset = offsets_->select(docId);
82 return &text_[offset];
// Returns offsets_->rank(i) - 1 for text position i.
// NOTE(review): offsets_ appears to have a bit set at the start of each
// text (the encoder marks bit 0 as "Start of the first text"), so this is
// presumably the zero-based identifier of the text containing position i,
// assuming rank(i) counts set bits up to and including i — confirm against
// CSA::DeltaVector. One line of this function is not visible in this
// excerpt.
85 TextCollection::DocId DocIdAtTextPos(TextCollection::TextPosition i)
88 return offsets_->rank(i)-1;
91 TextCollection::TextPosition TextStartPos(TextCollection::DocId i)
93 assert(i < (TextCollection::DocId)numberOfTexts_);
94 return offsets_->select(i);
// Builds offsets_ from the text buffer and derives numberOfTexts_ from it.
// NOTE(review): the enclosing function header is not visible in this
// excerpt.
100 // Delta encoded bitvector of text offsets.
101 CSA::DeltaEncoder encoder(DV_BLOCK_SIZE);
102 encoder.setBit(0); // Start of the first text.
104 // Read offsets by finding text end positions:
// The scan covers positions 0 .. n_-2, so the last byte is never tested.
// NOTE(review): if TextPosition is unsigned and n_ == 0, `n_ - 1` wraps
// around and the loop would read far past the buffer — confirm that an
// empty text can never reach this code.
105 for (TextPosition i = 0; i < n_ - 1; ++i)
106 if (text_[i] == '\0')
// NOTE(review): the statement guarded by the test above is not visible
// here; presumably it marks the start of the next text in `encoder`.
109 offsets_ = new CSA::DeltaVector(encoder, n_);
// The text count is taken as the rank at the last position, n_ - 1.
110 numberOfTexts_ = offsets_->rank(n_ - 1);
// Text bytes; indexed by text position (GetText returns &text_[offset]).
114 uchar *text_; // FIXME Replace with a succinct representation.
// Bitvector constructed from a CSA::DeltaEncoder; the accessors query it
// with rank() and select().
115 CSA::DeltaVector *offsets_;
// Assigned from offsets_->rank(n_ - 1) once offsets_ has been built.
116 TextPosition numberOfTexts_;
117 }; // class TextStorage
121 * Builder for TextStorage class
// Helper that allocates a uchar buffer, exposes element access through
// operator[], and hands the buffer to a newly created TextStorage.
123 class TextStorageBuilder
127 typedef TextCollection::TextPosition TextPosition;
129 // Build up simple uchar array
// Allocates n elements with new[] and sets freeText to true.
// NOTE(review): freeText presumably tells the destructor whether the
// builder must still delete text_ — the destructor body is not visible in
// this excerpt, confirm.
130 explicit TextStorageBuilder(TextPosition n)
131 : n_(n), text_(new uchar [n]), freeText(true)
134 ~TextStorageBuilder()
142 // Write access to text[]
// Returns a reference, so callers can assign through it.
// NOTE(review): the body is not visible here; whether i is bounds-checked
// cannot be told from this excerpt.
143 uchar& operator[] (TextPosition i)
// Creates a TextStorage with `new` over the builder's buffer and returns
// the raw pointer to the caller. freeText is cleared before the
// allocation.
// NOTE(review): nothing visible here prevents a second call, which would
// pass the same buffer to another TextStorage — confirm intended usage.
149 TextStorage * InitTextStorage()
151 freeText = false; // Passing text to TextStorage.
152 return new TextStorage(text_, n_);
// Buffer allocated in the constructor and passed on by InitTextStorage().
157 uchar *text_; // FIXME Replace with a succinct representation.
159 }; // class TextStorageBuilder
163 #endif // #ifndef _TextStorage_H_