1 /******************************************************************************
2 * Copyright (C) 2009 Niko Välimäki *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU Lesser General Public License as published *
7 * by the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU Lesser General Public License for more details. *
15 * You should have received a copy of the GNU Lesser General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 *****************************************************************************/
21 #include "TextStorage.h"
28 #include "lzindex/lztrie.h"
30 // Re-define word size to ulong:
45 /******************************************************************
// Factory for stored text: reads a one-byte type tag from `file` and returns a
// newly allocated TextStorage subclass constructed from the same FILE*.
// Throws std::runtime_error when the tag byte cannot be read.
// NOTE(review): the declaration of `type`, the switch header, one case label and
// the default branch are not visible in this excerpt; the comments below cover
// only the statements shown.
// NOTE(review): the result comes from a plain `new`, so the caller presumably
// owns it and must delete it — confirm against the header.
49 TextStorage * TextStorage::Load(std::FILE *file)
// fread returns the number of complete items read; anything other than 1 means
// the tag byte is missing.
52 if (std::fread(&type, sizeof(char), 1, file) != 1)
53 throw std::runtime_error("TextStorage::Load(): file read error (type).");
// Dispatch on the tag; each branch hands the stream to the subclass constructor.
57 case (TYPE_PLAIN_TEXT):
58 return new TextStoragePlainText(file);
// presumably the TYPE_LZ_INDEX branch (its case label is not shown here) — TODO confirm
60 return new TextStorageLzIndex(file);
// Unrecognized tag: report the problem on stderr.
62 std::cerr << "TextStorage::Load(): Unknown type in save file!" << std::endl;
67 TextStorage::TextStorage(std::FILE * file)
68 : n_(0), offsets_(0), offit_(0), numberOfTexts_(0)
70 if (std::fread(&(this->n_), sizeof(TextPosition), 1, file) != 1)
71 throw std::runtime_error("TextStorage::Load(): file read error (n_).");
73 if (std::fread(&(this->numberOfTexts_), sizeof(TextPosition), 1, file) != 1)
74 throw std::runtime_error("TextStorage::Load(): file read error (numberOfTexts_).");
76 offsets_ = new CSA::DeltaVector(file);
77 offit_ = new CSA::DeltaVector::Iterator(*offsets_);
80 void TextStorage::Save(FILE *file, char type) const
82 if (std::fwrite(&type, sizeof(char), 1, file) != 1)
83 throw std::runtime_error("TextStorage::Save(): file write error (type).");
85 if (std::fwrite(&(this->n_), sizeof(TextPosition), 1, file) != 1)
86 throw std::runtime_error("TextStorage::Save(): file write error (n_).");
88 if (std::fwrite(&(this->numberOfTexts_), sizeof(TextPosition), 1, file) != 1)
89 throw std::runtime_error("TextStorage::Save(): file write error (n_).");
91 offsets_->writeTo(file);
95 /******************************************************************
96 * Class TextStorageLzIndex
99 // Hide the lztrie declaration
// Builds an LZ-trie backed storage from `text` of length `n`. The base class is
// initialized with TextStorage(text, n) and a fresh LzTriePimpl is allocated to
// hold the trie.
// NOTE(review): the condition guarding the assignment inside the loop is not
// visible in this excerpt.
109 TextStorageLzIndex::TextStorageLzIndex(uchar *text, TextPosition n)
110 : TextStorage(text, n), p_(new struct LzTriePimpl)
// Visits every position except the last one.
// NOTE(review): if n_ is an unsigned type and equals 0, `n_ - 1` wraps around —
// confirm that callers never pass an empty text.
112 for (ulong i = 0; i < n_ - 1; ++i)
// Writes through the `text` pointer, so the caller's buffer is modified in place.
114 text[i] = 1; // '\0' can appear only once.
// Build the trie over the (modified) buffer; the second argument is the byte 0.
117 p_->lz = buildLZTrie(text, (uchar)0, n_);
// Load constructor: the TextStorage(file) base constructor runs first and reads
// its fields from `file`; the LZ trie is then read from the same stream with
// loadLZTrie() and stored in a newly allocated LzTriePimpl.
121 TextStorageLzIndex::TextStorageLzIndex(FILE *file)
122 : TextStorage(file), p_(new struct LzTriePimpl)
124 p_->lz = loadLZTrie(file);
// Writes this storage to `file`: first the base-class data tagged with
// TYPE_LZ_INDEX, then the LZ trie via saveLZTrie().
127 void TextStorageLzIndex::Save(FILE *file) const
// The base class writes the type tag together with its own fields.
129 TextStorage::Save(file, TYPE_LZ_INDEX);
131 saveLZTrie(p_->lz, file);
// Destructor: hands the LZ trie to destroyLZTrie().
// NOTE(review): p_ is allocated with `new` in the constructors; its release is
// not visible in this excerpt — confirm that it is deleted.
134 TextStorageLzIndex::~TextStorageLzIndex()
136 destroyLZTrie(p_->lz);
// Extracts the text of document `docId` from the LZ trie.
// NOTE(review): the declarations of `to`, `text` and `l`, the branch taken for
// the last document, and the return statement are not visible in this excerpt.
142 uchar * TextStorageLzIndex::GetText(TextCollection::DocId docId) const
// Bounds check on the document id; active only when assertions are enabled.
144 assert(docId < (TextCollection::DocId)numberOfTexts_);
// Start position of the requested document, taken from the offsets iterator.
146 TextPosition from = offit_->select(docId);
// For every document but the last, the range ends one position before the
// start of the following document.
148 if (docId < (TextCollection::DocId)numberOfTexts_ - 1)
149 to = offit_->select(docId + 1) - 1;
// extract() is given the addresses of `text` and `l`, presumably as output
// parameters for the buffer and its length — TODO confirm against lztrie.h.
155 extract(p_->lz, from, to, &text, &l);
161 uchar * TextStorageLzIndex::GetText(TextCollection::DocId i, TextCollection::DocId j) const
163 assert(i < (TextCollection::DocId)numberOfTexts_);
164 assert(j < (TextCollection::DocId)numberOfTexts_);
166 TextPosition from = offit_->select(i);
168 if (j < (TextCollection::DocId)numberOfTexts_ - 1)
169 to = offit_->select(j + 1) - 1;
175 extract(p_->lz, from, to, &text, &l);
177 // Put '\0' bytes back in place
178 while (i < j && i < (TextCollection::DocId)numberOfTexts_)
181 text[offit_->select(i) - 1 - from] = 0;