1 /******************************************************************************
2 * Copyright (C) 2009 Niko Välimäki *
5 * This program is free software; you can redistribute it and/or modify *
6 * it under the terms of the GNU Lesser General Public License as published *
7 * by the Free Software Foundation; either version 2 of the License, or *
8 * (at your option) any later version. *
10 * This program is distributed in the hope that it will be useful, *
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13 * GNU Lesser General Public License for more details. *
15 * You should have received a copy of the GNU Lesser General Public License *
16 * along with this program; if not, write to the *
17 * Free Software Foundation, Inc., *
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
19 *****************************************************************************/
21 #include "TextStorage.h"
28 #include "lzindex/lztrie.h"
30 // Re-define word size to ulong:
45 /******************************************************************
// Reads a one-byte type tag from `file` and returns a newly allocated
// TextStorage implementation selected by that tag, handing `file` on to the
// chosen constructor.
//
// Throws std::runtime_error if the tag byte cannot be read.
//
// NOTE(review): the declaration of `type`, the dispatch statement and some of
// the case labels are not visible in this excerpt -- confirm the details
// against the complete file.
49 TextStorage * TextStorage::Load(std::FILE *file)
// Exactly one char must be read; anything else is reported as a read error.
52 if (std::fread(&type, sizeof(char), 1, file) != 1)
53 throw std::runtime_error("TextStorage::Load(): file read error (type).");
// Plain-text storage; the object is allocated with `new`.
57 case (TYPE_PLAIN_TEXT):
58 return new TextStoragePlainText(file);
// presumably the TYPE_LZ_INDEX case (its label is not visible here) -- TODO confirm
60 return new TextStorageLzIndex(file);
// Unrecognised tag: a diagnostic goes to stderr. What follows the message
// (abort, exit, or a return value) is not visible in this excerpt.
62 std::cerr << "TextStorage::Load(): Unknown type in save file!" << std::endl;
// Constructs a TextStorage from a saved stream: reads n_ and then
// numberOfTexts_ from `file` (one raw TextPosition each, so the on-disk
// layout follows sizeof(TextPosition)), and finally builds offsets_ as a
// CSA::DeltaVector from the same stream.
//
// Throws std::runtime_error when either fread does not return exactly one
// item.
67 TextStorage::TextStorage(std::FILE * file)
// All three members are zero-initialised before anything is read.
68 : n_(0), offsets_(0), numberOfTexts_(0)
// NOTE(review): the messages below name TextStorage::Load() although they
// are raised from this constructor.
70 if (std::fread(&(this->n_), sizeof(TextPosition), 1, file) != 1)
71 throw std::runtime_error("TextStorage::Load(): file read error (n_).");
73 if (std::fread(&(this->numberOfTexts_), sizeof(TextPosition), 1, file) != 1)
74 throw std::runtime_error("TextStorage::Load(): file read error (numberOfTexts_).");
// offsets_ is allocated with `new`; where it is released is not visible in
// this excerpt.
76 offsets_ = new CSA::DeltaVector(file);
// Writes the common TextStorage data to `file` in this order: the one-byte
// `type` tag, n_, numberOfTexts_ (one raw TextPosition each), and then
// offsets_ through its writeTo() method. This is the same order in which
// TextStorage::Load() and the TextStorage(std::FILE *) constructor read the
// data back.
//
// file: destination stream; each fwrite must report exactly one item written.
// type: tag byte stored first, later used by Load() to select the subclass.
//
// Throws std::runtime_error if any of the three fwrite calls fails; the
// message names the field whose write failed.
79 void TextStorage::Save(FILE *file, char type) const
81 if (std::fwrite(&type, sizeof(char), 1, file) != 1)
82 throw std::runtime_error("TextStorage::Save(): file write error (type).");
84 if (std::fwrite(&(this->n_), sizeof(TextPosition), 1, file) != 1)
85 throw std::runtime_error("TextStorage::Save(): file write error (n_).");
// The message names numberOfTexts_ (it previously repeated "n_"), so a failed
// write is attributed to the correct field.
87 if (std::fwrite(&(this->numberOfTexts_), sizeof(TextPosition), 1, file) != 1)
88 throw std::runtime_error("TextStorage::Save(): file write error (numberOfTexts_).");
// The offsets vector serialises itself directly after the fixed-size fields.
90 offsets_->writeTo(file);
94 /******************************************************************
95 * Class TextStorageLzIndex
98 // Hide the lztrie declaration
// Builds an LZ-index backed storage from the in-memory buffer `text` of
// length `n`. The base-class TextStorage(text, n) constructor runs first and
// p_ receives a freshly allocated LzTriePimpl.
//
// The caller's `text` buffer is modified in place by the loop below.
108 TextStorageLzIndex::TextStorageLzIndex(uchar *text, TextPosition n)
109 : TextStorage(text, n), p_(new struct LzTriePimpl)
// Visits every position except the last one.
// NOTE(review): if n_ is unsigned and zero, `n_ - 1` wraps around -- confirm
// that an empty text can never reach this constructor.
111 for (ulong i = 0; i < n_ - 1; ++i)
// NOTE(review): the condition guarding this assignment is not visible in this
// excerpt; judging by the inline remark, presumably interior '\0' bytes are
// overwritten with 1 -- TODO confirm.
113 text[i] = 1; // '\0' can appear only once.
// Build the trie over the (rewritten) buffer; byte value 0 is passed as the
// second argument of buildLZTrie().
116 p_->lz = buildLZTrie(text, (uchar)0, n_);
// Constructs the storage from a saved stream: the base-class
// TextStorage(file) constructor consumes its data first, p_ receives a
// freshly allocated LzTriePimpl, and the trie is then read from the same
// `file` with loadLZTrie().
120 TextStorageLzIndex::TextStorageLzIndex(FILE *file)
121 : TextStorage(file), p_(new struct LzTriePimpl)
123 p_->lz = loadLZTrie(file);
// Serialises this object to `file`: first the base-class data tagged with
// TYPE_LZ_INDEX, then the LZ trie via saveLZTrie().
126 void TextStorageLzIndex::Save(FILE *file) const
// Base-class part; TextStorage::Save() throws std::runtime_error on a failed
// write.
128 TextStorage::Save(file, TYPE_LZ_INDEX);
// Trie data follows immediately in the same stream.
130 saveLZTrie(p_->lz, file);
// Destructor: hands the LZ trie held through p_ to destroyLZTrie().
// NOTE(review): whether p_ itself is freed afterwards is not visible in this
// excerpt -- confirm against the complete file.
133 TextStorageLzIndex::~TextStorageLzIndex()
135 destroyLZTrie(p_->lz);
// Extracts the text of a single document from the LZ trie.
//
// docId must be smaller than numberOfTexts_; this is enforced only by an
// assert, so it is unchecked when asserts are compiled out.
//
// NOTE(review): the declarations of `to`, `text` and `l`, the value `to`
// takes for the last document, and the return statement are not visible in
// this excerpt.
141 uchar * TextStorageLzIndex::GetText(TextCollection::DocId docId) const
143 assert(docId < (TextCollection::DocId)numberOfTexts_);
// presumably the position where document docId begins -- TODO confirm the
// semantics of offsets_->select()
145 TextPosition from = offsets_->select(docId);
// For every document except the last, the range ends one position before the
// value selected for the following document.
147 if (docId < (TextCollection::DocId)numberOfTexts_ - 1)
148 to = offsets_->select(docId + 1) - 1;
// extract() receives the range [from, to] and reports its result through the
// `text` and `l` out-parameters.
154 extract(p_->lz, from, to, &text, &l);
160 uchar * TextStorageLzIndex::GetText(TextCollection::DocId i, TextCollection::DocId j) const
162 assert(i < (TextCollection::DocId)numberOfTexts_);
163 assert(j < (TextCollection::DocId)numberOfTexts_);
165 TextPosition from = offsets_->select(i);
167 if (j < (TextCollection::DocId)numberOfTexts_ - 1)
168 to = offsets_->select(j + 1) - 1;
174 extract(p_->lz, from, to, &text, &l);
176 // Put '\0' bytes back in place
177 while (i < j && i < (TextCollection::DocId)numberOfTexts_)
180 text[offsets_->select(i) - 1 - from] = 0;