From: nvalimak Date: Fri, 8 May 2009 12:03:39 +0000 (+0000) Subject: Added TextStorage class X-Git-Url: http://git.nguyen.vg/gitweb/?p=SXSI%2FTextCollection.git;a=commitdiff_plain;h=b8470e984146c62910517a2ec762d2ede97c2d96 Added TextStorage class git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/TextCollection@376 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/TextStorage.h b/TextStorage.h new file mode 100644 index 0000000..237bc44 --- /dev/null +++ b/TextStorage.h @@ -0,0 +1,154 @@ +/****************************************************************************** + * Copyright (C) 2009 Niko Välimäki * + * * + * * + * This program is free software; you can redistribute it and/or modify * + * it under the terms of the GNU Lesser General Public License as published * + * by the Free Software Foundation; either version 2 of the License, or * + * (at your option) any later version. * + * * + * This program is distributed in the hope that it will be useful, * + * but WITHOUT ANY WARRANTY; without even the implied warranty of * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * + * GNU Lesser General Public License for more details. * + * * + * You should have received a copy of the GNU Lesser General Public License * + * along with this program; if not, write to the * + * Free Software Foundation, Inc., * + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. * + *****************************************************************************/ + +#ifndef _TextStorage_H_ +#define _TextStorage_H_ + +#include "incbwt/bits/deltavector.h" + +namespace SXSI +{ + +/** + * Text collection that supports fast extraction. + */ +class TextStorage +{ +public: + // Define a shortcut + typedef TextCollection::TextPosition TextPosition; + // Block size in DeltaVector + const static CSA::usint DV_BLOCK_SIZE = 16; + + + TextStorage(uchar *text, TextPosition n) + : n_(n), text_(text), offsets_(0), numberOfTexts_(0) + { + initOffsets(); + } + + TextStorage(FILE *file) + : n_(0), text_(0), offsets_(0), numberOfTexts_(0) + { + if (std::fread(&(this->n_), sizeof(TextPosition), 1, file) != 1) + throw std::runtime_error("TextStorage::Load(): file read error (n_)."); + + text_ = new uchar[n_]; + if (std::fread(this->text_, sizeof(uchar), n_, file) != n_) + throw std::runtime_error("TextStorage::Load(): file read error (text_)."); + + initOffsets(); + } + + void Save(FILE *file) + { + if (std::fwrite(&(this->n_), sizeof(TextPosition), 1, file) != 1) + throw std::runtime_error("TextStorage::Save(): file write error (n_)."); + + if (std::fwrite(this->text_, sizeof(uchar), n_, file) != n_) + throw std::runtime_error("TextStorage::Save(): file write error (text_)."); + } + + ~TextStorage() + { + delete offsets_; + offsets_ = 0; + delete [] text_; + text_ = 0; + n_ = 0; + } + + uchar * GetText(TextCollection::DocId docId) + { + assert(docId < numberOfTexts_); + + TextPosition offset = offsets_->select(docId); + return &text_[offset]; + } + + + +private: + void initOffsets() + { + // Delta encoded bitvector of text offsets. + CSA::DeltaEncoder encoder(DV_BLOCK_SIZE); + encoder.setBit(0); // Start of the first text. + + // Read offsets by finding text end positions: + for (TextPosition i = 0; i < n_ - 1; ++i) + if (text_[i] == '\0') + encoder.setBit(i+1); + + offsets_ = new CSA::DeltaVector(encoder, n_); + numberOfTexts_ = offsets_->rank(n_ - 1); + } + + TextPosition n_; + uchar *text_; // FIXME Replace with a succinct representation. + CSA::DeltaVector *offsets_; + TextPosition numberOfTexts_; +}; // class TextStorage + + +/** + * Builder for TextStorage class + */ +class TextStorageBuilder +{ +public: + // Define a shortcut + typedef TextCollection::TextPosition TextPosition; + + // Build up simple uchar array + explicit TextStorageBuilder(TextPosition n) + : n_(n), text_(new uchar [n]), freeText(true) + { } + + ~TextStorageBuilder() + { + if (freeText) + delete [] text_; + text_ = 0; + n_ = 0; + } + + // Write access to text[] + uchar& operator[] (TextPosition i) + { + return text_[i]; + } + + // Init TextStorage + TextStorage * InitTextStorage() + { + freeText = false; // Passing text to TextStorage. + return new TextStorage(text_, n_); + } + +private: + TextPosition n_; + uchar *text_; // FIXME Replace with a succinct representation. + bool freeText; +}; // class TextStorageBuilder + +} // namespace SXSI + +#endif // #ifndef _TextStorage_H_