Added TextStorage class
author nvalimak <nvalimak@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Fri, 8 May 2009 12:03:39 +0000 (12:03 +0000)
committer nvalimak <nvalimak@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Fri, 8 May 2009 12:03:39 +0000 (12:03 +0000)
git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/TextCollection@376 3cdefd35-fc62-479d-8e8d-bae585ffb9ca

TextStorage.h [new file with mode: 0644]

diff --git a/TextStorage.h b/TextStorage.h
new file mode 100644 (file)
index 0000000..237bc44
--- /dev/null
@@ -0,0 +1,154 @@
+/******************************************************************************
+ *   Copyright (C) 2009 Niko Välimäki                                         *
+ *                                                                            *
+ *                                                                            *
+ *   This program is free software; you can redistribute it and/or modify     *
+ *   it under the terms of the GNU Lesser General Public License as published *
+ *   by the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                      *
+ *                                                                            *
+ *   This program is distributed in the hope that it will be useful,          *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of           *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            *
+ *   GNU Lesser General Public License for more details.                      *
+ *                                                                            *
+ *   You should have received a copy of the GNU Lesser General Public License *
+ *   along with this program; if not, write to the                            *
+ *   Free Software Foundation, Inc.,                                          *
+ *   51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.            *
+ *****************************************************************************/
+
+#ifndef _TextStorage_H_
+#define _TextStorage_H_
+
+#include <cassert>
+#include <cstdio>
+#include <stdexcept>
+
+#include "incbwt/bits/deltavector.h"
+
+namespace SXSI 
+{
+
+/**
+ * Text collection that supports fast extraction.
+ */
+class TextStorage
+{
+public:
+    // Define a shortcut
+    typedef TextCollection::TextPosition TextPosition;
+    // Block size in DeltaVector
+    const static CSA::usint DV_BLOCK_SIZE = 16;
+
+
+    TextStorage(uchar *text, TextPosition n)
+        : n_(n), text_(text), offsets_(0), numberOfTexts_(0)
+    { 
+        initOffsets();
+    }
+
+    TextStorage(FILE *file)
+        : n_(0), text_(0), offsets_(0), numberOfTexts_(0)
+    {
+        if (std::fread(&(this->n_), sizeof(TextPosition), 1, file) != 1)
+            throw std::runtime_error("TextStorage::Load(): file read error (n_).");
+        
+        text_ = new uchar[n_];
+        if (std::fread(this->text_, sizeof(uchar), n_, file) != n_)
+            throw std::runtime_error("TextStorage::Load(): file read error (text_).");
+
+        initOffsets();
+    }
+
+    void Save(FILE *file)
+    {
+        if (std::fwrite(&(this->n_), sizeof(TextPosition), 1, file) != 1)
+            throw std::runtime_error("TextStorage::Save(): file write error (n_).");
+
+          if (std::fwrite(this->text_, sizeof(uchar), n_, file) != n_)
+            throw std::runtime_error("TextStorage::Save(): file write error (text_).");
+    }
+
+    ~TextStorage()
+    {
+        delete offsets_;
+        offsets_ = 0;
+        delete [] text_;
+        text_ = 0;
+        n_ = 0;
+    }
+
+    uchar * GetText(TextCollection::DocId docId)
+    {
+        assert(docId < numberOfTexts_);
+
+        TextPosition offset = offsets_->select(docId);
+        return &text_[offset];
+    }
+
+
+
+private:
+    void initOffsets()
+    {
+        // Delta encoded bitvector of text offsets.
+        CSA::DeltaEncoder encoder(DV_BLOCK_SIZE);
+        encoder.setBit(0); // Start of the first text.
+
+        // Read offsets by finding text end positions:
+        for (TextPosition i = 0; i < n_ - 1; ++i)
+            if (text_[i] == '\0')
+                encoder.setBit(i+1);
+
+        offsets_ = new CSA::DeltaVector(encoder, n_);
+        numberOfTexts_ = offsets_->rank(n_ - 1);
+    }
+
+    TextPosition n_;
+    uchar *text_; // FIXME Replace with a succinct representation.
+    CSA::DeltaVector *offsets_;
+    TextPosition numberOfTexts_;
+}; // class TextStorage
+
+
+/**
+ * Builder for TextStorage class
+ */
+class TextStorageBuilder
+{
+public:
+    // Define a shortcut
+    typedef TextCollection::TextPosition TextPosition;
+
+    // Build up simple uchar array
+    explicit TextStorageBuilder(TextPosition n)
+        : n_(n), text_(new uchar [n]), freeText(true)
+    { }
+    
+    ~TextStorageBuilder()
+    {
+        if (freeText)
+            delete [] text_;
+        text_ = 0;
+        n_ = 0;
+    }
+    
+    // Write access to text[]
+    uchar& operator[] (TextPosition i)
+    {
+        return text_[i];
+    }
+
+    // Init TextStorage
+    TextStorage * InitTextStorage()
+    {
+        freeText = false; // Passing text to TextStorage.
+        return new TextStorage(text_, n_);
+    }
+
+private:
+    TextPosition n_;
+    uchar *text_; // FIXME Replace with a succinct representation.
+    bool freeText;
+}; // class TextStorageBuilder
+
+} // namespace SXSI
+
+#endif // #ifndef _TextStorage_H_