projects
/
SXSI
/
TextCollection.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Debug swcsa
[SXSI/TextCollection.git]
/
TextStorage.h
diff --git a/TextStorage.h b/TextStorage.h
index fcd66e5..1f809b8 100644 (file)
--- a/TextStorage.h
+++ b/TextStorage.h
@@ -23,7 +23,16 @@
#include "TextCollection.h"
#include "Tools.h"
#include "TextCollection.h"
#include "Tools.h"
+
#include "incbwt/bits/deltavector.h"
#include "incbwt/bits/deltavector.h"
+// Re-define word size to ulong:
+#undef W
+#if __WORDSIZE == 64
+# define W 64
+#else
+# define W 32
+#endif
+
#include <cassert>
#include <stdexcept>
#include <cassert>
#include <stdexcept>
@@ -59,20 +68,22 @@ public:
virtual ~TextStorage()
{
virtual ~TextStorage()
{
+ delete offit_;
+ offit_ = 0;
delete offsets_;
delete offsets_;
- offsets_ = 0;
+ offsets_ = 0;
}
TextCollection::DocId DocIdAtTextPos(TextCollection::TextPosition i) const
{
assert(i < n_);
}
TextCollection::DocId DocIdAtTextPos(TextCollection::TextPosition i) const
{
assert(i < n_);
- return offsets_->rank(i)-1;
+ return offit_->rank(i)-1;
}
TextCollection::TextPosition TextStartPos(TextCollection::DocId i) const
{
assert(i < (TextCollection::DocId)numberOfTexts_);
}
TextCollection::TextPosition TextStartPos(TextCollection::DocId i) const
{
assert(i < (TextCollection::DocId)numberOfTexts_);
- return offsets_->select(i);
+ return offit_->select(i);
}
bool IsEndmarker(TextCollection::TextPosition i) const
}
bool IsEndmarker(TextCollection::TextPosition i) const
@@ -80,7 +91,7 @@ public:
assert(i < n_);
if (i >= n_ - 1)
return true;
assert(i < n_);
if (i >= n_ - 1)
return true;
- return offsets_->isSet(i+1);
+ return offit_->isSet(i+1);
}
}
@@ -91,7 +102,7 @@ protected:
const static CSA::usint DV_BLOCK_SIZE = 32;
TextStorage(uchar const * text, TextPosition n)
const static CSA::usint DV_BLOCK_SIZE = 32;
TextStorage(uchar const * text, TextPosition n)
- : n_(n), offsets_(0), numberOfTexts_(0)
+ : n_(n), offsets_(0), offit_(0), numberOfTexts_(0)
{
// Delta encoded bitvector of text offsets.
CSA::DeltaEncoder encoder(DV_BLOCK_SIZE);
{
// Delta encoded bitvector of text offsets.
CSA::DeltaEncoder encoder(DV_BLOCK_SIZE);
@@ -104,12 +115,8 @@ protected:
offsets_ = new CSA::DeltaVector(encoder, n_);
offsets_ = new CSA::DeltaVector(encoder, n_);
-
-/* for (ulong i = 0; i < n_-1; ++i)
- if ((text[i] == '\0') != IsEndmarker(i))
- std::cout << "misplaced endmarker at i = " << i << std::endl;
-*/
- numberOfTexts_ = offsets_->rank(n_ - 1);
+ offit_ = new CSA::DeltaVector::Iterator(*(offsets_));
+ numberOfTexts_ = offit_->rank(n_ - 1);
}
TextStorage(std::FILE *);
}
TextStorage(std::FILE *);
@@ -117,6 +124,7 @@ protected:
TextPosition n_;
CSA::DeltaVector *offsets_;
TextPosition n_;
CSA::DeltaVector *offsets_;
+ CSA::DeltaVector::Iterator *offit_;
TextPosition numberOfTexts_;
};
TextPosition numberOfTexts_;
};
@@ -157,7 +165,7 @@ public:
{
assert(docId < (TextCollection::DocId)numberOfTexts_);
{
assert(docId < (TextCollection::DocId)numberOfTexts_);
- TextPosition offset = offsets_->select(docId);
+ TextPosition offset = offit_->select(docId);
return &text_[offset];
}
return &text_[offset];
}
@@ -166,7 +174,7 @@ public:
assert(i < (TextCollection::DocId)numberOfTexts_);
assert(j < (TextCollection::DocId)numberOfTexts_);
assert(i < (TextCollection::DocId)numberOfTexts_);
assert(j < (TextCollection::DocId)numberOfTexts_);
- TextPosition offset = offsets_->select(i);
+ TextPosition offset = offit_->select(i);
return &text_[offset];
}
return &text_[offset];
}
@@ -182,7 +190,7 @@ private:
/******************************************************************
* LZ-index text collection.
*/
/******************************************************************
* LZ-index text collection.
*/
-struct LzTriePimpl; // Pimpl, declared in .cpp
+struct LzTriePimpl; // Using Pimpl idiom to hide LzTrie implementation.
class TextStorageLzIndex : public TextStorage
{
class TextStorageLzIndex : public TextStorage
{
@@ -252,7 +260,7 @@ public:
private:
TextPosition n_;
private:
TextPosition n_;
- uchar *text_; // FIXME Replace with a succinct representation.
+ uchar *text_;
bool freeText;
}; // class TextStorageBuilder
bool freeText;
}; // class TextStorageBuilder