From 663cd2f6cc5e3796d001e8c527de0aea8c8bbf68 Mon Sep 17 00:00:00 2001 From: nvalimak Date: Wed, 4 Mar 2009 19:35:25 +0000 Subject: [PATCH] Added Count git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/TextCollection@204 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- CSA.cpp | 33 +++++++++++++-------------------- CSA.h | 11 +++++++++++ TextCollection.h | 16 +++++++--------- 3 files changed, 31 insertions(+), 29 deletions(-) diff --git a/CSA.cpp b/CSA.cpp index 5f69aa8..04ace79 100644 --- a/CSA.cpp +++ b/CSA.cpp @@ -40,7 +40,7 @@ const uchar CSA::versionFlag = 2; //////////////////////////////////////////////////////////////////////////// // Class CSA::THuffAlphabetRank - +// FIXME Unused code CSA::THuffAlphabetRank::THuffAlphabetRank(uchar *s, TextPosition n, TCodeEntry *codetable, unsigned level) { left = NULL; right = NULL; @@ -122,26 +122,8 @@ CSA::THuffAlphabetRank::~THuffAlphabetRank() { delete bitrank; } -/** - * Saving data fields: - BitRank *bitrank; - bool leaf; - uchar ch; - left child; - right child; -*/ -void CSA::THuffAlphabetRank::Save(FILE *file) -{ - -} - -CSA::THuffAlphabetRank::THuffAlphabetRank(FILE *file) -{ - - -} -/////////////////////////////////////////////f/////////////////////////////// +//////////////////////////////////////////////////////////////////////////// // Class CSA /** @@ -408,6 +390,17 @@ bool CSA::IsLessThan(uchar const*) const /****************************************************************** * Counting queries */ +ulong CSA::Count(uchar const * pattern) const +{ + TextPosition m = strlen((char *)pattern); + if (m == 0) + return 0; + + TextPosition sp = 0, ep = 0; + unsigned count = (unsigned) Search(pattern, m, &sp, &ep); + return count; +} + unsigned CSA::CountPrefix(uchar const * pattern) const { TextPosition m = strlen((char *)pattern); diff --git a/CSA.h b/CSA.h index ec6e890..df56585 100644 --- a/CSA.h +++ b/CSA.h @@ -41,6 +41,7 @@ #else # define W 32 #endif +#undef bitset /** @@ -73,6 +74,11 @@ public: void MakeStatic(); bool EmptyText(DocId) const; uchar* GetText(DocId) const; + /** + * Next method is not supported: + * Supporting GetText for some substring [i,j] + * would require more space. + */ // uchar* GetText(DocId, TextPosition, TextPosition) const; bool IsPrefix(uchar const *) const; @@ -81,6 +87,7 @@ public: bool IsContains(uchar const *) const; bool IsLessThan(uchar const *) const; + ulong Count(uchar const *) const; unsigned CountPrefix(uchar const *) const; unsigned CountSuffix(uchar const *) const; unsigned CountEqual(uchar const *) const; @@ -128,6 +135,7 @@ public: } private: + // FIXME Unused code class TCodeEntry { public: unsigned count; @@ -137,6 +145,7 @@ private: }; + // FIXME Unused code class THuffAlphabetRank { // using fixed 0...255 alphabet private: @@ -223,6 +232,7 @@ private: } }; + // FIXME Unused code class node { private: unsigned weight; @@ -256,6 +266,7 @@ private: static TCodeEntry *makecodetable(uchar *, TextPosition); }; + // FIXME Unused code static const unsigned char print = 1; static const unsigned char report = 1; static const uchar versionFlag; diff --git a/TextCollection.h b/TextCollection.h index ef0eb72..37d5cb5 100644 --- a/TextCollection.h +++ b/TextCollection.h @@ -111,7 +111,7 @@ namespace SXSI * * Note: Do we need this? * Forward iterator would be really in-efficient compared to - * getText(k) and getText(k, i, j). + * getText(k). * * TODO Define and implement const_reverse_iterator. */ @@ -132,8 +132,12 @@ namespace SXSI virtual bool IsLessThan(uchar const *) const = 0; /** - * Counting queries - * + * Counting queries + * Result is the number of occurrences. + */ + virtual ulong Count(uchar const *) const = 0; + /** + * More counting queries * Result is the number of documents. */ virtual unsigned CountPrefix(uchar const *) const = 0; @@ -164,12 +168,6 @@ namespace SXSI typedef std::vector > full_result; virtual full_result FullContains(uchar const *) const = 0; - - /** - *Debug - * - */ - virtual TextPosition Lookup(TextPosition) const = 0; protected: // Protected constructor; call the static function InitTextCollection(). TextCollection() { }; -- 2.17.1