From 164adea5e18f69c6afae6772ab8cb4444816d236 Mon Sep 17 00:00:00 2001 From: nvalimak Date: Fri, 29 May 2009 14:40:05 +0000 Subject: [PATCH] Approx pattern matching functions git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/XMLTree@422 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- XMLTree.h | 50 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/XMLTree.h b/XMLTree.h index 275b9bc..ee44d32 100644 --- a/XMLTree.h +++ b/XMLTree.h @@ -40,7 +40,7 @@ #include using SXSI::TextCollection; using SXSI::TextCollectionBuilder; - +using SXSI::TextStorage; // this constant is used to efficiently compute the child operation in the tree #define OPTD 10 @@ -325,6 +325,20 @@ public: TextCollection::document_result LessThan(uchar const *s) { return Text->LessThan(s); } + + /** KMismatches (s): returns document identifiers for the texts that + * contain occurrence of string s with at most K mismatches. */ + TextCollection::document_result KMismatches(uchar const *s, unsigned K) { + return Text->KMismatches(s, K); + } + + /** KErrors (s): returns document identifiers for the texts that + * contain occurrence of string s with at most K errors. + * The accepted "errors" are insertions, deletions and mutations of chars. + */ + TextCollection::document_result KErrors(uchar const *s, unsigned K) { + return Text->KErrors(s, K); + } /** IsPrefix(x): returns true if there is a text prefixed by string s. */ bool IsPrefix(uchar const *s) { @@ -385,18 +399,34 @@ public: } /** GetText(d): returns the text corresponding to document with - * id d. */ - uchar* GetText(DocID d) { - uchar * s = Text->GetText(d); - return (s[0] == 1 ? (uchar*)"" : s); + * id d. + * + * Implementation of GetText() may or may NOT + * require you to free() the pointer that is returned. + * Call DeleteText() for each pointer returned by GetText() + * to avoid possible memory leaks. + */ + uchar* GetText(DocID d) const { + return Text->GetText(d); + } + + /** + * Free the pointer returned by GetText(). + */ + void DeleteText(uchar *text) const { + Text->DeleteText(text); } /** GetText(i, j): returns the texts corresponding to documents with - * ids i, i+1, ..., j. Texts are separated by '\0' character. */ - // uchar* GetText(DocID i, DocID j) { - // uchar * s = Text->GetText(i, j); - // return (s[0] == 1 ? (uchar*)"" : s); - //} + * ids i, i+1, ..., j. Texts are separated by '\0' character. + * + * Call DeleteText() for each pointer returned by GetText() + * to avoid possible memory leaks. + */ + uchar* GetText(DocID i, DocID j) const { + return Text->GetText(i, j); + } + TextCollection *getTextCollection() { return Text; -- 2.17.1