From deeb5434b7e061741700eff08588539d34b717a6 Mon Sep 17 00:00:00 2001
From: nvalimak
Date: Mon, 23 Mar 2009 15:33:35 +0000
Subject: [PATCH] Jouni's Incremental BWT integrated into TextCollection

git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/XMLTree@272 3cdefd35-fc62-479d-8e8d-bae585ffb9ca
---
Review note: in the XMLTree::CloseDocument() hunk the added line
"assert(Text = 0);" assigned to Text instead of comparing it, so the asserted
expression was always a null pointer (false) and the assertion would fail
whenever assertions are compiled in. It now reads "assert(Text == 0);".
Hunk line counts and the diffstat are unchanged by this fix.
Removed/added pairs that look identical below are presumably whitespace-only
changes whose original whitespace is not recoverable here.

 XMLTree.cpp | 31 ++++++++++++++++++++-----------
 XMLTree.h   | 11 +++++++----
 makefile    |  2 +-
 3 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/XMLTree.cpp b/XMLTree.cpp
index 8f8739d..70cf4b7 100644
--- a/XMLTree.cpp
+++ b/XMLTree.cpp
@@ -223,8 +223,7 @@ XMLTree *XMLTree::Load(unsigned char *filename, int sample_rate_text)
 
    // loads the texts
    if (!XML_Tree->disable_tc){
-      XML_Tree->Text = TextCollection::InitTextCollection(sample_rate_text);
-      XML_Tree->Text->Load(fp,sample_rate_text);
+      XML_Tree->Text = TextCollection::Load(fp,sample_rate_text);
       int sst;
       int st;
       ufread(&sst, sizeof(int),1,fp);
@@ -284,7 +283,9 @@ XMLTree::~XMLTree()
    Tags = NULL;
 
    //Text->~TextCollection();
-   delete Text;
+   delete TextBuilder;
+   TextBuilder = NULL;
+   delete Text;
    Text = NULL;
 
    initialized = false;
@@ -1013,9 +1014,11 @@ int XMLTree::OpenDocument(bool empty_texts, int sample_rate_text,bool dtc)
 
    if (!indexing_empty_texts)
       empty_texts_aux = (unsigned int *)umalloc(sizeof(unsigned int));
-
-
-   Text = TextCollection::InitTextCollection((unsigned)sample_rate_text);
+   if (disable_tc)
+      TextBuilder = 0;
+   else
+      TextBuilder = new TextCollectionBuilder((unsigned)sample_rate_text);
+   Text = 0;
 
    return 1;  // indicates success in the initialization of the data structure
 }
@@ -1073,10 +1076,16 @@ int XMLTree::CloseDocument()
 
    //delete ssb;
 
-   // makes the text collection static
+   // makes the text collection static
    if (!disable_tc)
-      Text->MakeStatic();
-
+   {
+      assert(Text == 0);
+      assert(TextBuilder != 0);
+      Text = TextBuilder->InitTextCollection();
+      delete TextBuilder;
+      TextBuilder = 0;
+   }
+
    // creates the data structure marking the non-empty texts (just in the case it is necessary)
    if (!indexing_empty_texts) {
       EBVector = new static_bitsequence_rrr02((uint *)empty_texts_aux,(ulong)npar,(uint)32);
@@ -1214,7 +1223,7 @@ int XMLTree::NewText(unsigned char *s)
       bitset(empty_texts_aux, npar-1);  // marks the non-empty text with a 1 in the bit vector
    }
 
-   Text->InsertText(s);
+   TextBuilder->InsertText(s);
    string cpps = (char*) s;
    CachedText.push_back(cpps);
 
@@ -1239,7 +1248,7 @@ int XMLTree::NewEmptyText()
 
       bitclean(empty_texts_aux, npar-1);  // marks the empty text with a 0 in the bit vector
    }
-   else Text->InsertText(&c); // we insert the empty text just in case we index all the texts
+   else TextBuilder->InsertText(&c); // we insert the empty text just in case we index all the texts
 
    return 1; // success
 }
diff --git a/XMLTree.h b/XMLTree.h
index 80173a4..9b79ae3 100644
--- a/XMLTree.h
+++ b/XMLTree.h
@@ -21,7 +21,7 @@
 
 #ifndef XMLTREE_H_
 #define XMLTREE_H_
-#include "TextCollection/TextCollection.h"
+#include "TextCollection/TextCollectionBuilder.h"
 #include
 #include
 #include
@@ -37,6 +37,7 @@
 #include
 #include
 using SXSI::TextCollection;
+using SXSI::TextCollectionBuilder;
 
 
 // this constant is used to efficiently compute the child operation in the tree
@@ -88,7 +89,9 @@ class XMLTree {
    uint tags_blen, tags_len;
 
    /** The texts in the XML document */
+   TextCollectionBuilder *TextBuilder;
    TextCollection *Text;
+
 
    /** The texts in the XML document (cached for faster display) */
    vector CachedText;
@@ -125,7 +128,7 @@ public:
    void print_stats();
 
    /** Data structure constructor */
-   XMLTree() {finished = false; initialized = false;};
+   XMLTree() {finished = false; initialized = false; Text = 0; TextBuilder = 0; };
 
    /** Data structure destructor */
    ~XMLTree();
@@ -331,7 +334,7 @@ public:
    TagType RegisterTag(unsigned char *tagname);
 
    bool EmptyText(DocID i) {
-      return Text->EmptyText(i);
+      return Text->EmptyText(i);
    }
    /** Prefix(s): search for texts prefixed by string s. */
    TextCollection::document_result Prefix(uchar const *s) {
@@ -420,7 +423,7 @@ public:
    /** GetText(d): returns the text corresponding to document with
     * id d. */
    uchar* GetText(DocID d) {
-      return Text->GetText(d);
+      return Text->GetText(d);
    }
 
    uchar* GetCachedText(DocID d) {
diff --git a/makefile b/makefile
index 4f06f3b..e1705ef 100644
--- a/makefile
+++ b/makefile
@@ -1,7 +1,7 @@
 FLAGS =-std=c++0x -O3 -I./libcds/includes/
 OBJECTS=libcds/lib/libcds.a
 
-OBJECTS_TC= TextCollection/TextCollection.o TextCollection/CSA.o TextCollection/Tools.o TextCollection/BitRank.o TextCollection/bittree.o TextCollection/rbtree.o TextCollection/dynFMI.o TextCollection/RLWaveletTree.o TextCollection/GapEncode.o TextCollection/BSGAP.o
+OBJECTS_TC= TextCollection/TextCollection.o TextCollection/TextCollectionBuilder.o TextCollection/TCImplementation.o TextCollection/Tools.o TextCollection/BitRank.o TextCollection/BSGAP.o TextCollection/incbwt/rlcsa.a
 
 all: libcds text_collection XMLTree
 
-- 
2.17.1