X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLTree.cpp;h=a37263a16c30577984fcbf5537d1ca382bed6a9f;hb=b6f779e236950176bab71fce980ac1685e8661c0;hp=4ddec0170a5186dc02c43e98ab59e525c89380c2;hpb=dd9992dcd5366f37820c24ed7cddf24ecbc0d549;p=SXSI%2FXMLTree.git diff --git a/XMLTree.cpp b/XMLTree.cpp index 4ddec01..a37263a 100644 --- a/XMLTree.cpp +++ b/XMLTree.cpp @@ -105,7 +105,15 @@ void XMLTree::Save(unsigned char *filename) // stores the texts if (!disable_tc) Text->Save(fp); - + if (!disable_tc){ + int st = CachedText.size(); + ufwrite(&st, sizeof(int),1,fp); + for (int i = 0; i< CachedText.size(); ++i){ + st = CachedText.at(i).size(); + ufwrite(&st, sizeof(int),1,fp); + ufwrite(CachedText.at(i).c_str(),sizeof(char),(1+strlen(CachedText.at(i).c_str())),fp); + }; + }; fclose(fp); } @@ -192,11 +200,27 @@ XMLTree *XMLTree::Load(unsigned char *filename, int sample_rate_text) if (!XML_Tree->disable_tc){ XML_Tree->Text = TextCollection::InitTextCollection(sample_rate_text); XML_Tree->Text->Load(fp,sample_rate_text); + int sst; + int st; + ufread(&sst, sizeof(int),1,fp); + for (int i=0;iCachedText.push_back(cppstr); + free(str); + }; + } else XML_Tree->Text = NULL; s_text = ftell(fp) - s_text; + + + + fclose(fp); std::cerr << "Tree part is " << s_tree/1024 << " Kbytes,\n" @@ -557,9 +581,30 @@ treeNode XMLTree::TaggedPrec(treeNode x, TagType tag) return NULLT; // there is no such node } + // TaggedFoll(x,tag): returns the first node tagged tag with larger preorder than x and not in // the subtree of x. Returns NULLT if there is none. -treeNode XMLTree::TaggedFoll(treeNode x, TagType tag) +treeNode XMLTree::TaggedFoll(treeNode x, TagType tag) + { + if (!finished) { + fprintf(stderr, "Error: data structure has not been constructed properly\n"); + exit(1); + } + + int r, s; + if (x ==NULLT || x == Root()) + return NULLT; + + r = (int) Tags->rank(tag, find_close(Par, x)); + s = (int) Tags->select(tag, r+1); // select returns -1 in case that there is no r+1-th tag. + if (s==-1) return NULLT; + else return tagpos2node(s); + } + + +// TaggedFollowingSibling(x,tag): returns the first node tagged tag with larger preorder than x and not in +// the subtree of x. Returns NULLT if there is none. +treeNode XMLTree::TaggedFollowingSibling(treeNode x, TagType tag) { if (!finished) { fprintf(stderr, "Error: data structure has not been constructed properly\n"); @@ -567,15 +612,39 @@ treeNode XMLTree::TaggedFoll(treeNode x, TagType tag) } int r, s; - if (x ==NULLT || x == Root()|| (next_sibling(Par,x) == -1 )) + treeNode ns = next_sibling(Par,x); + + if (x == NULLT || x == Root() || ns == -1) return NULLT; - r = (int) Tags->rank(tag, node2tagpos(next_sibling(Par, x))-1); + r = (int) Tags->rank(tag, node2tagpos(ns)-1); s = (int) Tags->select(tag, r+1); // select returns -1 in case that there is no r+1-th tag. if (s==-1) return NULLT; else return tagpos2node(s); } + +// TaggedAncestor(x, tag): returns the closest ancestor of x tagged tag. Return +// NULLT is there is none. +treeNode XMLTree::TaggedAncestor(treeNode x, TagType tag) + { + if (!finished) { + fprintf(stderr, "Error: data structure has not been constructed properly\n"); + exit(1); + } + + if (x == NULLT || x == Root()) + return NULLT; + + treeNode s = parent(Par, x), r = Root(); + while (s != r) { + if (Tags->access(node2tagpos(s)) == tag) return s; + s = parent(Par, s); + } + return NULLT; + } + + // PrevText(x): returns the document identifier of the text to the left // of node x, or NULLT if x is the root node or the text is empty. // Assumes Doc ids start from 0. @@ -842,7 +911,7 @@ int XMLTree::NewOpenTag(unsigned char *tagname) tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags npar++; - + return 1; } @@ -866,17 +935,16 @@ int XMLTree::NewClosingTag(unsigned char *tagname) parArraySize *= 2; } - setbit(par_aux,npar,CP); // marks a new closing opening parenthesis + setbit(par_aux,npar,CP); // marks a new closing parenthesis // transforms the tagname into a tag identifier. If the tag is new, we insert // it in the table. for (i=0; iInsertText(s); + string cpps = (char*) s; + CachedText.push_back(cpps); return 1; // success }