X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLTree.cpp;h=a37263a16c30577984fcbf5537d1ca382bed6a9f;hb=3c8f8af6704ca98b36b503878058aa0619806dad;hp=b93e75a5e773d737af40667a91f229de8260121c;hpb=a0808cd570d8645798ae80287d2b4845ceea36a2;p=SXSI%2FXMLTree.git diff --git a/XMLTree.cpp b/XMLTree.cpp index b93e75a..a37263a 100644 --- a/XMLTree.cpp +++ b/XMLTree.cpp @@ -105,7 +105,15 @@ void XMLTree::Save(unsigned char *filename) // stores the texts if (!disable_tc) Text->Save(fp); - + if (!disable_tc){ + int st = CachedText.size(); + ufwrite(&st, sizeof(int),1,fp); + for (int i = 0; i< CachedText.size(); ++i){ + st = CachedText.at(i).size(); + ufwrite(&st, sizeof(int),1,fp); + ufwrite(CachedText.at(i).c_str(),sizeof(char),(1+strlen(CachedText.at(i).c_str())),fp); + }; + }; fclose(fp); } @@ -120,7 +128,10 @@ XMLTree *XMLTree::Load(unsigned char *filename, int sample_rate_text) char buffer[1024]; XMLTree *XML_Tree; int i; - + size_t s_tree = 0; + long s_text = 0; + size_t s_tags = 0; + // first load the tree topology sprintf(buffer, "%s.srx", filename); fp = fopen(buffer, "r"); @@ -134,10 +145,18 @@ XMLTree *XMLTree::Load(unsigned char *filename, int sample_rate_text) XML_Tree->Par = (bp *)umalloc(sizeof(bp)); loadTree(XML_Tree->Par, fp); - + + s_tree += sizeof(bp); + // stores the table with tag names ufread(&XML_Tree->ntagnames, sizeof(int), 1, fp); + + s_tree += sizeof(int); + XML_Tree->TagName = (unsigned char **)umalloc(XML_Tree->ntagnames*sizeof(unsigned char *)); + + s_tags += sizeof(unsigned char*)*XML_Tree->ntagnames; + for (i=0; intagnames;i++) { @@ -155,6 +174,7 @@ XMLTree *XMLTree::Load(unsigned char *filename, int sample_rate_text) int len = strlen((const char*)buffer); XML_Tree->TagName[i] = (unsigned char *)ucalloc(len,sizeof(char)); strncpy((char *)XML_Tree->TagName[i], (const char *)buffer,len - 1); + s_tags+= len*sizeof(char); } // loads the flags @@ -163,21 +183,53 @@ XMLTree *XMLTree::Load(unsigned char *filename, int sample_rate_text) ufread(&(XML_Tree->initialized), sizeof(bool), 1, fp); ufread(&(XML_Tree->finished), sizeof(bool), 1, fp); ufread(&(XML_Tree->disable_tc), sizeof(bool), 1, fp); + + s_tree+=sizeof(bool)*4; if (!(XML_Tree->indexing_empty_texts)) XML_Tree->EBVector = static_bitsequence_rrr02::load(fp); - + + s_tree+= XML_Tree->EBVector->size(); + // loads the tags XML_Tree->Tags = static_sequence::load(fp); + s_tree+= XML_Tree->Tags->size(); + + s_text = ftell(fp); // loads the texts if (!XML_Tree->disable_tc){ XML_Tree->Text = TextCollection::InitTextCollection(sample_rate_text); XML_Tree->Text->Load(fp,sample_rate_text); + int sst; + int st; + ufread(&sst, sizeof(int),1,fp); + for (int i=0;iCachedText.push_back(cppstr); + free(str); + }; + } else XML_Tree->Text = NULL; + s_text = ftell(fp) - s_text; + + + + fclose(fp); + + std::cerr << "Tree part is " << s_tree/1024 << " Kbytes,\n" + << "with node->tagid part " << XML_Tree->Tags->size()/1024 << "Kbytes \n" + << "size of Tag part : " << XML_Tree->Tags->length () << " elements\n" + << "sizof(unsigned int)* " << XML_Tree->Tags->length () << " = " << + sizeof(unsigned int) * XML_Tree->Tags->length () / 1024 << " Kbytes\n" + << "Tag part is " << s_tags/1024 << " Kbytes,\n" + << "Text collection is " << s_text/1024 << " Kbytes \n"; return XML_Tree; } @@ -240,6 +292,9 @@ int XMLTree::SubtreeTags(treeNode x, TagType tag) fprintf(stderr, "Error: data structure has not been constructed properly\n"); exit(1); } + if (x == Root()) + x = first_child(Par,x); + int s = x + 2*subtree_size(Par, x) - 1; @@ -340,7 +395,7 @@ TagType XMLTree::Tag(treeNode x) fprintf(stderr, "Error: data structure has not been constructed properly\n"); exit(1); } - + return Tags->access(node2tagpos(x)); } @@ -468,6 +523,9 @@ treeNode XMLTree::TaggedDesc(treeNode x, TagType tag) int r, s; treeNode y; + if (isleaf(Par,x)) + return NULLT; + r = (int) Tags->rank(tag, node2tagpos(x)); s = (int) Tags->select(tag, r+1); if (s == -1) return NULLT; // there is no such node @@ -476,6 +534,28 @@ treeNode XMLTree::TaggedDesc(treeNode x, TagType tag) else return y; } +// TaggedNext(x,tag): returns the first node tagged tag with larger preorder than x +// Returns NULLT if there is none. +treeNode XMLTree::TaggedNext(treeNode x, TagType tag) + { + if (!finished) { + fprintf(stderr, "Error: data structure has not been constructed properly\n"); + exit(1); + } + + int r, s; + treeNode y; + if (x==NULLT) + return NULLT; + + r = (int) Tags->rank(tag, node2tagpos(x)); + s = (int) Tags->select(tag, r+1); + if (s == -1) return NULLT; // there is no such node + y = tagpos2node(s); // transforms the tag position into a node position + return (y<=x ? NULLT : y); + } + + // TaggedPrec(x,tag): returns the first node tagged tag with smaller preorder than x and not an // ancestor of x. Returns NULLT if there is none. treeNode XMLTree::TaggedPrec(treeNode x, TagType tag) @@ -501,9 +581,30 @@ treeNode XMLTree::TaggedPrec(treeNode x, TagType tag) return NULLT; // there is no such node } + // TaggedFoll(x,tag): returns the first node tagged tag with larger preorder than x and not in // the subtree of x. Returns NULLT if there is none. -treeNode XMLTree::TaggedFoll(treeNode x, TagType tag) +treeNode XMLTree::TaggedFoll(treeNode x, TagType tag) + { + if (!finished) { + fprintf(stderr, "Error: data structure has not been constructed properly\n"); + exit(1); + } + + int r, s; + if (x ==NULLT || x == Root()) + return NULLT; + + r = (int) Tags->rank(tag, find_close(Par, x)); + s = (int) Tags->select(tag, r+1); // select returns -1 in case that there is no r+1-th tag. + if (s==-1) return NULLT; + else return tagpos2node(s); + } + + +// TaggedFollowingSibling(x,tag): returns the first node tagged tag with larger preorder than x and not in +// the subtree of x. Returns NULLT if there is none. +treeNode XMLTree::TaggedFollowingSibling(treeNode x, TagType tag) { if (!finished) { fprintf(stderr, "Error: data structure has not been constructed properly\n"); @@ -511,12 +612,39 @@ treeNode XMLTree::TaggedFoll(treeNode x, TagType tag) } int r, s; - r = (int) Tags->rank(tag, node2tagpos(next_sibling(Par, x))-1); + treeNode ns = next_sibling(Par,x); + + if (x == NULLT || x == Root() || ns == -1) + return NULLT; + + r = (int) Tags->rank(tag, node2tagpos(ns)-1); s = (int) Tags->select(tag, r+1); // select returns -1 in case that there is no r+1-th tag. if (s==-1) return NULLT; else return tagpos2node(s); } + +// TaggedAncestor(x, tag): returns the closest ancestor of x tagged tag. Return +// NULLT is there is none. +treeNode XMLTree::TaggedAncestor(treeNode x, TagType tag) + { + if (!finished) { + fprintf(stderr, "Error: data structure has not been constructed properly\n"); + exit(1); + } + + if (x == NULLT || x == Root()) + return NULLT; + + treeNode s = parent(Par, x), r = Root(); + while (s != r) { + if (Tags->access(node2tagpos(s)) == tag) return s; + s = parent(Par, s); + } + return NULLT; + } + + // PrevText(x): returns the document identifier of the text to the left // of node x, or NULLT if x is the root node or the text is empty. // Assumes Doc ids start from 0. @@ -627,9 +755,10 @@ treeNode XMLTree::ParentNode(DocID d) if (d == NULLT) return NULLT; - int s = d; - // Kim : I added the d+1. before that, else was select1(d) - // and gave wrong results but I'm really poking a dead bear with a stick here. + int s; + // OJO : Kim : I added the d+1. before that, else branch was + // EBVector->select1(d) + // and gave wrong results (I'm really poking a bear with a stick here). if (indexing_empty_texts) s = d; else s = EBVector->select1(d+1); @@ -782,7 +911,7 @@ int XMLTree::NewOpenTag(unsigned char *tagname) tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags npar++; - + return 1; } @@ -806,17 +935,16 @@ int XMLTree::NewClosingTag(unsigned char *tagname) parArraySize *= 2; } - setbit(par_aux,npar,CP); // marks a new closing opening parenthesis + setbit(par_aux,npar,CP); // marks a new closing parenthesis // transforms the tagname into a tag identifier. If the tag is new, we insert // it in the table. for (i=0; iInsertText(s); + string cpps = (char*) s; + CachedText.push_back(cpps); return 1; // success }