X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLTree.cpp;h=c87c6dc008443bb723a450f7ecef9aff560598a3;hb=f97501e008660c0363f0fe643be09de66efd3533;hp=9be500ee993f4af9a0565a2513c64d23bf902531;hpb=b9205dab05e219c2ac3ce7cf5fe0107267d60535;p=SXSI%2FXMLTree.git diff --git a/XMLTree.cpp b/XMLTree.cpp index 9be500e..c87c6dc 100644 --- a/XMLTree.cpp +++ b/XMLTree.cpp @@ -1,8 +1,11 @@ #include "basics.h" -//#include -#include #include "XMLTree.h" #include "timings.h" +#include +using std::cout; +using std::endl; +using std::min; +using std::string; // functions to convert tag positions to the corresponding tree node and viceversa. // These are implemented in order to be able to change the tree and Tags representations, @@ -53,7 +56,7 @@ static treeNode fast_first_child(bp *Par, treeNode x) static treeNode fast_next_sibling(bp* Par,treeNode x) { - x = fwd_excess(Par,x,0); + x = fast_find_close(Par,x)+1; return (fast_inspect(Par,x) == OP) ? x : NULLT; } @@ -255,7 +258,7 @@ XMLTree *XMLTree::Load(int fd, bool load_tc,int sample_factor) XMLTree *XML_Tree; int i; - + buffer[1023] = '\0'; fp = fdopen(fd, "r"); @@ -269,23 +272,21 @@ XMLTree *XMLTree::Load(int fd, bool load_tc,int sample_factor) PRINTTIME("Loading parenthesis struct", Loading); STARTTIMER(); - XML_Tree->TagName = new vector(); - XML_Tree->tIdMap = new std::unordered_map(); - - string s; + XML_Tree->TagName = new std::vector(); + XML_Tree->tIdMap = new std::unordered_map(); + std::string s; int ntags; // Load the tag names ufread(&ntags, sizeof(int), 1, fp); for (i=0; iTagName->push_back(s); + XML_Tree->TagName->push_back(s); XML_Tree->tIdMap->insert(std::make_pair(s,i)); }; @@ -366,7 +367,7 @@ int XMLTree::SubtreeTags(treeNode x, TagType tag) int s = x + 2*subtree_size(Par, x) - 1; - return Tags->rank(tag, s) - Tags->rank(tag, node2tagpos(x)-1); + return (Tags->rank(tag, s) - Tags->rank(tag, node2tagpos(x)-1))+1; } int XMLTree::SubtreeElements(treeNode x) { @@ -538,22 +539,6 @@ treeNode XMLTree::NextElement(treeNode x) } else return x; } -value XMLTree::CamlFirstElement(value x) -{ - return Val_int(FirstElement(Int_val(x))); -} -value XMLTree::CamlNextElement(value x) -{ - return Val_int(NextElement(Int_val(x))); -} - -extern "C" value caml_cpp_fast_first_element(value xmltree, value node){ - return XMLTREE(xmltree)->CamlFirstElement(node); -} - -extern "C" value caml_cpp_fast_next_element(value xmltree, value node){ - return XMLTREE(xmltree)->CamlNextElement(node); -} // LastChild(x): returns the last child of node x. treeNode XMLTree::LastChild(treeNode x) @@ -944,11 +929,15 @@ bool XMLTree::IsOpen(treeNode x) { return fast_inspect(Par,x); } //WARNING this uses directly the underlying implementation for plain text - void XMLTree::Print(int fd,treeNode x, bool no_text){ int newfd = dup(fd); stream = fdopen(newfd,"wa"); + if (stream == 0){ + perror(NULL); + return; + }; + if (buffer == 0) buffer = new string(); @@ -959,8 +948,8 @@ void XMLTree::Print(int fd,treeNode x, bool no_text){ uchar * tagstr; range r = DocIds(x); treeNode first_idx; - treeNode first_text = (tag == PCDATA_TAG_ID ? x : TaggedDescendant(x,PCDATA_TAG_ID)); - treeNode first_att = NULLT;//TaggedDesc(x,ATTRIBUTE_DATA_TAG_ID); + treeNode first_text = (tag == PCDATA_TAG_ID ? x : ParentNode(r.min-1)); + treeNode first_att = NULLT; if (first_att == NULLT) first_idx = first_text; @@ -972,18 +961,18 @@ void XMLTree::Print(int fd,treeNode x, bool no_text){ uchar * current_text=NULL; if (first_idx != NULLT) current_text = GetText(MyText(first_idx)); - int read = 0; + size_t read = 0; std::vector st; while (n <= fin){ if (fast_inspect(Par,n)){ if (tag == PCDATA_TAG_ID ) { - // myfputs((const char*) (GetText(MyTextUnsafe(n))),fp); + if (no_text) myfputs("<$/>",fp); else{ - read = fprintf(fp,"%s",(const char*) current_text); - current_text += (read + 1); - } + read = myfprintf((const char*) current_text, fp); + current_text += (read + 1); + }; n+=2; // skip closing $ tag = Tag(n); @@ -1002,11 +991,10 @@ void XMLTree::Print(int fd,treeNode x, bool no_text){ while (fast_inspect(Par,n)){ if (no_text) { myfputc('<',fp); - const uchar * tmp = &(GetTagNameByRef(Tag(n))[3]); - myfputs((const char*) tmp,fp); + myfputs((const char*) &(GetTagNameByRef(Tag(n))[3]),fp); myfputc('>',fp); myfputs("<$@/>',fp); n+= 4; } @@ -1015,9 +1003,8 @@ void XMLTree::Print(int fd,treeNode x, bool no_text){ myfputs((const char*) &(GetTagNameByRef(Tag(n))[3]),fp); n++; myfputs("=\"",fp); - read = fprintf(fp,"%s",(const char*) current_text); + read = myfprintf((const char*) current_text,fp); current_text += (read + 1); - //myfputs((const char*) GetText(MyTextUnsafe(n)),fp); myfputc('"',fp); n+=3; } @@ -1049,8 +1036,8 @@ void XMLTree::Print(int fd,treeNode x, bool no_text){ }while (!fast_inspect(Par,n) && !st.empty()); tag=Tag(n); }; - //myfputc('\n',fp); + myfputc('\n',fp); mybufferflush(fp); - fflush(fp); + //fflush(fp); fclose(fp); }