X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLTree.cpp;h=c87c6dc008443bb723a450f7ecef9aff560598a3;hb=f97501e008660c0363f0fe643be09de66efd3533;hp=174c94c76ffe50379cfa1cdce354583e146dbac0;hpb=912ff50e1d38de484b503d8ef877a49a65765ab9;p=SXSI%2FXMLTree.git diff --git a/XMLTree.cpp b/XMLTree.cpp index 174c94c..c87c6dc 100644 --- a/XMLTree.cpp +++ b/XMLTree.cpp @@ -1,8 +1,11 @@ #include "basics.h" -//#include -#include #include "XMLTree.h" #include "timings.h" +#include +using std::cout; +using std::endl; +using std::min; +using std::string; // functions to convert tag positions to the corresponding tree node and viceversa. // These are implemented in order to be able to change the tree and Tags representations, @@ -53,7 +56,7 @@ static treeNode fast_first_child(bp *Par, treeNode x) static treeNode fast_next_sibling(bp* Par,treeNode x) { - x = fwd_excess(Par,x,0); + x = fast_find_close(Par,x)+1; return (fast_inspect(Par,x) == OP) ? x : NULLT; } @@ -107,6 +110,7 @@ XMLTree::XMLTree( pb * const par, uint npar, vector * const TN, TagIdM uint *empty_texts_bmp, TagType *tags, TextCollection * const TC, bool dis_tc) { + buffer = 0; // creates the data structure for the tree topology Par = (bp *)umalloc(sizeof(bp)); STARTTIMER(); @@ -248,12 +252,13 @@ void XMLTree::Save(int fd) // a pointer to the loaded data structure XMLTree *XMLTree::Load(int fd, bool load_tc,int sample_factor) { + FILE *fp; char buffer[1024]; XMLTree *XML_Tree; int i; - + buffer[1023] = '\0'; fp = fdopen(fd, "r"); @@ -267,23 +272,21 @@ XMLTree *XMLTree::Load(int fd, bool load_tc,int sample_factor) PRINTTIME("Loading parenthesis struct", Loading); STARTTIMER(); - XML_Tree->TagName = new vector(); - XML_Tree->tIdMap = new std::unordered_map(); - - string s; + XML_Tree->TagName = new std::vector(); + XML_Tree->tIdMap = new std::unordered_map(); + std::string s; int ntags; // Load the tag names ufread(&ntags, sizeof(int), 1, fp); for (i=0; iTagName->push_back(s); + XML_Tree->TagName->push_back(s); XML_Tree->tIdMap->insert(std::make_pair(s,i)); }; @@ -364,7 +367,7 @@ int XMLTree::SubtreeTags(treeNode x, TagType tag) int s = x + 2*subtree_size(Par, x) - 1; - return Tags->rank(tag, s) - Tags->rank(tag, node2tagpos(x)-1); + return (Tags->rank(tag, s) - Tags->rank(tag, node2tagpos(x)-1))+1; } int XMLTree::SubtreeElements(treeNode x) { @@ -536,22 +539,6 @@ treeNode XMLTree::NextElement(treeNode x) } else return x; } -value XMLTree::CamlFirstElement(value x) -{ - return Val_int(FirstElement(Int_val(x))); -} -value XMLTree::CamlNextElement(value x) -{ - return Val_int(NextElement(Int_val(x))); -} - -extern "C" value caml_cpp_fast_first_element(value xmltree, value node){ - return XMLTREE(xmltree)->CamlFirstElement(node); -} - -extern "C" value caml_cpp_fast_next_element(value xmltree, value node){ - return XMLTREE(xmltree)->CamlNextElement(node); -} // LastChild(x): returns the last child of node x. treeNode XMLTree::LastChild(treeNode x) @@ -941,18 +928,18 @@ bool XMLTree::IsOpen(treeNode x) { return fast_inspect(Par,x); } //WARNING this uses directly the underlying implementation for plain text -void XMLTree::Print(int fd,treeNode x){ + +void XMLTree::Print(int fd,treeNode x, bool no_text){ int newfd = dup(fd); stream = fdopen(newfd,"wa"); - /* if (stream_fd != fd){ - if (stream != NULL) - fclose(stream); - int newfd = dup(fd); - stream = fdopen(newfd,"wa"); - stream_fd = fd; - }; - */ + if (stream == 0){ + perror(NULL); + return; + }; + + if (buffer == 0) + buffer = new string(); FILE* fp = stream; treeNode fin = fast_find_close(Par,x); @@ -961,8 +948,8 @@ void XMLTree::Print(int fd,treeNode x){ uchar * tagstr; range r = DocIds(x); treeNode first_idx; - treeNode first_text = (tag == PCDATA_TAG_ID ? x : TaggedDescendant(x,PCDATA_TAG_ID)); - treeNode first_att = NULLT;//TaggedDesc(x,ATTRIBUTE_DATA_TAG_ID); + treeNode first_text = (tag == PCDATA_TAG_ID ? x : ParentNode(r.min-1)); + treeNode first_att = NULLT; if (first_att == NULLT) first_idx = first_text; @@ -974,52 +961,66 @@ void XMLTree::Print(int fd,treeNode x){ uchar * current_text=NULL; if (first_idx != NULLT) current_text = GetText(MyText(first_idx)); - int read = 0; - - std::stack st; + size_t read = 0; + std::vector st; while (n <= fin){ if (fast_inspect(Par,n)){ - if (tag == PCDATA_TAG_ID) { - // fputs((const char*) (GetText(MyTextUnsafe(n))),fp); - - read = fprintf(fp,"%s",(const char*) current_text); - current_text += (read + 1); - + if (tag == PCDATA_TAG_ID ) { + + if (no_text) + myfputs("<$/>",fp); + else{ + read = myfprintf((const char*) current_text, fp); + current_text += (read + 1); + }; n+=2; // skip closing $ tag = Tag(n); + } else { - - fputc('<',fp); + myfputc('<',fp); tagstr = (uchar*) GetTagNameByRef(tag); - fputs((const char*) tagstr ,fp); + myfputs((const char*) tagstr ,fp); n++; if (fast_inspect(Par,n)) { - st.push(tagstr); + st.push_back(tagstr); tag = Tag(n); if (tag == ATTRIBUTE_TAG_ID){ n++; + if (no_text) myfputs("><@@>",fp); while (fast_inspect(Par,n)){ - fputc(' ',fp); - fputs((const char*) &(GetTagNameByRef(Tag(n))[3]),fp); - fputs("=\"",fp); - n++; - read = fprintf(fp,"%s",(const char*) current_text); - current_text += (read + 1); - //fputs((const char*) GetText(MyTextUnsafe(n)),fp); - fputc('"',fp); - n+=3; //close @$ @@ + if (no_text) { + myfputc('<',fp); + myfputs((const char*) &(GetTagNameByRef(Tag(n))[3]),fp); + myfputc('>',fp); + myfputs("<$@/>',fp); + n+= 4; + } + else { + myfputc(' ',fp); + myfputs((const char*) &(GetTagNameByRef(Tag(n))[3]),fp); + n++; + myfputs("=\"",fp); + read = myfprintf((const char*) current_text,fp); + current_text += (read + 1); + myfputc('"',fp); + n+=3; + } }; - fputc('>',fp); + if (no_text) + myfputs("",fp); + else myfputc('>',fp); n++; tag=Tag(n); } else { - fputc('>',fp); + myfputc('>',fp); }; } else {// tag - fputs("/>",fp); + myfputs("/>",fp); n++; tag=Tag(n); }; @@ -1027,15 +1028,16 @@ void XMLTree::Print(int fd,treeNode x){ } else do { - fputs("', fp); - st.pop(); + myfputs("', fp); + st.pop_back(); n++; }while (!fast_inspect(Par,n) && !st.empty()); tag=Tag(n); }; - fputc('\n',fp); - fflush(fp); + myfputc('\n',fp); + mybufferflush(fp); + //fflush(fp); fclose(fp); }