From: kim Date: Wed, 28 Apr 2010 06:40:29 +0000 (+0000) Subject: Fix the printing some more X-Git-Url: http://git.nguyen.vg/gitweb/?p=SXSI%2FXMLTree.git;a=commitdiff_plain;h=9c696a1b1a7034794b3768e4e1e40db86e87ebbb Fix the printing some more git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/XMLTree@799 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/XMLTree.cpp b/XMLTree.cpp index 9be500e..db9814c 100644 --- a/XMLTree.cpp +++ b/XMLTree.cpp @@ -3,7 +3,7 @@ #include #include "XMLTree.h" #include "timings.h" - +#include // functions to convert tag positions to the corresponding tree node and viceversa. // These are implemented in order to be able to change the tree and Tags representations, // without affecting the code so much. @@ -53,7 +53,7 @@ static treeNode fast_first_child(bp *Par, treeNode x) static treeNode fast_next_sibling(bp* Par,treeNode x) { - x = fwd_excess(Par,x,0); + x = fast_find_close(Par,x)+1; return (fast_inspect(Par,x) == OP) ? x : NULLT; } @@ -944,11 +944,15 @@ bool XMLTree::IsOpen(treeNode x) { return fast_inspect(Par,x); } //WARNING this uses directly the underlying implementation for plain text - void XMLTree::Print(int fd,treeNode x, bool no_text){ int newfd = dup(fd); stream = fdopen(newfd,"wa"); + if (stream == 0){ + perror(NULL); + return; + }; + if (buffer == 0) buffer = new string(); @@ -959,8 +963,8 @@ void XMLTree::Print(int fd,treeNode x, bool no_text){ uchar * tagstr; range r = DocIds(x); treeNode first_idx; - treeNode first_text = (tag == PCDATA_TAG_ID ? x : TaggedDescendant(x,PCDATA_TAG_ID)); - treeNode first_att = NULLT;//TaggedDesc(x,ATTRIBUTE_DATA_TAG_ID); + treeNode first_text = (tag == PCDATA_TAG_ID ? x : ParentNode(r.min-1)); + treeNode first_att = NULLT; if (first_att == NULLT) first_idx = first_text; @@ -972,18 +976,18 @@ void XMLTree::Print(int fd,treeNode x, bool no_text){ uchar * current_text=NULL; if (first_idx != NULLT) current_text = GetText(MyText(first_idx)); - int read = 0; + size_t read = 0; std::vector st; while (n <= fin){ if (fast_inspect(Par,n)){ if (tag == PCDATA_TAG_ID ) { - // myfputs((const char*) (GetText(MyTextUnsafe(n))),fp); + if (no_text) myfputs("<$/>",fp); else{ - read = fprintf(fp,"%s",(const char*) current_text); - current_text += (read + 1); - } + read = myfprintf((const char*) current_text, fp); + current_text += (read + 1); + }; n+=2; // skip closing $ tag = Tag(n); @@ -1002,11 +1006,10 @@ void XMLTree::Print(int fd,treeNode x, bool no_text){ while (fast_inspect(Par,n)){ if (no_text) { myfputc('<',fp); - const uchar * tmp = &(GetTagNameByRef(Tag(n))[3]); - myfputs((const char*) tmp,fp); + myfputs((const char*) &(GetTagNameByRef(Tag(n))[3]),fp); myfputc('>',fp); myfputs("<$@/>',fp); n+= 4; } @@ -1015,9 +1018,8 @@ void XMLTree::Print(int fd,treeNode x, bool no_text){ myfputs((const char*) &(GetTagNameByRef(Tag(n))[3]),fp); n++; myfputs("=\"",fp); - read = fprintf(fp,"%s",(const char*) current_text); + read = myfprintf((const char*) current_text,fp); current_text += (read + 1); - //myfputs((const char*) GetText(MyTextUnsafe(n)),fp); myfputc('"',fp); n+=3; } @@ -1049,8 +1051,8 @@ void XMLTree::Print(int fd,treeNode x, bool no_text){ }while (!fast_inspect(Par,n) && !st.empty()); tag=Tag(n); }; - //myfputc('\n',fp); + myfputc('\n',fp); mybufferflush(fp); - fflush(fp); + //fflush(fp); fclose(fp); } diff --git a/XMLTree.h b/XMLTree.h index d0a5c73..ea7b778 100644 --- a/XMLTree.h +++ b/XMLTree.h @@ -162,6 +162,21 @@ class XMLTree { fputs(buffer->c_str(), fp); buffer->clear(); } + + size_t myfprintf(const char* s, FILE * fp){ + if (s == NULL) + return 0; + size_t i = buffer->size(); + buffer->append(s); + size_t j = buffer->size(); + if (buffer->size() >= 100000){ + fputs(buffer->c_str(),fp); + buffer->clear(); + }; + return (j-i); + } + + void PrintNode(treeNode n, int fd); /** Data structure constructors */ XMLTree(){ buffer = 0;}; @@ -458,7 +473,7 @@ public: uchar* GetText(DocID d) { uchar * s = Text->GetText(d); - return (s[0] == 1 ? (uchar*)"" : s); + return (s[0] == 1 ? (s+1) : s); } /** GetText(i, j): returns the texts corresponding to documents with diff --git a/makefile b/makefile index 2ba3549..4eae048 100644 --- a/makefile +++ b/makefile @@ -1,4 +1,4 @@ -FLAGS =-std=c++0x -O9 -I./libcds/includes/ -I. -fno-PIC +FLAGS =-std=c++0x -O3 -I./libcds/includes/ -I. -fno-PIC LIBCDS_A=libcds/lib/libcds.a OBJECTS_TCO= TextCollection/TextCollection.o TextCollection/TextCollectionBuilder.o TextCollection/TCImplementation.o TextCollection/Tools.o TextCollection/BitRank.o TextCollection/TextStorage.o