X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=timeXMLTree.cpp;fp=timeXMLTree.cpp;h=0000000000000000000000000000000000000000;hb=4b52da1a20a4fe031930bb96d2ca46bec06dc529;hp=6e8bcec1188c952977b26ca21809b9b03fff329c;hpb=a223af3254fb51c279cfbccdc18c59484fdca74e;p=SXSI%2Fxpathcomp.git diff --git a/timeXMLTree.cpp b/timeXMLTree.cpp deleted file mode 100644 index 6e8bcec..0000000 --- a/timeXMLTree.cpp +++ /dev/null @@ -1,336 +0,0 @@ -#include "XMLDocShredder.h" -#include "XMLTree.h" -#include "Utils.h" -#include -#include -#include - -#define read32u() \ - (intern_src += 4, \ - ((uintnat)(intern_src[-4]) << 24) + (intern_src[-3] << 16) + \ - (intern_src[-2] << 8) + intern_src[-1]) - -using std::cout; -using std::string; -using std::left; -using std::right; - -static double tFirstChild = 0; -static double tNextSibling = 0; -static double tParent = 0; -static double tTaggedAncestor = 0; -static double tTaggedChild = 0; -static double tTaggedDesc = 0; -static double tTaggedFoll = 0; -static double tParentNode = 0; -static double tPrevNode = 0; -static double tTag = 0; -static double tMyText = 0; -static double tPrevText = 0; -static double tNextText = 0; -static double tDocIds = 0; - -static double tFullTraversal = 0; -static double tJumpTraversal = 0; - -static unsigned int cFirstChild = 0; -static unsigned int cNextSibling = 0; -static unsigned int cParent = 0; -static unsigned int cTaggedAncestor = 0; -static unsigned int cTaggedChild = 0; -static unsigned int cTaggedDesc = 0; -static unsigned int cTaggedFoll = 0; -static unsigned int cParentNode = 0; -static unsigned int cPrevNode = 0; -static unsigned int cTag = 0; -static unsigned int cMyText = 0; -static unsigned int cPrevText = 0; -static unsigned int cNextText = 0; -static unsigned int cDocIds = 0; - -static unsigned int cFullTraversal = 0; -static unsigned int cJumpTraversal = 0; - - -static struct timeval tmpv1; -static struct timeval tmpv2; - -static TagType target_tag = -1; - -#define STARTTIMER() (gettimeofday(&tmpv1,NULL)) -#define STOPTIMER(x) do { \ - gettimeofday(&tmpv2,NULL); \ - (t##x) = (t##x) + ((tmpv2.tv_sec - tmpv1.tv_sec) * 1000000.0 + \ - (tmpv2.tv_usec - tmpv1.tv_usec))/1000.0; \ - (c##x)= (c##x)+1; \ - } while (0) - -#define PRINTSTATS(x) do { \ - std::cout.width(15); \ - std::cout << std::left << #x; \ - std::cout << " : "; \ - std::cout.width(8); \ - std::cout << std::right << c##x << " calls, "; \ - std::cout.width(8); \ - std::cout << std::right << (t##x) \ - << " ms, mean: "; \ - std::cout.width(8); \ - std::cout << std::right \ - << (t##x) *1.00 / c##x \ - << "\n"; \ - } while (0) - -void traversal(XMLTree * tree, treeNode node,unsigned char* targettagname){ - treeNode res1,res2; - TagType tag; - DocID id1,id2,id3; - range rg; - const unsigned char * tagname; - if (node != NULLT){ - - STARTTIMER(); - tag = tree->Tag(node); - STOPTIMER(Tag); - if (target_tag == -1){ - tagname = tree->GetTagNameByRef(tag); - if (strcmp( (char*) tagname, (char*) targettagname) == 0) - target_tag = tag; - }; - STARTTIMER(); - res1 = tree->Parent(node); - STOPTIMER(Parent); - /* - STARTTIMER(); - res1 = tree->TaggedChild(node,0,tag); - STOPTIMER(TaggedChild); - - STARTTIMER(); - res1 = tree->TaggedAncestor(node,tag); - STOPTIMER(TaggedAncestor); - */ - STARTTIMER(); - res1 = tree->TaggedDesc(node,tag); - STOPTIMER(TaggedDesc); - - STARTTIMER(); - res1 = tree->TaggedFoll(node,tag); - STOPTIMER(TaggedFoll); - - STARTTIMER(); - rg = tree->DocIds(node); - STOPTIMER(DocIds); - - STARTTIMER(); - id1 = tree->MyText(node); - STOPTIMER(MyText); - - STARTTIMER(); - id2 = tree->PrevText(node); - STOPTIMER(PrevText); - - STARTTIMER(); - id3 = tree->NextText(node); - STOPTIMER(NextText); - - id1 = max(id1, max(id2,id3)); - - STARTTIMER(); - res1 = tree->ParentNode(id1); - STOPTIMER(ParentNode); - - STARTTIMER(); - res1 = tree->PrevNode(id1); - STOPTIMER(PrevNode); - - STARTTIMER(); - res1 = tree->FirstChild(node); - STOPTIMER(FirstChild); - - STARTTIMER(); - res2 = tree->NextSibling(node); - STOPTIMER(NextSibling); - - traversal(tree,res1,targettagname); - traversal(tree,res2,targettagname); - - }; - -} - -/* This simulates the run function of the automata */ - -unsigned int time_traversal(XMLTree *tree,treeNode node){ - TagType tag; - if (node != NULLT) { - cFullTraversal++; - tag = tree->Tag(node); - if (tag == target_tag) - return 1 + - time_traversal(tree,tree->FirstChild(node)) + - time_traversal(tree,tree->NextSibling(node)); - else - return time_traversal(tree,tree->FirstChild(node)) + - time_traversal(tree,tree->NextSibling(node)); - - } - else - return 0; -} - -/* This simulates the run function of the jumping automata*/ -unsigned int time_jump(XMLTree* tree, treeNode node,treeNode root){ - TagType tag; - if (node != NULLT) { - cJumpTraversal++; - tag = tree->Tag(node); - if (tag == target_tag) - return 1 + - time_jump(tree, tree->TaggedDesc(node,target_tag),node) + - time_jump(tree, tree->TaggedFollBelow(node,target_tag,root), root); - - else - return time_jump(tree, tree->TaggedDesc(node,target_tag),node) + - time_jump(tree, tree->TaggedFollBelow(node,target_tag,root), root); - } - else - return 0; -} - - -int usage(char ** argv){ - - std::cout << "usage : " << argv[0] << " [-d] [-s] file.{xml,.srx} tagname\n"; - return 1; - -} - - -int main(int argc, char ** argv){ - unsigned int count1,count2; - unsigned char * tagname; - string arg,filename,ext; - bool disable_tc = false; - bool save = false; - bool srx; - XMLTree * tree; - - int i = 1; - if ( i >= argc) - return usage(argv); - - arg = argv[i]; - if (arg.compare("-d") == 0){ - disable_tc = true; - i++; - if ( i >= argc) - return usage(argv); - arg = argv[i]; - }; - - if (arg.compare("-s") == 0){ - save = true; - i++; - if ( i >= argc) - return usage(argv); - arg = argv[i]; - }; - - - // The filename - if (arg.size() < 4) - return usage(argv); - - ext=(arg.substr(arg.size()-4,4)); - if (ext.compare(".srx") == 0){ - // must truncate - filename = arg.substr(0,arg.size()-4); - - srx = true; - } - else if (ext.compare(".xml")==0) { - filename = arg; - srx = false; - } - else - return usage(argv); - i++; - - if (i >= argc) - return usage(argv); - - tagname = (unsigned char*) argv[i]; - - - - if (srx) - // The samplerate is not taken into account for loading anymore - tree = XMLTree::Load((unsigned char*) filename.c_str(),64); - else { - try { - //filename, sampling factor, index empty texts, disable tc - XMLDocShredder shredder(filename.c_str(),64,false,disable_tc); - shredder.processStartDocument(""); - shredder.parse(); - shredder.processEndDocument(); - tree = (XMLTree *) shredder.storageIfc_->returnDocument(); - if (save){ - filename = filename.substr(0,filename.size()-4).append(".srx"); - struct stat stats; - int exists = stat(filename.c_str(),&stats); - if(exists == 0) { - std::cout << "Warning : indexed file " << filename << " exists, not overwriting\n"; - } - else { - tree->Save((unsigned char*) filename.substr(0,filename.size()-4).c_str()); - }; - - }; - } - catch (const std::exception& e){ - std::cout << "Error during parsing : " << e.what() << "\n"; - return 2; - }; - }; - traversal(tree,tree->Root(),tagname); - - - - PRINTSTATS(Tag); - PRINTSTATS(FirstChild); - PRINTSTATS(NextSibling); - PRINTSTATS(Parent); - PRINTSTATS(TaggedAncestor); - PRINTSTATS(TaggedChild); - PRINTSTATS(DocIds); - PRINTSTATS(TaggedDesc); - PRINTSTATS(TaggedFoll); - PRINTSTATS(PrevText); - PRINTSTATS(MyText); - PRINTSTATS(NextText); - PRINTSTATS(ParentNode); - PRINTSTATS(PrevNode); - std::cout << "\n"; - - if (target_tag == -1){ - std::cout << "Warning: tag " << tagname << " was not found in the document!\n" - << "Warning: not timing traversal and jumping functions\n"; - return 3; - }; - - STARTTIMER(); - count1 = time_traversal(tree,tree->Root()); - STOPTIMER(FullTraversal); - - count2 = time_jump(tree,tree->Root(),tree->Root()); - STOPTIMER(JumpTraversal); - - std::cout << "Full traversal found " << count1 << " " << tagname << " nodes\n"; - PRINTSTATS(FullTraversal); - std::cout << "\n"; - std::cout << "Jump traversal found " << count2 << " " << tagname << " nodes\n"; - PRINTSTATS(JumpTraversal); - - - return 0; - -}