X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=timeXMLTree.cpp;h=6e8bcec1188c952977b26ca21809b9b03fff329c;hb=df5fdb22632be887ecd9f5c46a014e7e970148a2;hp=aef9731343747db92720dbb381f64a2c2a189de1;hpb=92455238a637876bec18bfdaed4f5342f4cbbd1f;p=SXSI%2Fxpathcomp.git diff --git a/timeXMLTree.cpp b/timeXMLTree.cpp index aef9731..6e8bcec 100644 --- a/timeXMLTree.cpp +++ b/timeXMLTree.cpp @@ -3,27 +3,41 @@ #include "Utils.h" #include #include +#include + +#define read32u() \ + (intern_src += 4, \ + ((uintnat)(intern_src[-4]) << 24) + (intern_src[-3] << 16) + \ + (intern_src[-2] << 8) + intern_src[-1]) using std::cout; using std::string; using std::left; using std::right; -static clock_t tFirstChild = 0; -static clock_t tNextSibling = 0; -static clock_t tTaggedDesc = 0; -static clock_t tTaggedFoll = 0; -static clock_t tParentNode = 0; -static clock_t tPrevNode = 0; -static clock_t tTag = 0; -static clock_t tMyText = 0; -static clock_t tPrevText = 0; -static clock_t tNextText = 0; -static clock_t tFullTraversal = 0; -static clock_t tJumpTraversal = 0; +static double tFirstChild = 0; +static double tNextSibling = 0; +static double tParent = 0; +static double tTaggedAncestor = 0; +static double tTaggedChild = 0; +static double tTaggedDesc = 0; +static double tTaggedFoll = 0; +static double tParentNode = 0; +static double tPrevNode = 0; +static double tTag = 0; +static double tMyText = 0; +static double tPrevText = 0; +static double tNextText = 0; +static double tDocIds = 0; + +static double tFullTraversal = 0; +static double tJumpTraversal = 0; static unsigned int cFirstChild = 0; static unsigned int cNextSibling = 0; +static unsigned int cParent = 0; +static unsigned int cTaggedAncestor = 0; +static unsigned int cTaggedChild = 0; static unsigned int cTaggedDesc = 0; static unsigned int cTaggedFoll = 0; static unsigned int cParentNode = 0; @@ -32,39 +46,48 @@ static unsigned int cTag = 0; static unsigned int cMyText = 0; static unsigned int cPrevText = 0; static unsigned int cNextText = 0; +static unsigned int cDocIds = 0; + static unsigned int cFullTraversal = 0; static unsigned int cJumpTraversal = 0; -static clock_t tmp; + +static struct timeval tmpv1; +static struct timeval tmpv2; static TagType target_tag = -1; -#define STARTTIMER() (tmp= clock()) -#define STOPTIMER(x) do { (t##x) = (t##x) + (clock() - tmp); (c##x)= (c##x)+1; } while (0) +#define STARTTIMER() (gettimeofday(&tmpv1,NULL)) +#define STOPTIMER(x) do { \ + gettimeofday(&tmpv2,NULL); \ + (t##x) = (t##x) + ((tmpv2.tv_sec - tmpv1.tv_sec) * 1000000.0 + \ + (tmpv2.tv_usec - tmpv1.tv_usec))/1000.0; \ + (c##x)= (c##x)+1; \ + } while (0) + #define PRINTSTATS(x) do { \ - std::cout.width(11); \ + std::cout.width(15); \ std::cout << std::left << #x; \ std::cout << " : "; \ std::cout.width(8); \ - std::cout << std::right << c##x << " calls,"; \ + std::cout << std::right << c##x << " calls, "; \ std::cout.width(8); \ - std::cout << std::right << t##x << " cycles, total:"; \ - std::cout.width(5); \ - std::cout << std::right << ((t##x) *1000.00) /CLOCKS_PER_SEC \ + std::cout << std::right << (t##x) \ << " ms, mean: "; \ - std::cout.width(5); \ + std::cout.width(8); \ std::cout << std::right \ - << (((t##x)* 1000.00) /CLOCKS_PER_SEC) / c##x \ + << (t##x) *1.00 / c##x \ << "\n"; \ } while (0) - void traversal(XMLTree * tree, treeNode node,unsigned char* targettagname){ treeNode res1,res2; TagType tag; DocID id1,id2,id3; + range rg; const unsigned char * tagname; if (node != NULLT){ + STARTTIMER(); tag = tree->Tag(node); STOPTIMER(Tag); @@ -74,12 +97,28 @@ void traversal(XMLTree * tree, treeNode node,unsigned char* targettagname){ target_tag = tag; }; STARTTIMER(); + res1 = tree->Parent(node); + STOPTIMER(Parent); + /* + STARTTIMER(); + res1 = tree->TaggedChild(node,0,tag); + STOPTIMER(TaggedChild); + + STARTTIMER(); + res1 = tree->TaggedAncestor(node,tag); + STOPTIMER(TaggedAncestor); + */ + STARTTIMER(); res1 = tree->TaggedDesc(node,tag); STOPTIMER(TaggedDesc); STARTTIMER(); res1 = tree->TaggedFoll(node,tag); STOPTIMER(TaggedFoll); + + STARTTIMER(); + rg = tree->DocIds(node); + STOPTIMER(DocIds); STARTTIMER(); id1 = tree->MyText(node); @@ -110,6 +149,7 @@ void traversal(XMLTree * tree, treeNode node,unsigned char* targettagname){ STARTTIMER(); res2 = tree->NextSibling(node); STOPTIMER(NextSibling); + traversal(tree,res1,targettagname); traversal(tree,res2,targettagname); @@ -117,70 +157,151 @@ void traversal(XMLTree * tree, treeNode node,unsigned char* targettagname){ } -unsigned int time_traversal(XMLTree *tree,treeNode node,unsigned int count){ +/* This simulates the run function of the automata */ + +unsigned int time_traversal(XMLTree *tree,treeNode node){ TagType tag; if (node != NULLT) { cFullTraversal++; tag = tree->Tag(node); - if (tag == target_tag) - count = count + 1; - return time_traversal(tree,tree->NextSibling(node), - time_traversal(tree,tree->FirstChild(node),count)); + if (tag == target_tag) + return 1 + + time_traversal(tree,tree->FirstChild(node)) + + time_traversal(tree,tree->NextSibling(node)); + else + return time_traversal(tree,tree->FirstChild(node)) + + time_traversal(tree,tree->NextSibling(node)); } else - return count; + return 0; } - -unsigned int time_jump(XMLTree* tree, treeNode node,unsigned int count,treeNode root){ +/* This simulates the run function of the jumping automata*/ +unsigned int time_jump(XMLTree* tree, treeNode node,treeNode root){ TagType tag; if (node != NULLT) { cJumpTraversal++; tag = tree->Tag(node); if (tag == target_tag) - count = count + 1; - return time_jump(tree, - tree->TaggedFollBelow(node,target_tag,root), - time_jump(tree, - tree->TaggedDesc(node,target_tag), - count, - node), - root); - + return 1 + + time_jump(tree, tree->TaggedDesc(node,target_tag),node) + + time_jump(tree, tree->TaggedFollBelow(node,target_tag,root), root); + + else + return time_jump(tree, tree->TaggedDesc(node,target_tag),node) + + time_jump(tree, tree->TaggedFollBelow(node,target_tag,root), root); } else - return count; + return 0; } +int usage(char ** argv){ + + std::cout << "usage : " << argv[0] << " [-d] [-s] file.{xml,.srx} tagname\n"; + return 1; +} int main(int argc, char ** argv){ unsigned int count1,count2; - unsigned char * tagname = (unsigned char *) "keyword"; + unsigned char * tagname; + string arg,filename,ext; + bool disable_tc = false; + bool save = false; + bool srx; + XMLTree * tree; + + int i = 1; + if ( i >= argc) + return usage(argv); + + arg = argv[i]; + if (arg.compare("-d") == 0){ + disable_tc = true; + i++; + if ( i >= argc) + return usage(argv); + arg = argv[i]; + }; - if (argc != 2){ - std::cout << "Usage : " << argv[0] << " filename (without .srx)\n"; - return 1; + if (arg.compare("-s") == 0){ + save = true; + i++; + if ( i >= argc) + return usage(argv); + arg = argv[i]; }; - // The samplerate is not taken into account for loading anymore - XMLTree * tree = XMLTree::Load((unsigned char*) argv[1],64); + + // The filename + if (arg.size() < 4) + return usage(argv); - traversal(tree,tree->Root(),tagname); + ext=(arg.substr(arg.size()-4,4)); + if (ext.compare(".srx") == 0){ + // must truncate + filename = arg.substr(0,arg.size()-4); - STARTTIMER(); - count1 = time_traversal(tree,tree->Root(),0); - STOPTIMER(FullTraversal); + srx = true; + } + else if (ext.compare(".xml")==0) { + filename = arg; + srx = false; + } + else + return usage(argv); + i++; + + if (i >= argc) + return usage(argv); + + tagname = (unsigned char*) argv[i]; + - count2 = time_jump(tree,tree->Root(),0,tree->Root()); - STOPTIMER(JumpTraversal); + if (srx) + // The samplerate is not taken into account for loading anymore + tree = XMLTree::Load((unsigned char*) filename.c_str(),64); + else { + try { + //filename, sampling factor, index empty texts, disable tc + XMLDocShredder shredder(filename.c_str(),64,false,disable_tc); + shredder.processStartDocument(""); + shredder.parse(); + shredder.processEndDocument(); + tree = (XMLTree *) shredder.storageIfc_->returnDocument(); + if (save){ + filename = filename.substr(0,filename.size()-4).append(".srx"); + struct stat stats; + int exists = stat(filename.c_str(),&stats); + if(exists == 0) { + std::cout << "Warning : indexed file " << filename << " exists, not overwriting\n"; + } + else { + tree->Save((unsigned char*) filename.substr(0,filename.size()-4).c_str()); + }; + + }; + } + catch (const std::exception& e){ + std::cout << "Error during parsing : " << e.what() << "\n"; + return 2; + }; + }; + traversal(tree,tree->Root(),tagname); + + + + PRINTSTATS(Tag); PRINTSTATS(FirstChild); PRINTSTATS(NextSibling); - PRINTSTATS(Tag); + PRINTSTATS(Parent); + PRINTSTATS(TaggedAncestor); + PRINTSTATS(TaggedChild); + PRINTSTATS(DocIds); PRINTSTATS(TaggedDesc); PRINTSTATS(TaggedFoll); PRINTSTATS(PrevText); @@ -189,6 +310,20 @@ int main(int argc, char ** argv){ PRINTSTATS(ParentNode); PRINTSTATS(PrevNode); std::cout << "\n"; + + if (target_tag == -1){ + std::cout << "Warning: tag " << tagname << " was not found in the document!\n" + << "Warning: not timing traversal and jumping functions\n"; + return 3; + }; + + STARTTIMER(); + count1 = time_traversal(tree,tree->Root()); + STOPTIMER(FullTraversal); + + count2 = time_jump(tree,tree->Root(),tree->Root()); + STOPTIMER(JumpTraversal); + std::cout << "Full traversal found " << count1 << " " << tagname << " nodes\n"; PRINTSTATS(FullTraversal); std::cout << "\n"; @@ -197,4 +332,5 @@ int main(int argc, char ** argv){ return 0; + }