X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=timeXMLTree.cpp;h=6e8bcec1188c952977b26ca21809b9b03fff329c;hb=df5fdb22632be887ecd9f5c46a014e7e970148a2;hp=ed721cde76537aa0af0e4d40ec7f3088421bdd4b;hpb=a7dc14cd894ab8ecc34a3f47db0150232950e5b7;p=SXSI%2Fxpathcomp.git diff --git a/timeXMLTree.cpp b/timeXMLTree.cpp index ed721cd..6e8bcec 100644 --- a/timeXMLTree.cpp +++ b/timeXMLTree.cpp @@ -3,29 +3,35 @@ #include "Utils.h" #include #include +#include + +#define read32u() \ + (intern_src += 4, \ + ((uintnat)(intern_src[-4]) << 24) + (intern_src[-3] << 16) + \ + (intern_src[-2] << 8) + intern_src[-1]) using std::cout; using std::string; using std::left; using std::right; -static clock_t tFirstChild = 0; -static clock_t tNextSibling = 0; -static clock_t tParent = 0; -static clock_t tTaggedAncestor = 0; -static clock_t tTaggedChild = 0; -static clock_t tTaggedDesc = 0; -static clock_t tTaggedFoll = 0; -static clock_t tParentNode = 0; -static clock_t tPrevNode = 0; -static clock_t tTag = 0; -static clock_t tMyText = 0; -static clock_t tPrevText = 0; -static clock_t tNextText = 0; -static clock_t tDocIds = 0; - -static clock_t tFullTraversal = 0; -static clock_t tJumpTraversal = 0; +static double tFirstChild = 0; +static double tNextSibling = 0; +static double tParent = 0; +static double tTaggedAncestor = 0; +static double tTaggedChild = 0; +static double tTaggedDesc = 0; +static double tTaggedFoll = 0; +static double tParentNode = 0; +static double tPrevNode = 0; +static double tTag = 0; +static double tMyText = 0; +static double tPrevText = 0; +static double tNextText = 0; +static double tDocIds = 0; + +static double tFullTraversal = 0; +static double tJumpTraversal = 0; static unsigned int cFirstChild = 0; static unsigned int cNextSibling = 0; @@ -46,31 +52,34 @@ static unsigned int cFullTraversal = 0; static unsigned int cJumpTraversal = 0; - -static clock_t tmp; +static struct timeval tmpv1; +static struct timeval tmpv2; static TagType target_tag = -1; -#define STARTTIMER() (tmp= clock()) -#define STOPTIMER(x) do { (t##x) = (t##x) + (clock() - tmp); (c##x)= (c##x)+1; } while (0) +#define STARTTIMER() (gettimeofday(&tmpv1,NULL)) +#define STOPTIMER(x) do { \ + gettimeofday(&tmpv2,NULL); \ + (t##x) = (t##x) + ((tmpv2.tv_sec - tmpv1.tv_sec) * 1000000.0 + \ + (tmpv2.tv_usec - tmpv1.tv_usec))/1000.0; \ + (c##x)= (c##x)+1; \ + } while (0) + #define PRINTSTATS(x) do { \ std::cout.width(15); \ std::cout << std::left << #x; \ std::cout << " : "; \ std::cout.width(8); \ - std::cout << std::right << c##x << " calls,"; \ + std::cout << std::right << c##x << " calls, "; \ std::cout.width(8); \ - std::cout << std::right << t##x << " cycles, total:"; \ - std::cout.width(5); \ - std::cout << std::right << ((t##x) *1000.00) /CLOCKS_PER_SEC \ + std::cout << std::right << (t##x) \ << " ms, mean: "; \ - std::cout.width(5); \ + std::cout.width(8); \ std::cout << std::right \ - << (((t##x)* 1000.00) /CLOCKS_PER_SEC) / c##x \ + << (t##x) *1.00 / c##x \ << "\n"; \ } while (0) - void traversal(XMLTree * tree, treeNode node,unsigned char* targettagname){ treeNode res1,res2; TagType tag; @@ -175,7 +184,6 @@ unsigned int time_jump(XMLTree* tree, treeNode node,treeNode root){ cJumpTraversal++; tag = tree->Tag(node); if (tag == target_tag) - return 1 + time_jump(tree, tree->TaggedDesc(node,target_tag),node) + time_jump(tree, tree->TaggedFollBelow(node,target_tag,root), root); @@ -189,29 +197,103 @@ unsigned int time_jump(XMLTree* tree, treeNode node,treeNode root){ } +int usage(char ** argv){ + + std::cout << "usage : " << argv[0] << " [-d] [-s] file.{xml,.srx} tagname\n"; + return 1; +} int main(int argc, char ** argv){ unsigned int count1,count2; - unsigned char * tagname = (unsigned char *) "keyword"; + unsigned char * tagname; + string arg,filename,ext; + bool disable_tc = false; + bool save = false; + bool srx; + XMLTree * tree; + + int i = 1; + if ( i >= argc) + return usage(argv); + + arg = argv[i]; + if (arg.compare("-d") == 0){ + disable_tc = true; + i++; + if ( i >= argc) + return usage(argv); + arg = argv[i]; + }; - if (argc != 2){ - std::cout << "Usage : " << argv[0] << " filename (without .srx)\n"; - return 1; + if (arg.compare("-s") == 0){ + save = true; + i++; + if ( i >= argc) + return usage(argv); + arg = argv[i]; }; - // The samplerate is not taken into account for loading anymore - XMLTree * tree = XMLTree::Load((unsigned char*) argv[1],64); + + // The filename + if (arg.size() < 4) + return usage(argv); + + ext=(arg.substr(arg.size()-4,4)); + if (ext.compare(".srx") == 0){ + // must truncate + filename = arg.substr(0,arg.size()-4); + + srx = true; + } + else if (ext.compare(".xml")==0) { + filename = arg; + srx = false; + } + else + return usage(argv); + i++; + + if (i >= argc) + return usage(argv); + + tagname = (unsigned char*) argv[i]; + + + if (srx) + // The samplerate is not taken into account for loading anymore + tree = XMLTree::Load((unsigned char*) filename.c_str(),64); + else { + try { + //filename, sampling factor, index empty texts, disable tc + XMLDocShredder shredder(filename.c_str(),64,false,disable_tc); + shredder.processStartDocument(""); + shredder.parse(); + shredder.processEndDocument(); + tree = (XMLTree *) shredder.storageIfc_->returnDocument(); + if (save){ + filename = filename.substr(0,filename.size()-4).append(".srx"); + struct stat stats; + int exists = stat(filename.c_str(),&stats); + if(exists == 0) { + std::cout << "Warning : indexed file " << filename << " exists, not overwriting\n"; + } + else { + tree->Save((unsigned char*) filename.substr(0,filename.size()-4).c_str()); + }; + + }; + } + catch (const std::exception& e){ + std::cout << "Error during parsing : " << e.what() << "\n"; + return 2; + }; + }; traversal(tree,tree->Root(),tagname); - STARTTIMER(); - count1 = time_traversal(tree,tree->Root()); - STOPTIMER(FullTraversal); - count2 = time_jump(tree,tree->Root(),tree->Root()); - STOPTIMER(JumpTraversal); PRINTSTATS(Tag); PRINTSTATS(FirstChild); @@ -228,6 +310,20 @@ int main(int argc, char ** argv){ PRINTSTATS(ParentNode); PRINTSTATS(PrevNode); std::cout << "\n"; + + if (target_tag == -1){ + std::cout << "Warning: tag " << tagname << " was not found in the document!\n" + << "Warning: not timing traversal and jumping functions\n"; + return 3; + }; + + STARTTIMER(); + count1 = time_traversal(tree,tree->Root()); + STOPTIMER(FullTraversal); + + count2 = time_jump(tree,tree->Root(),tree->Root()); + STOPTIMER(JumpTraversal); + std::cout << "Full traversal found " << count1 << " " << tagname << " nodes\n"; PRINTSTATS(FullTraversal); std::cout << "\n"; @@ -236,4 +332,5 @@ int main(int argc, char ** argv){ return 0; + }