From 983bac1e5cce257dbc9cafa6e42370924f19af83 Mon Sep 17 00:00:00 2001 From: kim Date: Wed, 11 Mar 2009 23:37:43 +0000 Subject: [PATCH] enhanced timeXMLTree git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@253 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- timeXMLTree.cpp | 112 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 101 insertions(+), 11 deletions(-) diff --git a/timeXMLTree.cpp b/timeXMLTree.cpp index 68e7556..c3bd7f9 100644 --- a/timeXMLTree.cpp +++ b/timeXMLTree.cpp @@ -3,6 +3,7 @@ #include "Utils.h" #include #include +#include /* NOTE(review): the header name appears to have been stripped from this copy of the patch; the stat() call added below needs sys/stat.h (confirm against the repository) */ using std::cout; using std::string; @@ -191,29 +192,103 @@ unsigned int time_jump(XMLTree* tree, treeNode node,treeNode root){ } +int usage(char ** argv){ /* Print the command-line synopsis on stdout; always returns 1 so main can write "return usage(argv);" */ + + std::cout << "usage : " << argv[0] << " [-d] [-s] file.{xml,.srx} tagname\n"; + return 1; +} int main(int argc, char ** argv){ unsigned int count1,count2; - unsigned char * tagname = (unsigned char *) "keyword"; + unsigned char * tagname; + string arg,filename,ext; + bool disable_tc = false; + bool save = false; + bool srx; + XMLTree * tree; + + int i = 1; /* index of the argv entry currently being examined */ + if ( i >= argc) + return usage(argv); + + arg = argv[i]; + if (arg.compare("-d") == 0){ /* optional "-d": sets disable_tc, which is later handed to XMLDocShredder; it is only recognised when it comes before "-s" */ + disable_tc = true; + i++; + if ( i >= argc) + return usage(argv); + arg = argv[i]; + }; - if (argc != 2){ - std::cout << "Usage : " << argv[0] << " filename (without .srx)\n"; - return 1; + if (arg.compare("-s") == 0){ /* optional "-s": after shredding an .xml input, save the tree unless the .srx file already exists */ + save = true; + i++; + if ( i >= argc) + return usage(argv); + arg = argv[i]; }; - // The samplerate is not taken into account for loading anymore - XMLTree * tree = XMLTree::Load((unsigned char*) argv[1],64); + + // The filename + if (arg.size() < 4) + return usage(argv); + + ext=(arg.substr(arg.size()-4,4)); /* the last four characters select the input kind: ".srx" is loaded, ".xml" is shredded, anything else is a usage error */ + if (ext.compare(".srx") == 0){ + // must truncate + filename = arg.substr(0,arg.size()-4); + + srx = true; + } + else if (ext.compare(".xml")==0) { + filename = arg; + srx = false; + } + else + return usage(argv); + i++; /* advance to the tagname argument */ + + if (i >= argc) + return usage(argv); + + tagname = (unsigned 
char*) argv[i]; + + + if (srx) + // The samplerate is not taken into account for loading anymore + tree = XMLTree::Load((unsigned char*) filename.c_str(),64); + else { + try { + //filename, sampling factor, index empty texts, disable tc + XMLDocShredder shredder(filename.c_str(),64,false,disable_tc); + shredder.processStartDocument(""); + shredder.parse(); + shredder.processEndDocument(); + tree = (XMLTree *) shredder.storageIfc_->returnDocument(); + if (save){ + filename = filename.substr(0,filename.size()-4).append(".srx"); /* swap the ".xml" suffix for ".srx" to obtain the index file name */ + struct stat stats; + int exists = stat(filename.c_str(),&stats); /* stat() returns 0 on success, which is taken here to mean the file already exists */ + if(exists == 0) { + std::cout << "Warning : indexed file " << filename << " exists, not overwriting\n"; + } + else { + tree->Save((unsigned char*) filename.substr(0,filename.size()-4).c_str()); /* ".srx" is stripped again here; presumably Save() appends the extension itself (TODO confirm) */ + }; + + }; + } + catch (const std::exception& e){ + std::cout << "Error during parsing : " << e.what() << "\n"; + return 2; /* exit status 2: parsing the XML input failed */ + }; + }; traversal(tree,tree->Root(),tagname); - STARTTIMER(); - count1 = time_traversal(tree,tree->Root()); - STOPTIMER(FullTraversal); - count2 = time_jump(tree,tree->Root(),tree->Root()); - STOPTIMER(JumpTraversal); PRINTSTATS(Tag); PRINTSTATS(FirstChild); @@ -230,6 +305,20 @@ int main(int argc, char ** argv){ PRINTSTATS(ParentNode); PRINTSTATS(PrevNode); std::cout << "\n"; + + if (target_tag == -1){ /* target_tag is defined outside the lines shown in this patch; the value -1 is reported below as "tag not found" */ + std::cout << "Warning: tag " << tagname << " was not found in the document!\n" + << "Warning: not timing traversal and jumping functions\n"; + return 3; /* exit status 3: tag absent, so the timing runs are skipped */ + }; + + STARTTIMER(); + count1 = time_traversal(tree,tree->Root()); + STOPTIMER(FullTraversal); + + count2 = time_jump(tree,tree->Root(),tree->Root()); + STOPTIMER(JumpTraversal); + std::cout << "Full traversal found " << count1 << " " << tagname << " nodes\n"; PRINTSTATS(FullTraversal); std::cout << "\n"; @@ -238,4 +327,5 @@ int main(int argc, char ** argv){ return 0; + } -- 2.17.1