X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=timeXMLTree.cpp;h=6e8bcec1188c952977b26ca21809b9b03fff329c;hb=df5fdb22632be887ecd9f5c46a014e7e970148a2;hp=883e119a28f8b9835a966c9b60207273e7bc4a5e;hpb=0cd1eaf978d6941cf063a375ddeebd1759ebcdea;p=SXSI%2Fxpathcomp.git diff --git a/timeXMLTree.cpp b/timeXMLTree.cpp index 883e119..6e8bcec 100644 --- a/timeXMLTree.cpp +++ b/timeXMLTree.cpp @@ -3,6 +3,12 @@ #include "Utils.h" #include #include +#include + +#define read32u() \ + (intern_src += 4, \ + ((uintnat)(intern_src[-4]) << 24) + (intern_src[-3] << 16) + \ + (intern_src[-2] << 8) + intern_src[-1]) using std::cout; using std::string; @@ -191,29 +197,103 @@ unsigned int time_jump(XMLTree* tree, treeNode node,treeNode root){ } +int usage(char ** argv){ + + std::cout << "usage : " << argv[0] << " [-d] [-s] file.{xml,.srx} tagname\n"; + return 1; +} int main(int argc, char ** argv){ unsigned int count1,count2; - unsigned char * tagname = (unsigned char *) "keyword"; + unsigned char * tagname; + string arg,filename,ext; + bool disable_tc = false; + bool save = false; + bool srx; + XMLTree * tree; + + int i = 1; + if ( i >= argc) + return usage(argv); + + arg = argv[i]; + if (arg.compare("-d") == 0){ + disable_tc = true; + i++; + if ( i >= argc) + return usage(argv); + arg = argv[i]; + }; - if (argc != 2){ - std::cout << "Usage : " << argv[0] << " filename (without .srx)\n"; - return 1; + if (arg.compare("-s") == 0){ + save = true; + i++; + if ( i >= argc) + return usage(argv); + arg = argv[i]; }; - // The samplerate is not taken into account for loading anymore - XMLTree * tree = XMLTree::Load((unsigned char*) argv[1],64); + + // The filename + if (arg.size() < 4) + return usage(argv); + + ext=(arg.substr(arg.size()-4,4)); + if (ext.compare(".srx") == 0){ + // must truncate + filename = arg.substr(0,arg.size()-4); + + srx = true; + } + else if (ext.compare(".xml")==0) { + filename = arg; + srx = false; + } + else + return usage(argv); + i++; + + if (i >= argc) + return usage(argv); + + tagname = (unsigned char*) argv[i]; + + + if (srx) + // The samplerate is not taken into account for loading anymore + tree = XMLTree::Load((unsigned char*) filename.c_str(),64); + else { + try { + //filename, sampling factor, index empty texts, disable tc + XMLDocShredder shredder(filename.c_str(),64,false,disable_tc); + shredder.processStartDocument(""); + shredder.parse(); + shredder.processEndDocument(); + tree = (XMLTree *) shredder.storageIfc_->returnDocument(); + if (save){ + filename = filename.substr(0,filename.size()-4).append(".srx"); + struct stat stats; + int exists = stat(filename.c_str(),&stats); + if(exists == 0) { + std::cout << "Warning : indexed file " << filename << " exists, not overwriting\n"; + } + else { + tree->Save((unsigned char*) filename.substr(0,filename.size()-4).c_str()); + }; + + }; + } + catch (const std::exception& e){ + std::cout << "Error during parsing : " << e.what() << "\n"; + return 2; + }; + }; traversal(tree,tree->Root(),tagname); - STARTTIMER(); - count1 = time_traversal(tree,tree->Root()); - STOPTIMER(FullTraversal); - count2 = time_jump(tree,tree->Root(),tree->Root()); - STOPTIMER(JumpTraversal); PRINTSTATS(Tag); PRINTSTATS(FirstChild); @@ -230,6 +310,20 @@ int main(int argc, char ** argv){ PRINTSTATS(ParentNode); PRINTSTATS(PrevNode); std::cout << "\n"; + + if (target_tag == -1){ + std::cout << "Warning: tag " << tagname << " was not found in the document!\n" + << "Warning: not timing traversal and jumping functions\n"; + return 3; + }; + + STARTTIMER(); + count1 = time_traversal(tree,tree->Root()); + STOPTIMER(FullTraversal); + + count2 = time_jump(tree,tree->Root(),tree->Root()); + STOPTIMER(JumpTraversal); + std::cout << "Full traversal found " << count1 << " " << tagname << " nodes\n"; PRINTSTATS(FullTraversal); std::cout << "\n"; @@ -238,4 +332,5 @@ int main(int argc, char ** argv){ return 0; + }