X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=myTimeXMLTree.cpp;fp=myTimeXMLTree.cpp;h=0000000000000000000000000000000000000000;hb=4b52da1a20a4fe031930bb96d2ca46bec06dc529;hp=25bc5aa2d0844723d1faf41793835028d49fb8b7;hpb=a223af3254fb51c279cfbccdc18c59484fdca74e;p=SXSI%2Fxpathcomp.git diff --git a/myTimeXMLTree.cpp b/myTimeXMLTree.cpp deleted file mode 100644 index 25bc5aa..0000000 --- a/myTimeXMLTree.cpp +++ /dev/null @@ -1,336 +0,0 @@ -#include "XMLDocShredder.h" -#include "XMLTree.h" -#include "Utils.h" -#include -#include -#include -#include -#include - -using namespace std; - -/* Time meassuring */ -double ticks= (double)sysconf(_SC_CLK_TCK); -struct tms t1,t2; - -void start_clock() { - times (&t1); -} - - -double stop_clock() { - times (&t2); - return (t2.tms_utime-t1.tms_utime)/ticks; -} - - -/* end Time meassuring */ - -void printStats(double time, string fname, uint queries) { - cout.width(15); - cout << std::left << fname; - cout << " : "; - cout.width(8); - cout << std::right << queries << " calls, "; - cout.width(8); - cout << std::right << time << "ms, mean: "; - cout.width(8); - cout << std::right << time/queries << endl; -} - - -#define STATS1(fname,vect) {\ - start_clock(); \ - uint q = 0; \ - while(qfname((vect)[q]); \ - q++; \ - } \ - double t = 1000.0*stop_clock(); \ - printStats(t,#fname,(vect).size()); \ - } - -#define STATS1p(fname,vect) {\ - start_clock(); \ - uint q = 0; \ - while(qfname((vect)[q]).min; \ - q++; \ - } \ - double t = 1000.0*stop_clock(); \ - printStats(t,#fname,(vect).size()); \ - } - -#define STATS2(fname,vect) {\ - start_clock(); \ - uint q = 0; \ - while(qfname((vect)[q].first,(vect)[q].second); \ - q++; \ - } \ - double t = 1000.0*stop_clock(); \ - printStats(t,#fname,(vect).size()); \ - } - - TagType target_tag = -1; -vector treenodeQueries; -vector > treenodetagQueries; -vector docidQueries; - -uint acc = 0; - -void runQueries(XMLTree * tree) { - STATS1(Tag,treenodeQueries); - STATS1(Parent,treenodeQueries); - STATS1p(DocIds,treenodeQueries); - STATS1(MyText,treenodeQueries); - STATS1(PrevText,treenodeQueries); - STATS1(NextText,treenodeQueries); - STATS1(FirstChild,treenodeQueries); - STATS1(NextSibling,treenodeQueries); - STATS1(ParentNode,docidQueries); - STATS1(PrevNode,treenodeQueries); - STATS2(TaggedDesc,treenodetagQueries); - STATS2(TaggedFoll,treenodetagQueries); -} - - -void fill_queries(XMLTree * tree, treeNode node,unsigned char* targettagname) { - treeNode res1,res2; - TagType tag; - DocID id1,id2,id3; - queue q; - q.push(node); - while(!q.empty()) { - node = q.front(); - q.pop(); - if (node != NULLT) { - tag = tree->Tag(node); - if (target_tag == -1) { - const unsigned char * tagname; - tagname = tree->GetTagNameByRef(tag); - if (strcmp( (char*) tagname, (char*) targettagname) == 0) - target_tag = tag; - } - treenodeQueries.push_back(node); - treenodetagQueries.push_back(pair(node,tag)); - id1 = tree->MyText(node); - id2 = tree->PrevText(node); - id3 = tree->NextText(node); - id1 = max(id1, max(id2,id3)); - docidQueries.push_back(id1); - res1 = tree->FirstChild(node); - res2 = tree->NextSibling(node); - q.push(res1); - q.push(res2); - } - } -} - - -vector traversalQueries; -uint cFullTraversal = 0; - -void traversal_time(XMLTree * tree) { - start_clock(); - uint q = 0; - while(qFirstChild(node); - acc += tree->NextSibling(node); - q++; - } - double t = 1000.0*stop_clock(); - printStats(t,"FullTraversal",traversalQueries.size()); -} - - -unsigned int traversal(XMLTree *tree,treeNode node) { - uint ret = 0; - TagType tag; - queue q; - q.push(node); - while(!q.empty()) { - node = q.front(); - q.pop(); - if (node != NULLT) { - cFullTraversal++; - tag = tree->Tag(node); - if (tag == target_tag) - ret++; - treeNode t1 = tree->FirstChild(node); - q.push(tree->FirstChild(node)); - treeNode t2 = tree->NextSibling(node); - q.push(tree->NextSibling(node)); - if(t1!=NULLT) - traversalQueries.push_back(t1); - if(t2!=NULLT) - traversalQueries.push_back(t2); - } - } - return ret; -} - - -vector > jumpQueries; -uint cJumpTraversal = 0; - -void jump_time(XMLTree * tree) { - start_clock(); - uint q = 0; - while(qTaggedDesc(node,target_tag); - acc += tree->TaggedFollBelow(node,target_tag,root); - q++; - } - double t = 1000.0*stop_clock(); - printStats(t,"JumpTraversal",jumpQueries.size()); -} - - -/* This simulates the run function of the jumping automata*/ -unsigned int jump_traversal(XMLTree* tree, treeNode node,treeNode root) { - uint ret = 0; - TagType tag; - queue > q; - q.push(pair(node,root)); - while(!q.empty()) { - pair p = q.front(); - q.pop(); - node = p.first; - root = p.second; - if (node != NULLT) { - cJumpTraversal++; - tag = tree->Tag(node); - if (tag == target_tag) - ret++; - pair p1(tree->TaggedDesc(node,target_tag),node); - pair p2(tree->TaggedFollBelow(node,target_tag,root),root); - if(p1.first!=NULLT) - jumpQueries.push_back(p1); - if(p2.first!=NULLT) - jumpQueries.push_back(p2); - q.push(p1); - q.push(p2); - } - } - return ret; -} - - -int usage(char ** argv) { - std::cout << "usage : " << argv[0] << " [-d] [-s] file.{xml,.srx} tagname\n"; - return 1; -} - - -int main(int argc, char ** argv) { - unsigned int count1,count2; - unsigned char * tagname; - string arg,filename,ext; - bool disable_tc = false; - bool save = false; - bool srx; - XMLTree * tree; - - int i = 1; - if ( i >= argc) - return usage(argv); - - arg = argv[i]; - if (arg.compare("-d") == 0) { - disable_tc = true; - i++; - if ( i >= argc) - return usage(argv); - arg = argv[i]; - } - - if (arg.compare("-s") == 0) { - save = true; - i++; - if ( i >= argc) - return usage(argv); - arg = argv[i]; - } - - // The filename - if (arg.size() < 4) - return usage(argv); - - ext=(arg.substr(arg.size()-4,4)); - if (ext.compare(".srx") == 0) { - // must truncate - filename = arg.substr(0,arg.size()-4); - srx = true; - } - else if (ext.compare(".xml")==0) { - filename = arg; - srx = false; - } - else - return usage(argv); - i++; - - if (i >= argc) - return usage(argv); - - tagname = (unsigned char*) argv[i]; - - if (srx) - // The samplerate is not taken into account for loading anymore - tree = XMLTree::Load((unsigned char*) filename.c_str(),64); - else { - try - { - //filename, sampling factor, index empty texts, disable tc - XMLDocShredder shredder(filename.c_str(),64,false,disable_tc); - shredder.processStartDocument(""); - shredder.parse(); - shredder.processEndDocument(); - tree = (XMLTree *) shredder.storageIfc_->returnDocument(); - if (save) { - filename = filename.substr(0,filename.size()-4).append(".srx"); - struct stat stats; - int exists = stat(filename.c_str(),&stats); - if(exists == 0) { - std::cout << "Warning : indexed file " << filename << " exists, not overwriting\n"; - } - else { - tree->Save((unsigned char*) filename.substr(0,filename.size()-4).c_str()); - } - - } - } - catch (const std::exception& e) { - cout << "Error during parsing : " << e.what() << "\n"; - return 2; - } - } - - fill_queries(tree,tree->Root(),tagname); - runQueries(tree); - - if (target_tag == -1) { - cout << "Warning: tag " << tagname << " was not found in the document!\n" - << "Warning: not timing traversal and jumping functions\n"; - return 0; - } - - count1 = traversal(tree,tree->Root()); - count2 = jump_traversal(tree,tree->Root(),tree->Root()); - - cout << "Full traversal found " << count1 << " '" << tagname << "' nodes, " - << cFullTraversal << " function calls." << endl; - traversal_time(tree); - cout << "Jump traversal found " << count2 << " '" << tagname << "' nodes, " - << cJumpTraversal << " function calls." << endl; - jump_time(tree); - - return 0; -}