#include "Utils.h"
#include <sys/time.h>
#include <time.h>
+#include <sys/stat.h>
using std::cout;
using std::string;
using std::left;
using std::right;
-static clock_t tFirstChild = 0;
-static clock_t tNextSibling = 0;
-static clock_t tParent = 0;
-static clock_t tTaggedAncestor = 0;
-static clock_t tTaggedChild = 0;
-static clock_t tTaggedDesc = 0;
-static clock_t tTaggedFoll = 0;
-static clock_t tParentNode = 0;
-static clock_t tPrevNode = 0;
-static clock_t tTag = 0;
-static clock_t tMyText = 0;
-static clock_t tPrevText = 0;
-static clock_t tNextText = 0;
-static clock_t tDocIds = 0;
-
-static clock_t tFullTraversal = 0;
-static clock_t tJumpTraversal = 0;
+static double tFirstChild = 0; // t<Name>: elapsed time in milliseconds accumulated by STOPTIMER(<Name>)
+static double tNextSibling = 0;
+static double tParent = 0;
+static double tTaggedAncestor = 0;
+static double tTaggedChild = 0;
+static double tTaggedDesc = 0;
+static double tTaggedFoll = 0;
+static double tParentNode = 0;
+static double tPrevNode = 0;
+static double tTag = 0;
+static double tMyText = 0;
+static double tPrevText = 0;
+static double tNextText = 0;
+static double tDocIds = 0;
+
+static double tFullTraversal = 0; // stopped in main() after the time_traversal() call
+static double tJumpTraversal = 0; // stopped in main() after the time_jump() call
static unsigned int cFirstChild = 0;
static unsigned int cNextSibling = 0;
static unsigned int cJumpTraversal = 0;
-
-static clock_t tmp;
+static struct timeval tmpv1; // timestamp written by STARTTIMER()
+static struct timeval tmpv2; // timestamp written by STOPTIMER()
static TagType target_tag = -1;
-#define STARTTIMER() (tmp= clock())
-#define STOPTIMER(x) do { (t##x) = (t##x) + (clock() - tmp); (c##x)= (c##x)+1; } while (0)
+// STARTTIMER(): record the current wall-clock time in tmpv1.
+#define STARTTIMER() (gettimeofday(&tmpv1,NULL))
+// STOPTIMER(x): record the current time in tmpv2, add the time elapsed since
+// the last STARTTIMER() to t<x> (in milliseconds) and increment c<x>.
+// It does not reset tmpv1, so call STARTTIMER() again before the next measurement.
+#define STOPTIMER(x) do { \
+ gettimeofday(&tmpv2,NULL); \
+ (t##x) = (t##x) + ((tmpv2.tv_sec - tmpv1.tv_sec) * 1000000.0 + \
+ (tmpv2.tv_usec - tmpv1.tv_usec))/1000.0; \
+ (c##x)= (c##x)+1; \
+ } while (0)
+
+// PRINTSTATS(x): print one line for counter x to std::cout: its name, c<x> calls,
+// the total t<x> in ms and the mean per call. The mean is reported as 0 when
+// c<x> is 0, instead of the result of a division by zero.
#define PRINTSTATS(x) do { \
std::cout.width(15); \
std::cout << std::left << #x; \
std::cout << " : "; \
std::cout.width(8); \
- std::cout << std::right << c##x << " calls,"; \
+ std::cout << std::right << c##x << " calls, "; \
std::cout.width(8); \
- std::cout << std::right << t##x << " cycles, total:"; \
- std::cout.width(5); \
- std::cout << std::right << ((t##x) *1000.00) /CLOCKS_PER_SEC \
+ std::cout << std::right << (t##x) \
<< " ms, mean: "; \
- std::cout.width(5); \
+ std::cout.width(8); \
std::cout << std::right \
- << (((t##x)* 1000.00) /CLOCKS_PER_SEC) / c##x \
+ << ((c##x) != 0 ? (t##x) / (c##x) : 0.0) \
<< "\n"; \
} while (0)
-
void traversal(XMLTree * tree, treeNode node,unsigned char* targettagname){
treeNode res1,res2;
TagType tag;
cJumpTraversal++;
tag = tree->Tag(node);
if (tag == target_tag)
-
return 1 +
time_jump(tree, tree->TaggedDesc(node,target_tag),node) +
time_jump(tree, tree->TaggedFollBelow(node,target_tag,root), root);
}
+// Prints the command-line synopsis to standard output.
+// argv: the program's argument vector; only argv[0] (the program name) is read.
+// Returns 1, so callers can write `return usage(argv);` to exit with that status.
+int usage(char ** argv){
+
+ std::cout << "usage : " << argv[0] << " [-d] [-s] file.{xml,srx} tagname\n";
+ return 1;
+}
+// Benchmark entry point.
+// Command line: [-d] [-s] <file.xml | file.srx> <tagname>
+//   -d  sets disable_tc, which is passed to XMLDocShredder (must precede -s)
+//   -s  after shredding an .xml file, save the tree unless the .srx file exists
+// A .srx argument is loaded with XMLTree::Load; a .xml argument is shredded.
+// Exit status: 0 on success, 1 on a usage error, 2 when parsing throws,
+// 3 when target_tag is still -1 after traversal().
int main(int argc, char ** argv){
unsigned int count1,count2;
- unsigned char * tagname = (unsigned char *) "keyword";
+ unsigned char * tagname;
+ string arg,filename,ext;
+ bool disable_tc = false;
+ bool save = false;
+ bool srx;
+ XMLTree * tree;
+
+ // i indexes the argument currently being examined; arg holds its text.
+ int i = 1;
+ if ( i >= argc)
+ return usage(argv);
+
+ arg = argv[i];
+ if (arg.compare("-d") == 0){
+ disable_tc = true;
+ i++;
+ if ( i >= argc)
+ return usage(argv);
+ arg = argv[i];
+ };
- if (argc != 2){
- std::cout << "Usage : " << argv[0] << " filename (without .srx)\n";
- return 1;
+ if (arg.compare("-s") == 0){
+ save = true;
+ i++;
+ if ( i >= argc)
+ return usage(argv);
+ arg = argv[i];
};
- // The samplerate is not taken into account for loading anymore
- XMLTree * tree = XMLTree::Load((unsigned char*) argv[1],64);
+
+ // The filename
+ if (arg.size() < 4)
+ return usage(argv);
+ ext=(arg.substr(arg.size()-4,4));
+ if (ext.compare(".srx") == 0){
+ // must truncate
+ filename = arg.substr(0,arg.size()-4);
+
+ srx = true;
+ }
+ else if (ext.compare(".xml")==0) {
+ filename = arg;
+ srx = false;
+ }
+ else
+ return usage(argv);
+ i++;
+
+ if (i >= argc)
+ return usage(argv);
+
+ tagname = (unsigned char*) argv[i];
+
+
+
+ if (srx)
+ // The samplerate is not taken into account for loading anymore
+ tree = XMLTree::Load((unsigned char*) filename.c_str(),64);
+ else {
+ try {
+ //filename, sampling factor, index empty texts, disable tc
+ XMLDocShredder shredder(filename.c_str(),64,false,disable_tc);
+ shredder.processStartDocument("");
+ shredder.parse();
+ shredder.processEndDocument();
+ tree = (XMLTree *) shredder.storageIfc_->returnDocument();
+ if (save){
+ // filename becomes "<base>.srx" for the existence check below.
+ filename = filename.substr(0,filename.size()-4).append(".srx");
+ struct stat stats;
+ int exists = stat(filename.c_str(),&stats);
+ if(exists == 0) {
+ std::cout << "Warning : indexed file " << filename << " exists, not overwriting\n";
+ }
+ else {
+ // Save() is handed the name with ".srx" stripped again
+ // (presumably it appends the suffix itself, like Load -- confirm).
+ tree->Save((unsigned char*) filename.substr(0,filename.size()-4).c_str());
+ };
+
+ };
+ }
+ catch (const std::exception& e){
+ std::cout << "Error during parsing : " << e.what() << "\n";
+ return 2;
+ };
+ };
traversal(tree,tree->Root(),tagname);
- STARTTIMER();
- count1 = time_traversal(tree,tree->Root());
- STOPTIMER(FullTraversal);
- count2 = time_jump(tree,tree->Root(),tree->Root());
- STOPTIMER(JumpTraversal);
PRINTSTATS(Tag);
PRINTSTATS(FirstChild);
PRINTSTATS(ParentNode);
PRINTSTATS(PrevNode);
std::cout << "\n";
+
+ if (target_tag == -1){
+ std::cout << "Warning: tag " << tagname << " was not found in the document!\n"
+ << "Warning: not timing traversal and jumping functions\n";
+ return 3;
+ };
+
+ STARTTIMER();
+ count1 = time_traversal(tree,tree->Root());
+ STOPTIMER(FullTraversal);
+
+ // Re-arm the timer: STOPTIMER measures from the most recent STARTTIMER(),
+ // so without this the JumpTraversal time would include the full traversal.
+ STARTTIMER();
+ count2 = time_jump(tree,tree->Root(),tree->Root());
+ STOPTIMER(JumpTraversal);
+
std::cout << "Full traversal found " << count1 << " " << tagname << " nodes\n";
PRINTSTATS(FullTraversal);
std::cout << "\n";
return 0;
+
}