#include "Utils.h"
#include <sys/time.h>
#include <time.h>
+#include <sys/stat.h>
using std::cout;
using std::string;
STOPTIMER(Tag);
if (target_tag == -1){
tagname = tree->GetTagNameByRef(tag);
- if (strcmp( (char*) tagname, (char*) targettagname) == 0)
- target_tag = tag;
+ //if (strcmp( (char*) tagname, (char*) targettagname) == 0)
+ target_tag = tag;
};
STARTTIMER();
res1 = tree->Parent(node);
}
+int usage(char ** argv){ // Prints the command-line synopsis (using argv[0] as the program name) to stdout; always returns 1 so callers can write `return usage(argv);`.
+
+ std::cout << "usage : " << argv[0] << " [-d] [-s] file.{xml,.srx} tagname\n"; // NOTE(review): "{xml,.srx}" reads as "file..srx"; probably meant "{xml,srx}" - confirm before changing the message
+ return 1;
+}
int main(int argc, char ** argv){
unsigned int count1,count2;
- unsigned char * tagname = (unsigned char *) "keyword";
+ unsigned char * tagname; // Tag to look for; now taken from the last positional argument instead of being hard-coded.
+ string arg,filename,ext; // arg: argument currently being examined; filename: path handed to the loader/shredder; ext: last 4 characters of the file argument.
+ bool disable_tc = false; // Set by -d; forwarded as the 4th XMLDocShredder constructor argument.
+ bool save = false; // Set by -s; only consulted on the .xml path below.
+ bool srx; // true for a ".srx" input, false for ".xml"; every other case returns before it is read.
+ XMLTree * tree;
+
+ int i = 1; // Index of the next argv entry to consume.
+ if ( i >= argc)
+ return usage(argv);
+
+ arg = argv[i];
+ if (arg.compare("-d") == 0){ // Options are checked in a fixed order: -d (if present) must come before -s.
+ disable_tc = true;
+ i++;
+ if ( i >= argc)
+ return usage(argv);
+ arg = argv[i];
+ };
- if (argc != 2){
- std::cout << "Usage : " << argv[0] << " filename (without .srx)\n";
- return 1;
+ if (arg.compare("-s") == 0){ // NOTE(review): -s is accepted with a .srx input too, but `save` is never read on that path.
+ save = true;
+ i++;
+ if ( i >= argc)
+ return usage(argv);
+ arg = argv[i];
};
- // The samplerate is not taken into account for loading anymore
- XMLTree * tree = XMLTree::Load((unsigned char*) argv[1],64);
+
+ // The filename: arg now holds the file argument; it must end in ".srx" or ".xml".
+ if (arg.size() < 4) // Guards the substr(arg.size()-4, 4) below against a too-short argument.
+ return usage(argv);
+
+ ext=(arg.substr(arg.size()-4,4));
+ if (ext.compare(".srx") == 0){
+ // must truncate: the ".srx" suffix is dropped before the name is passed to XMLTree::Load
+ filename = arg.substr(0,arg.size()-4); // presumably Load appends ".srx" itself (the old usage text asked for the name "without .srx") - TODO confirm
+
+ srx = true;
+ }
+ else if (ext.compare(".xml")==0) {
+ filename = arg; // The shredder receives the full .xml path unchanged.
+ srx = false;
+ }
+ else
+ return usage(argv);
+ i++;
+
+ if (i >= argc) // The tag name is mandatory.
+ return usage(argv);
+
+ tagname = (unsigned char*) argv[i]; // Any arguments after the tag name are ignored.
+
+
+ if (srx)
+ // The samplerate is not taken into account for loading anymore
+ tree = XMLTree::Load((unsigned char*) filename.c_str(),64);
+ else {
+ try {
+ //filename, sampling factor, index empty texts, disable tc
+ XMLDocShredder shredder(filename.c_str(),64,false,disable_tc);
+ shredder.processStartDocument("");
+ shredder.parse();
+ shredder.processEndDocument();
+ tree = (XMLTree *) shredder.storageIfc_->returnDocument();
+ if (save){
+ filename = filename.substr(0,filename.size()-4).append(".srx"); // Replace the ".xml" suffix with ".srx" to get the index file name.
+ struct stat stats;
+ int exists = stat(filename.c_str(),&stats); // stat() returns 0 on success, i.e. when the path can be stat'ed.
+ if(exists == 0) {
+ std::cout << "Warning : indexed file " << filename << " exists, not overwriting\n";
+ }
+ else {
+ tree->Save((unsigned char*) filename.substr(0,filename.size()-4).c_str()); // Passes the name with ".srx" stripped again; presumably Save appends the extension itself - TODO confirm
+ };
+
+ };
+ }
+ catch (const std::exception& e){ // Covers everything in the try block, including the Save call; the message still says "parsing".
+ std::cout << "Error during parsing : " << e.what() << "\n";
+ return 2; // Exit code 2: building the tree from XML failed.
+ };
+ };
traversal(tree,tree->Root(),tagname);
- STARTTIMER();
- count1 = time_traversal(tree,tree->Root());
- STOPTIMER(FullTraversal);
- count2 = time_jump(tree,tree->Root(),tree->Root());
- STOPTIMER(JumpTraversal);
PRINTSTATS(Tag);
PRINTSTATS(FirstChild);
PRINTSTATS(ParentNode);
PRINTSTATS(PrevNode);
std::cout << "\n";
+
+ if (target_tag == -1){ // target_tag is declared outside this view; still being -1 after traversal() is treated as "tag not found".
+ std::cout << "Warning: tag " << tagname << " was not found in the document!\n"
+ << "Warning: not timing traversal and jumping functions\n";
+ return 3; // Exit code 3: the timed runs below are skipped.
+ };
+
+ STARTTIMER(); // The timed runs were moved after the target_tag check above.
+ count1 = time_traversal(tree,tree->Root());
+ STOPTIMER(FullTraversal);
+
+ count2 = time_jump(tree,tree->Root(),tree->Root()); // NOTE(review): count2 is never printed in the visible code, and no STARTTIMER() precedes this run - confirm how the macros measure it.
+ STOPTIMER(JumpTraversal);
+
std::cout << "Full traversal found " << count1 << " " << tagname << " nodes\n";
PRINTSTATS(FullTraversal);
std::cout << "\n";
return 0;
+
}