--- /dev/null
+#include "XMLDocShredder.h"
+#include "XMLTree.h"
+#include "Utils.h"
+#include <sys/time.h>
+#include <time.h>
+
+using std::cout;
+using std::string;
+using std::left;
+using std::right;
+
+static clock_t tFirstChild = 0; // t<Op>: clock() ticks accumulated by STOPTIMER(<Op>) for the measured operation <Op>
+static clock_t tNextSibling = 0;
+static clock_t tTaggedDesc = 0;
+static clock_t tTaggedFoll = 0;
+static clock_t tParentNode = 0;
+static clock_t tPrevNode = 0;
+static clock_t tTag = 0;
+static clock_t tMyText = 0;
+static clock_t tPrevText = 0;
+static clock_t tNextText = 0;
+static clock_t tFullTraversal = 0;
+static clock_t tJumpTraversal = 0;
+
+static unsigned int cFirstChild = 0; // c<Op>: incremented once per STOPTIMER(<Op>)
+static unsigned int cNextSibling = 0;
+static unsigned int cTaggedDesc = 0;
+static unsigned int cTaggedFoll = 0;
+static unsigned int cParentNode = 0;
+static unsigned int cPrevNode = 0;
+static unsigned int cTag = 0;
+static unsigned int cMyText = 0;
+static unsigned int cPrevText = 0;
+static unsigned int cNextText = 0;
+static unsigned int cFullTraversal = 0; // also incremented for every non-null node visited by time_traversal()
+static unsigned int cJumpTraversal = 0; // also incremented for every non-null node visited by time_jump()
+
+static clock_t tmp; // start timestamp written by STARTTIMER() and read by STOPTIMER()
+
+static TagType target_tag = -1; // tag id counted by time_traversal()/time_jump(); stays -1 until traversal() matches the target tag name
+
+// Store the current clock() value in the shared `tmp` start marker.
+#define STARTTIMER() (tmp= clock())
+// Add the ticks elapsed since the last STARTTIMER() to t<x> and increment c<x>.
+#define STOPTIMER(x) do { (t##x) = (t##x) + (clock() - tmp); (c##x)= (c##x)+1; } while (0)
+// Print one statistics row for operation x: call count, raw ticks, total time
+// in milliseconds and mean time per call in milliseconds.
+// The mean is reported as 0 when no call was recorded, instead of dividing by zero.
+#define PRINTSTATS(x) do { \
+  std::cout.width(11); \
+  std::cout << std::left << #x; \
+  std::cout << " : "; \
+  std::cout.width(8); \
+  std::cout << std::right << c##x << " calls,"; \
+  std::cout.width(8); \
+  std::cout << std::right << t##x << " cycles, total:"; \
+  std::cout.width(5); \
+  std::cout << std::right << ((t##x) *1000.00) /CLOCKS_PER_SEC \
+            << " ms, mean: "; \
+  std::cout.width(5); \
+  std::cout << std::right \
+            << ((c##x) != 0 \
+                  ? (((t##x)* 1000.00) /CLOCKS_PER_SEC) / (c##x) \
+                  : 0.0) \
+            << "\n"; \
+  } while (0)
+
+
+// Benchmark walk over the tree reachable from `node` through FirstChild and
+// NextSibling. On every visited node each XMLTree navigation primitive is
+// called once between STARTTIMER()/STOPTIMER(), so the t<Op>/c<Op> globals
+// accumulate per-operation cost.
+//
+// tree          : document to query.
+// node          : first node to visit; NULLT ends the walk.
+// targettagname : tag name compared (strcmp) with each visited node's tag
+//                 name while target_tag is still -1; on a match target_tag
+//                 is set to that node's tag id.
+void traversal(XMLTree * tree, treeNode node,unsigned char* targettagname){
+  treeNode res, sibling;
+  DocID textId, prevTextId, nextTextId;
+
+  // The sibling chain is followed by this loop; only first children recurse.
+  while (node != NULLT){
+    STARTTIMER();
+    const TagType tag = tree->Tag(node);
+    STOPTIMER(Tag);
+
+    if (target_tag == -1){
+      const unsigned char * name = tree->GetTagNameByRef(tag);
+      if (strcmp( (char*) name, (char*) targettagname) == 0)
+        target_tag = tag;
+    }
+
+    // The results of the next two queries are only computed for timing and
+    // are overwritten below.
+    STARTTIMER();
+    res = tree->TaggedDesc(node,tag);
+    STOPTIMER(TaggedDesc);
+
+    STARTTIMER();
+    res = tree->TaggedFoll(node,tag);
+    STOPTIMER(TaggedFoll);
+
+    STARTTIMER();
+    textId = tree->MyText(node);
+    STOPTIMER(MyText);
+
+    STARTTIMER();
+    prevTextId = tree->PrevText(node);
+    STOPTIMER(PrevText);
+
+    STARTTIMER();
+    nextTextId = tree->NextText(node);
+    STOPTIMER(NextText);
+
+    // The largest of the three text ids is the argument of the two
+    // text-to-node queries.
+    textId = max(textId, max(prevTextId,nextTextId));
+
+    STARTTIMER();
+    res = tree->ParentNode(textId);
+    STOPTIMER(ParentNode);
+
+    STARTTIMER();
+    res = tree->PrevNode(textId);
+    STOPTIMER(PrevNode);
+
+    STARTTIMER();
+    res = tree->FirstChild(node);
+    STOPTIMER(FirstChild);
+
+    STARTTIMER();
+    sibling = tree->NextSibling(node);
+    STOPTIMER(NextSibling);
+
+    // Children first, then continue with the next sibling.
+    traversal(tree,res,targettagname);
+    node = sibling;
+  }
+
+}
+
+// Counts the nodes whose tag equals target_tag among `node`, its following
+// siblings and all of their descendants, visiting every node through
+// FirstChild/NextSibling. cFullTraversal is incremented for each node visited.
+//
+// count : number of matches found so far.
+// Returns `count` plus the matches found in this walk.
+unsigned int time_traversal(XMLTree *tree,treeNode node,unsigned int count){
+  // Siblings are handled iteratively; each subtree is handled recursively.
+  for (treeNode cur = node; cur != NULLT; cur = tree->NextSibling(cur)) {
+    cFullTraversal++;
+    const TagType tag = tree->Tag(cur);
+    if (tag == target_tag)
+      count = count + 1;
+    count = time_traversal(tree,tree->FirstChild(cur),count);
+  }
+  return count;
+}
+
+
+// Counts nodes tagged target_tag by jumping between candidates instead of
+// visiting every node: from each visited node it descends with
+// TaggedDesc(node, target_tag), using that node as the new `root` argument,
+// and then moves on with TaggedFollBelow(node, target_tag, root).
+// cJumpTraversal is incremented for each node visited.
+//
+// count : number of matches found so far.
+// root  : passed unchanged as the third argument of TaggedFollBelow.
+// Returns `count` plus the matches found in this walk.
+unsigned int time_jump(XMLTree* tree, treeNode node,unsigned int count,treeNode root){
+  while (node != NULLT) {
+    cJumpTraversal++;
+    const TagType tag = tree->Tag(node);
+    if (tag == target_tag)
+      count = count + 1;
+    // Matches reached through TaggedDesc from this node are counted first ...
+    count = time_jump(tree,
+                      tree->TaggedDesc(node,target_tag),
+                      count,
+                      node);
+    // ... then the walk continues with the next tagged node under `root`.
+    node = tree->TaggedFollBelow(node,target_tag,root);
+  }
+  return count;
+}
+
+
+
+
+
+// Entry point of the benchmark.
+// Expects exactly one argument, the file name passed to XMLTree::Load.
+// Runs traversal() to time the individual navigation primitives and to
+// resolve the tag id of "keyword", then times a full traversal and a
+// jump-based traversal that both count the "keyword" nodes, and prints all
+// collected statistics on standard output.
+// Returns 1 on a usage error, 0 otherwise.
+int main(int argc, char ** argv){
+  unsigned int count1,count2;
+  unsigned char * tagname = (unsigned char *) "keyword";
+
+  if (argc != 2){
+    std::cout << "Usage : " << argv[0] << " filename (without .srx)\n";
+    return 1;
+  }
+
+  // The samplerate is not taken into account for loading anymore
+  XMLTree * tree = XMLTree::Load((unsigned char*) argv[1],64);
+
+  traversal(tree,tree->Root(),tagname);
+
+  STARTTIMER();
+  count1 = time_traversal(tree,tree->Root(),0);
+  STOPTIMER(FullTraversal);
+
+  // Restart the clock: without this, the jump traversal would be measured
+  // from the start of the full traversal and its time would include it.
+  STARTTIMER();
+  count2 = time_jump(tree,tree->Root(),0,tree->Root());
+  STOPTIMER(JumpTraversal);
+
+  PRINTSTATS(FirstChild);
+  PRINTSTATS(NextSibling);
+  PRINTSTATS(Tag);
+  PRINTSTATS(TaggedDesc);
+  PRINTSTATS(TaggedFoll);
+  PRINTSTATS(PrevText);
+  PRINTSTATS(MyText);
+  PRINTSTATS(NextText);
+  PRINTSTATS(ParentNode);
+  PRINTSTATS(PrevNode);
+  std::cout << "\n";
+  std::cout << "Full traversal found " << count1 << " " << tagname << " nodes\n";
+  PRINTSTATS(FullTraversal);
+  std::cout << "\n";
+  std::cout << "Jump traversal found " << count2 << " " << tagname << " nodes\n";
+  PRINTSTATS(JumpTraversal);
+
+
+  return 0;
+}
val init_naive_contains : t -> string -> unit
val mk_nil : t -> t
val test_jump : t -> Tag.t -> unit
+ val time_xml_tree : t -> Tag.t -> int list
+ val time_xml_tree2 : t -> Tag.t -> int list
end
module XML =
external tagged_next : t -> [`Tree ] node -> Ptset.int_vector -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_next"
external tagged_foll_only : t -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_only"
external tagged_desc_or_foll_only : t -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_only"
+ external tagged_foll_below : t -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_below"
let test_jump tree tag =
let rec loop id ctx =
end
in
aux (root v)
+
+ let rrrr = ref 0
+
+ (* [time_xml_tree v tag] walks the tree from [root v] through
+    [first_child] / [next_sibling] and collects every node whose
+    [tag_id] is physically equal to [tag].  [rrrr] counts the calls of
+    the inner walk (nil nodes included) and is printed on stderr before
+    the collected list is returned. *)
+ let time_xml_tree v tag =
+   let rec walk node found =
+     incr rrrr;
+     if is_nil node then found
+     else begin
+       let found =
+         if tag == (tag_id v node) then node :: found else found
+       in
+       (* the subtree is collected first, then the following siblings *)
+       let found = walk (first_child v node) found in
+       walk (next_sibling v node) found
+     end
+   in
+   let result = walk (root v) [] in
+   Printf.eprintf "%i\n%!" !rrrr;
+   result
+
+ let rrrr2 = ref 0
+ (* [time_xml_tree2 v tag] collects the nodes whose [tag_id] is
+    physically equal to [tag], moving between candidates with
+    [tagged_desc] and [tagged_foll_below] instead of visiting every
+    node.  [ctx] is the node passed as last argument of
+    [tagged_foll_below].  [rrrr2] counts the calls of the inner walk
+    (nil nodes included) and is printed on stderr before the collected
+    list is returned. *)
+ let time_xml_tree2 v tag =
+   let rec jump node found ctx =
+     incr rrrr2;
+     if is_nil node then found
+     else begin
+       let found =
+         if tag == (tag_id v node) then node :: found else found
+       in
+       (* candidates reached through [tagged_desc] first, with the
+          current node as their context ... *)
+       let found = jump (tagged_desc v node tag) found node in
+       (* ... then the next tagged node within [ctx] *)
+       jump (tagged_foll_below v node tag ctx) found ctx
+     end
+   in
+   let result = jump (root v) [] (root v) in
+   Printf.eprintf "%i\n%!" !rrrr2;
+   result
let dump { doc=t } = Tree.print_skel t
let test_xml_tree ppf tags { doc=t } = Tree.test_xml_tree ppf tags t
+ let time_xml_tree { doc=t } tag = Tree.time_xml_tree t tag (* takes the [doc] field of the record and delegates to [Tree.time_xml_tree] *)
+ let time_xml_tree2 { doc=t } tag = Tree.time_xml_tree2 t tag (* takes the [doc] field of the record and delegates to [Tree.time_xml_tree2] *)
let test_jump { doc=t } tag = Tree.test_jump t tag
let contains_array = ref [| |]
let doc =
- try
- Tree.Binary.parse_xml_uri Sys.argv.(1)
- with
- | _ ->(
try
Tree.Binary.load Sys.argv.(1)
with
| _ ->
- Printf.printf "Error parsing document\n";
- exit 2)
+ ( try (* reached when [Tree.Binary.load] raised any exception: fall back to parsing Sys.argv.(1) with [parse_xml_uri] *)
+ Tree.Binary.parse_xml_uri Sys.argv.(1)
+ with
+ | _ ->( (* any exception from the fallback is reported with the generic message below *)
+
+ Printf.printf "Error parsing document\n";
+ exit 2)) (* the program exits with status 2 when both attempts raised *)
;;
let _ = Tag.init (Tree.Binary.tag_pool doc)
;;
-let tags = (collect_tags doc)
-;;
+(*
+ let tags = (collect_tags doc)
+ ;;
(*
let _ = Tree.Binary.test_xml_tree Format.std_formatter tags doc
;;
(* let _ = Ata.TS.iter (fun t -> Tree.Binary.print_xml_fast stdout t; print_newline();) r *)
;;
-*)
+*) *)
let time f x =
let t1 = Unix.gettimeofday () in
let r = f x in
Printf.eprintf " %fms\n%!" t ;
r
;;
+let _ = Printf.eprintf "Timing full //keyword ... " (* label printed before the timing emitted by [time] *)
+let x = List.length (time (Tree.Binary.time_xml_tree doc) (Tag.tag "keyword")) (* number of nodes returned by the full traversal for tag "keyword" *)
let _ = Printf.eprintf "Timing jump //keyword ... "
-let _ = time Tree.Binary.test_jump doc (Tag.tag "keyword")
+let y = List.length (time (Tree.Binary.time_xml_tree2 doc) (Tag.tag "keyword")) (* number of nodes returned by the jump traversal for tag "keyword" *)
+let _ = Printf.eprintf "coherant : %b\n" (x=y) (* prints whether both traversals returned the same number of nodes *)