From: kim Date: Tue, 1 Sep 2009 22:32:05 +0000 (+0000) Subject: various cleanups, more memoization in symbol table build function X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;h=1ff2494510cb02d136cbde3a064c0c8c94ec4216;p=SXSI%2Fxpathcomp.git various cleanups, more memoization in symbol table build function git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@560 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index c10edf1..8f84194 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -769,34 +769,121 @@ extern "C" value caml_bit_vector_node_array(value vect){ } -void iterjump(XMLTree* tree, treeNode node, TagType tag){ +int iterjump(XMLTree* tree, treeNode node, TagType tag, treeNode anc){ if (node == NULLT) - return; + return 0; else { - iterjump(tree,tree->TaggedDescendant(node,tag),tag); - iterjump(tree,tree->TaggedFollowing(node,tag),tag); - return; + return /*1+iterjump(tree,tree->TaggedDescendant(node,tag),tag,node) + +*/ iterjump(tree,tree->TaggedFollowingBelow(node,tag,anc),tag,anc); }; } extern "C" value caml_benchmark_jump(value tree,value tag){ - iterjump(XMLTREE(tree),0, Int_val(tag)); + int count; + treeNode root = XMLTREE(tree)->FirstChild(0); + root = XMLTREE(tree)->FirstChild(root); + count = iterjump(XMLTREE(tree), root , Int_val(tag),0); return Val_unit; } -int iterfsns(XMLTree* tree, treeNode node){ +int iterfcns(XMLTree* tree, treeNode node){ + if (node == NULLT) + return 0; + else { + return /*1+ iterfcns(tree,tree->FirstChild(node)) +*/ + iterfcns(tree,tree->NextSibling(node)); + }; +} +/* +extern "C" value caml_benchmark_fcns(value tree){ + int i = iterfcns(XMLTREE(tree),0); + return Val_unit; + +} +*/ +extern "C" value caml_benchmark_fcns(value tree){ + treeNode root = XMLTREE(tree)->FirstChild(0); + root = XMLTREE(tree)->FirstChild(root); + iterfcns(XMLTREE(tree),root); + return Val_unit; + +} +int iterlcps(XMLTree* tree, treeNode node){ if (node == NULLT) return 0; else { int x = tree->Tag(node); - x += iterfsns(tree,tree->FirstChild(node)); - x += iterfsns(tree,tree->NextSibling(node)); + x += iterlcps(tree,tree->LastChild(node)); + x += iterlcps(tree,tree->PrevSibling(node)); return x; }; } -extern "C" value caml_benchmark_fsns(value tree){ - iterfsns(XMLTREE(tree),0); +extern "C" value caml_benchmark_lcps(value tree){ + iterlcps(XMLTREE(tree),0); return Val_unit; } + +extern "C" { + + typedef struct dummy_node_ { + struct dummy_node_* first; + struct dummy_node_* next; + } dummy_node; + + + dummy_node * new_dummy_node () { + + dummy_node * node = (dummy_node*) malloc(sizeof(dummy_node)); + if (!node) + printf("%s","Cannot allocate memory\n"); + + return node; + } + + void free_tree(dummy_node * node){ + if (node){ + free_tree(node->first); + free_tree(node->next); + free(node); + }; + return; + } + + dummy_node * create_tree(XMLTree* tree, treeNode i){ + if (i == NULLT) + return NULL; + else { + dummy_node * f, *n, *r; + f = create_tree(tree,tree->FirstChild(i)); + n = create_tree(tree,tree->NextSibling(i)); + r = new_dummy_node(); + r->first = f; + r->next = n; + return r; + }; + } + + int iter_tree(dummy_node * n){ + if (n == NULL) + return 0; + else { + return (1+ iter_tree(n->first) + iter_tree(n->next)); + }; + } + +} +extern "C" value caml_build_pointers(value tree){ + return ((value) create_tree(XMLTREE(Field(tree,0)),0)); +} + +extern "C" value caml_iter_pointers (value node){ + return Val_int(iter_tree((dummy_node*) node)); + +} + +extern "C" value caml_free_pointers(value node){ + free_tree((dummy_node*) node); + return Val_unit; +} diff --git a/ata.ml b/ata.ml index 4ea2067..3959ccf 100644 --- a/ata.ml +++ b/ata.ml @@ -1038,7 +1038,7 @@ END (ts,t) -> if (TagSet.mem tag ts) then - let _,_,_,f,_ = Transition.node t in + let _,_,_,f,_ = t.Transition.node in let (child,desc,below),(sibl,foll,after) = Formula.st f in (Formlist.cons t fl_acc, StateSet.union ll_acc below, diff --git a/main.ml b/main.ml index 342bb2f..26b445e 100644 --- a/main.ml +++ b/main.ml @@ -54,6 +54,12 @@ let test_loop2 tree tag = Hashtbl.add f (hash 101) `Foo; g t' Tree.root +type pointers +external build_pointers : Tree.t -> pointers = "caml_build_pointers" +external iter_pointers : pointers -> int = "caml_iter_pointers" +external free_pointers : pointers -> unit = "caml_free_pointers" + + let main v query_string output = let _ = Tag.init (Tree.tag_pool v) in @@ -65,11 +71,20 @@ let main v query_string output = Ulexer.Loc.Exc_located ((x,y),e) -> Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);exit 1 in let _ = Printf.eprintf "Number of nodes %i\n%!" (Tree.size v) in + let _ = Tree.stats v in let _ = Printf.eprintf "Timing first_child/next_sibling %!" in - let _ = time (Tree.benchmark_fsns) v in + let _ = time (Tree.benchmark_fcns) v in + let _ = Printf.eprintf "Timing last_child/prev_sibling %!" in + let _ = time (Tree.benchmark_lcps) v in let _ = Printf.eprintf "Timing jump to a %!" in let _ = time (Tree.benchmark_jump v) (Tag.tag "a") in +(* let _ = Printf.eprintf "Timing pointer allocation %!" in + let pointers = time (build_pointers) v in + let _ = Printf.eprintf "Timing pointer iteration %!" in + let i = time (iter_pointers) pointers in + let _ = Printf.eprintf "Traversed %i pointers\n\nTiming pointer deallocation %!" i in + let _ = time (free_pointers) pointers in *) (* let _ = Printf.eprintf "Timing //keyword :" in let r = time (test_loop v) (Tag.tag "keyword") in let _ = Printf.eprintf "Count is %i\n%!" r in diff --git a/tree.ml b/tree.ml index b1b1d15..b6efb67 100644 --- a/tree.ml +++ b/tree.ml @@ -132,9 +132,13 @@ external benchmark_jump : tree -> Tag.t -> unit = "caml_benchmark_jump" "noalloc let benchmark_jump t s = benchmark_jump t.doc s -external benchmark_fsns : tree -> unit = "caml_benchmark_fsns" "noalloc" +external benchmark_fcns : tree -> unit = "caml_benchmark_fcns" "noalloc" -let benchmark_fsns t = benchmark_fsns t.doc +let benchmark_fcns t = benchmark_fcns t.doc + +external benchmark_lcps : tree -> unit = "caml_benchmark_lcps" "noalloc" + +let benchmark_lcps t = benchmark_lcps t.doc @@ -188,9 +192,20 @@ module MemAdd = Hashtbl.Make ( let hash (x,y) = HASHINT2(x,Ptset.Int.uid y) end) +module MemUpdate = struct +include Hashtbl.Make ( + struct + type t = Tag.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t + let equal (a1,b1,c1,d1,e1) (a2,b2,c2,d2,e2) = a1==a2 && + b1 == b2 && c1 == c2 && d1 == d2 && e1 == e2 + let hash (a,b,c,d,e) = HASHINT4(HASHINT2(a,Ptset.Int.uid b),Ptset.Int.uid c,Ptset.Int.uid d,Ptset.Int.uid e) + end) + +end + let collect_tags tree = let _ = Printf.eprintf "Collecting Tags\n%!" in -(* let h_union = MemUnion.create BIG_H_SIZE in + let h_union = MemUnion.create BIG_H_SIZE in let pt_cup s1 s2 = try MemUnion.find h_union (s1,s2) @@ -205,9 +220,7 @@ let collect_tags tree = with | Not_found -> let r = Ptset.Int.add t s in MemAdd.add h_add (t,s) r;r - in *) - let pt_cup = Ptset.Int.union in - let pt_add = Ptset.Int.add in + in let h = Hashtbl.create BIG_H_SIZE in let update t sc sb ss sa = let schild,sbelow,ssibling,safter = @@ -234,9 +247,9 @@ let collect_tags tree = let _ = loop false (tree_root tree) Ptset.Int.empty in let _ = Printf.eprintf "Finished\n%!" in h - - - + + + let contains_array = ref [| |] let contains_index = Hashtbl.create 4096 @@ -472,8 +485,8 @@ let is_binary_ancestor t n1 n2 = let parent t n = tree_parent t.doc n -let first_child t = (); fun n -> tree_first_child t.doc n -let first_element t = (); fun n -> tree_first_element t.doc n +let first_child t = let doc = t.doc in ();fun n -> tree_first_child doc n +let first_element t = let doc = t.doc in (); fun n -> tree_first_element doc n (* these function will be called in two times: first partial application on the tag, then application of the tag and the tree, then application of @@ -486,8 +499,8 @@ let select_child t = fun ts -> let v = ptset_to_vector ts in (); fun n -> tree_select_child t.doc n v -let next_sibling t = (); fun n -> tree_next_sibling t.doc n -let next_element t = (); fun n -> tree_next_element t.doc n +let next_sibling t = let doc = t.doc in (); fun n -> tree_next_sibling doc n +let next_element t = let doc = t.doc in (); fun n -> tree_next_element doc n let tagged_following_sibling t tag = (); fun n -> tree_tagged_following_sibling t.doc n tag @@ -752,7 +765,19 @@ let print_xml_fast3 t = tree_print_xml_fast3 t.doc - +let stats t = + let tree = t.doc in + let rec loop left node acc_d total_d num_leaves = + if node == nil then + (acc_d+total_d,if left then num_leaves+1 else num_leaves) + else + let d,td = loop true (tree_first_child tree node) (acc_d+1) total_d num_leaves in + loop false (tree_next_sibling tree node) (acc_d) d td + in + let a,b = loop true root 0 0 0 + in + Printf.eprintf "Average depth: %f, number of leaves %i\n%!" ((float_of_int a)/. (float_of_int b)) b +;; diff --git a/tree.mli b/tree.mli index 59bbcc2..4a8f12c 100644 --- a/tree.mli +++ b/tree.mli @@ -88,4 +88,6 @@ val closing : t -> [`Tree] node -> [`Tree] node val is_open : t -> [`Tree] node -> bool val benchmark_jump : t -> Tag.t -> unit -val benchmark_fsns : t -> unit +val benchmark_fcns : t -> unit +val benchmark_lcps : t -> unit +val stats : t -> unit