From: kim Date: Wed, 20 May 2009 06:38:43 +0000 (+0000) Subject: Fixed caching bugs in ata.ml X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;h=83b4813de7204842bb59d5cb0aec71aff633ca85;p=SXSI%2Fxpathcomp.git Fixed caching bugs in ata.ml removed debugging stuff in print_xml_fast (tree.ml) git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@406 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/Makefile b/Makefile index 7dfc4d0..154c166 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -INLINE=1000 +INLINE=10000 DEBUG=false PROFILE=false VERBOSE=false @@ -52,7 +52,7 @@ CXX = g++ endif ifeq ($(PROFILE), true) -PROFILE_FLAGS = -p +PROFILE_FLAGS = -p -S SYNT_PROF = -ppopt -DPROFILE endif SYNT_FLAGS = $(SYNT_DEBUG) $(SYNT_PROF) @@ -113,7 +113,7 @@ libcamlshredder.a: $(CXXOBJECTS) XMLTree/XMLTree.a clean: @echo [CLEAN] - $(HIDE) rm -f *~ *.cm* *.[oa] *.so main + $(HIDE) rm -f *~ *.cm* *.[oa] *.so main *.s $(HIDE) rm -rf .libs diff --git a/ata.ml b/ata.ml index 7a5a64d..42bf24e 100644 --- a/ata.ml +++ b/ata.ml @@ -34,6 +34,7 @@ struct | Or of 'hcons * 'hcons | And of 'hcons * 'hcons | Atom of ([ `Left | `Right | `LLeft | `RRight ]*bool*State.t) + type 'hcons node = { pos : 'hcons expr; mutable neg : 'hcons; @@ -42,34 +43,33 @@ struct } external hash_const_variant : [> ] -> int = "%identity" - module rec HNode : Hcons.S with type data = Node.t = Hcons.Make (Node) - and Node : Hashtbl.HashedType with type t = HNode.t node = + module rec Node : Hcons.S with type data = Data.t = Hcons.Make (Data) + and Data : Hashtbl.HashedType with type t = Node.t node = struct - type t = HNode.t node + type t = Node.t node let equal x y = x.size == y.size && match x.pos,y.pos with - | False,False - | True,True -> true - | Or(xf1,xf2),Or(yf1,yf2) - | And(xf1,xf2),And(yf1,yf2) -> (HNode.equal xf1 yf1) && (HNode.equal xf2 yf2) - | Atom(d1,p1,s1), Atom(d2,p2,s2) -> d1 == d2 && (p1==p2) && s1 == s2 - | _ -> false + | a,b when a == b -> true + | Or(xf1,xf2),Or(yf1,yf2) + | And(xf1,xf2),And(yf1,yf2) -> (xf1 == yf1) && (xf2 == yf2) + | Atom(d1,p1,s1), Atom(d2,p2,s2) -> d1 == d2 && (p1==p2) && s1 == s2 + | _ -> false let hash f = match f.pos with | False -> 0 | True -> 1 - | Or (f1,f2) -> HASHINT3(PRIME2,HNode.uid f1,HNode.uid f2) - | And (f1,f2) -> HASHINT3(PRIME3,HNode.uid f1,HNode.uid f2) + | Or (f1,f2) -> HASHINT3(PRIME2,f1.Node.id, f2.Node.id) + | And (f1,f2) -> HASHINT3(PRIME3,f1.Node.id,f2.Node.id) | Atom(d,p,s) -> HASHINT4(PRIME4,hash_const_variant d,vb p,s) end - type t = HNode.t - let hash = HNode.hash - let uid = HNode.uid - let equal = HNode.equal - let expr f = (HNode.node f).pos - let st f = (HNode.node f ).st - let size f = (HNode.node f).size + type t = Node.t + let hash x = x.Node.key + let uid x = x.Node.id + let equal = Node.equal + let expr f = f.Node.node.pos + let st f = f.Node.node.st + let size f = f.Node.node.size let prio f = match expr f with @@ -108,10 +108,10 @@ struct let cons pos neg s1 s2 size1 size2 = - let nnode = HNode.make { pos = neg; neg = (Obj.magic 0); st = s2; size = size2 } in - let pnode = HNode.make { pos = pos; neg = nnode ; st = s1; size = size1 } + let nnode = Node.make { pos = neg; neg = (Obj.magic 0); st = s2; size = size2 } in + let pnode = Node.make { pos = pos; neg = nnode ; st = s1; size = size1 } in - (HNode.node nnode).neg <- pnode; (* works because the neg field isn't taken into + (Node.node nnode).neg <- pnode; (* works because the neg field isn't taken into account for hashing ! *) pnode,nnode @@ -127,7 +127,7 @@ struct | `RRight -> empty_triple,(StateSet.empty,si,si) in fst (cons (Atom(d,p,s)) (Atom(d,not p,s)) ss ss 1 1) - let not_ f = (HNode.node f).neg + let not_ f = f.Node.node.neg let union_hex ((l1,ll1,lll1),(r1,rr1,rrr1)) ((l2,ll2,lll2),(r2,rr2,rrr2)) = (StateSet.mem_union l1 l2 ,StateSet.mem_union ll1 ll2,StateSet.mem_union lll1 lll2), (StateSet.mem_union r1 r2 ,StateSet.mem_union rr1 rr2,StateSet.mem_union rrr1 rrr2) @@ -224,6 +224,13 @@ module Formlist = struct let print ppf fl = iter (fun t -> Transition.print ppf t; Format.pp_print_newline ppf ()) fl end + +module Formlistlist = +struct + include Hlist.Make(Formlist) + let print ppf fll = + iter (fun fl -> Formlist.print ppf fl; Format.pp_print_newline ppf ())fll +end type 'a t = { id : int; @@ -569,7 +576,6 @@ END end - let choose_jump tagset qtags1 qtagsn a f_nil f_t1 f_s1 f_tn f_sn f_notext f_maytext = let tags1,hastext1,fin1 = inter_text tagset (tags a qtags1) in let tagsn,hastextn,finn = inter_text tagset (tags a qtagsn) in @@ -591,7 +597,7 @@ END else if (hastext1||hastextn) then (`ANY,f_maytext) else (`ANY,f_notext) - let choose_jump_down tree a b c d = + let choose_jump_down tree a b c d = choose_jump a b c d (mk_fun (fun _ -> Tree.nil) "Tree.mk_nil") (mk_fun (Tree.tagged_child tree) "Tree.tagged_child") @@ -610,65 +616,78 @@ END (mk_fun (Tree.select_foll_ctx tree) "Tree.select_foll_ctx") (mk_fun (Tree.next_element_ctx tree) "Tree.node_element_ctx") (mk_fun (Tree.next_sibling_ctx tree) "Tree.node_sibling_ctx") - - - module SetTagKey = - struct - type t = Tag.t*SList.t - let equal (t1,s1) (t2,s2) = t1 == t2 && s1 == s2 - let hash (t,s) = HASHINT2(t,SList.uid s) - end - - module CachedTransTable = Hashtbl.Make(SetTagKey) - let td_trans = CachedTransTable.create 4093 - - + + module SetTagKey = + struct + type t = Tag.t*SList.t + let equal (t1,s1) (t2,s2) = t1 == t2 && s1 == s2 + let hash (t,s) = HASHINT2(t,s.SList.Node.id) + end + + module CachedTransTable = Hashtbl.Make(SetTagKey) + let td_trans = CachedTransTable.create 4093 + + let empty_size n = let rec loop acc = function 0 -> acc | n -> loop (SList.cons StateSet.empty acc) (n-1) in loop SList.nil n - let merge rb rb1 rb2 mark t res1 res2 = - if rb then - let res1 = if rb1 then res1 else RS.empty - and res2 = if rb2 then res2 else RS.empty - in - if mark then RS.cons t (RS.concat res1 res2) - else RS.concat res1 res2 - else RS.empty + + module Fold2Res = Hashtbl.Make(struct + type t = Formlistlist.t*SList.t*SList.t + let hash (f,s,t) = HASHINT3(f.Formlistlist.Node.id, + s.SList.Node.id, + t.SList.Node.id) + let equal (a,b,c) (d,e,f) = a==d && b == e && c == f + end) + + let h_fold2 = Fold2Res.create BIG_H_SIZE let top_down ?(noright=false) a tree t slist ctx slot_size = let pempty = empty_size slot_size in + let rempty = Array.make slot_size RS.empty in (* evaluation starts from the right so we put sl1,res1 at the end *) let eval_fold2_slist fll t (sl2,res2) (sl1,res1) = - let res = Array.copy res1 in - let rec fold l1 l2 fll i aq = - match fll with - [fl] -> (* inline for speed *) - let s1 = SList.hd l1 - and s2 = SList.hd l2 in - let r',flags = eval_formlist s1 s2 fl in - let _ = res.(i) <- RS.merge flags t res1.(i) res2.(i) in - (SList.cons r' aq),res - | fl::fll -> - let SList.Cons(s1,ll1) = l1.SList.Node.node - and SList.Cons(s2,ll2) = l2.SList.Node.node in - let r',flags = eval_formlist s1 s2 fl in - let _ = res.(i) <- RS.merge flags t res1.(i) res2.(i) - in - fold ll1 ll2 fll (i+1) (SList.cons r' aq) - | _ -> aq,res - in - fold sl1 sl2 fll 0 SList.nil + let res = Array.copy rempty in + try + let r,b,btab = Fold2Res.find h_fold2 (fll,sl1,sl2) in + if b then for i=0 to slot_size - 1 do + res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); + done; + r,res + with + Not_found -> + let btab = Array.make slot_size (false,false,false,false) in + let rec fold l1 l2 fll i aq ab = + match fll.Formlistlist.Node.node, + l1.SList.Node.node, + l2.SList.Node.node + with + | Formlistlist.Cons(fl,fll), + SList.Cons(s1,ll1), + SList.Cons(s2,ll2) -> + let r',((b,_,_,_) as flags) = eval_formlist s1 s2 fl in + let _ = btab.(i) <- flags + in + fold ll1 ll2 fll (i+1) (SList.cons r' aq) (b||ab) + | _ -> aq,ab + in + let r,b = fold sl1 sl2 fll 0 SList.nil false in + Fold2Res.add h_fold2 (fll,sl1,sl2) (r,b,btab); + if b then for i=0 to slot_size - 1 do + res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); + done; + r,res in - let null_result() = (pempty,Array.make slot_size RS.empty) in - let rec loop t slist ctx = - if t == Tree.nil then null_result() else get_trans t slist (Tree.tag tree t) ctx + let null_result = (pempty,Array.copy rempty) in + let rec loop t slist ctx= + if t == Tree.nil then null_result else get_trans t slist (Tree.tag tree t) ctx and loop_tag tag t slist ctx = - if t == Tree.nil then null_result() else get_trans t slist tag ctx + if t == Tree.nil then null_result else get_trans t slist tag ctx and loop_no_right t slist ctx = - if t == Tree.nil then null_result() else get_trans ~noright:true t slist (Tree.tag tree t) ctx + if t == Tree.nil then null_result else get_trans ~noright:true t slist (Tree.tag tree t) ctx and get_trans ?(noright=false) t slist tag ctx = let cont = try @@ -703,79 +722,79 @@ END ) ) set (Formlist.nil,StateSet.empty,StateSet.empty,ca,da,sa,fa) - in fl::fll_acc, (SList.cons ll lllacc), (SList.cons rr rllacc),ca,da,sa,fa) - slist ([],SList.nil,SList.nil,StateSet.empty,StateSet.empty,StateSet.empty,StateSet.empty) + in (Formlistlist.cons fl fll_acc), (SList.cons ll lllacc), (SList.cons rr rllacc),ca,da,sa,fa) + slist (Formlistlist.nil,SList.nil,SList.nil,StateSet.empty,StateSet.empty,StateSet.empty,StateSet.empty) in (* Logic to chose the first and next function *) let _,tags_below,_,tags_after = Tree.tags tree tag in (* let _ = Printf.eprintf "Tags below %s are : \n" (Tag.to_string tag) in let _ = Ptset.Int.iter (fun i -> Printf.eprintf "%s " (Tag.to_string i)) tags_below in let _ = Printf.eprintf "\n%!" in *) + let tags_below = Ptset.Int.remove tag tags_below in let f_kind,first = choose_jump_down tree tags_below ca da a and n_kind,next = if noright then (`NIL, fun _ _ -> Tree.nil ) else choose_jump_next tree tags_after sa fa a in - let empty_res = null_result() in - let cont = - match f_kind,n_kind with - | `NIL,`NIL -> - (fun _ _ -> eval_fold2_slist fl_list t empty_res empty_res ) - | _,`NIL -> ( - match f_kind with - |`TAG(tag) -> - (fun t _ -> eval_fold2_slist fl_list t empty_res - (loop_tag tag (first t) llist t)) - | `ANY -> - (fun t _ -> eval_fold2_slist fl_list t empty_res - (loop (first t) llist t)) - | _ -> assert false) - - | `NIL,_ -> ( - match n_kind with - |`TAG(tag) -> - (fun t ctx -> eval_fold2_slist fl_list t - (loop_tag tag (next t ctx) rlist ctx) empty_res) - - | `ANY -> - (fun t ctx -> eval_fold2_slist fl_list t - (loop (next t ctx) rlist ctx) empty_res) - - | _ -> assert false) - - | `TAG(tag1),`TAG(tag2) -> - (fun t ctx -> eval_fold2_slist fl_list t - (loop (next t ctx) rlist ctx) - (loop (first t) llist t)) - - | `TAG(tag),`ANY -> - (fun t ctx -> - eval_fold2_slist fl_list t - (loop (next t ctx) rlist ctx) - (loop_tag tag (first t) llist t)) - | `ANY,`TAG(tag) -> - (fun t ctx -> - eval_fold2_slist fl_list t - (loop_tag tag (next t ctx) rlist ctx) - (loop (first t) llist t) ) - | `ANY,`ANY -> - (fun t ctx -> - eval_fold2_slist fl_list t - (loop (next t ctx) rlist ctx) - (loop (first t) llist t) ) - | _ -> assert false - in - let cont = D_IF_( (fun t ctx -> - let a,b = cont t ctx in - register_trace tree t (slist,a,fl_list,first,next,ctx); - (a,b) - ) ,cont) - in - (CachedTransTable.add td_trans (tag,slist) cont;cont) - in cont t ctx - - in - (if noright then loop_no_right else loop) t slist ctx - - + let empty_res = null_result in + let cont = + match f_kind,n_kind with + | `NIL,`NIL -> + (fun t _ -> eval_fold2_slist fl_list t empty_res empty_res ) + | _,`NIL -> ( + match f_kind with + |`TAG(tag) -> + (fun t _ -> eval_fold2_slist fl_list t empty_res + (loop_tag tag (first t) llist t)) + | `ANY -> + (fun t _ -> eval_fold2_slist fl_list t empty_res + (loop (first t) llist t)) + | _ -> assert false) + + | `NIL,_ -> ( + match n_kind with + |`TAG(tag) -> + (fun t ctx -> eval_fold2_slist fl_list t + (loop_tag tag (next t ctx) rlist ctx) empty_res) + + | `ANY -> + (fun t ctx -> eval_fold2_slist fl_list t + (loop (next t ctx) rlist ctx) empty_res) + + | _ -> assert false) + + | `TAG(tag1),`TAG(tag2) -> + (fun t ctx -> eval_fold2_slist fl_list t + (loop_tag tag2 (next t ctx) rlist ctx) + (loop_tag tag1 (first t) llist t)) + + | `TAG(tag),`ANY -> + (fun t ctx -> eval_fold2_slist fl_list t + (loop (next t ctx) rlist ctx) + (loop_tag tag (first t) llist t)) + | `ANY,`TAG(tag) -> + (fun t ctx -> + eval_fold2_slist fl_list t + (loop_tag tag (next t ctx) rlist ctx) + (loop (first t) llist t) ) + | `ANY,`ANY -> + (fun t ctx -> + eval_fold2_slist fl_list t + (loop (next t ctx) rlist ctx) + (loop (first t) llist t) ) + | _ -> assert false + in + let cont = D_IF_( (fun t ctx -> + let a,b = cont t ctx in + register_trace tree t (slist,a,fl_list,first,next,ctx); + (a,b) + ) ,cont) + in + (CachedTransTable.add td_trans (tag,slist) cont;cont) + in cont t ctx + + in + (if noright then loop_no_right else loop) t slist ctx + + let run_top_down a tree = let init = SList.cons a.init SList.nil in let _,res = top_down a tree Tree.root init Tree.root 1 diff --git a/hlist.ml b/hlist.ml index e5a4aa5..5509871 100644 --- a/hlist.ml +++ b/hlist.ml @@ -49,10 +49,10 @@ struct type data = Data.t type t = Node.t let make = Node.make - let node = Node.node - let hash = Node.hash + let node x = x.Node.node + let hash x = x.Node.key let equal = Node.equal - let uid = Node.uid + let uid x= x.Node.id let nil = Node.make Nil let cons a b = Node.make (Cons(a,b)) let hd = function { Node.node = Cons(a,_) } -> a | _ -> failwith "hd" diff --git a/main.ml b/main.ml index 2f12d19..ee14b34 100644 --- a/main.ml +++ b/main.ml @@ -13,13 +13,34 @@ let enabled_gc = Gc.get() let disabled_gc = { Gc.get() with Gc.max_overhead = 1000000; Gc.space_overhead = 100 } - - - - +let hash x = 131*x/(x-1+1) +let test_loop tree tag = + let t' = Tree.tagged_desc tree tag Tree.root in + let f = Hashtbl.create 4096 + in + let jump t _ = Tree.tagged_foll_ctx tree tag t Tree.root in + let g t ctx = + if t == Tree.nil then 0 + else 1+ ((Hashtbl.find f (hash 101)) (jump t ctx) ctx) + in + Hashtbl.add f (hash 101) g; + (Hashtbl.find f (hash 101)) t' Tree.root +let test_loop2 tree tag = + let t' = Tree.tagged_desc tree tag Tree.root in + let f = Hashtbl.create 4096 + in + let jump t _ = Tree.tagged_foll_ctx tree tag t Tree.root in + let rec g t ctx = + if t == Tree.nil then 0 + else 1+ (match (Hashtbl.find f (hash 101)) with + `Foo ->g (jump t ctx) ctx + ) + in + Hashtbl.add f (hash 101) `Foo; + g t' Tree.root let main v query_string output = - + let _ = Tag.init (Tree.tag_pool v) in Printf.eprintf "Parsing query : "; let query = try @@ -28,36 +49,42 @@ let main v query_string output = with Ulexer.Loc.Exc_located ((x,y),e) -> Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);exit 1 in - XPath.Ast.print Format.err_formatter query; - Format.fprintf Format.err_formatter "\n%!"; - Printf.eprintf "Compiling query : "; - let auto,ltags,contains = time (XPath.Compile.compile ~querystring:query_string) query in - let _ = Ata.dump Format.err_formatter auto in - let _ = Printf.eprintf "%!" in - let jump_to = - match contains with - None -> (max_int,`NOTHING) - | Some s -> - let r = Tree.count v s - in - Printf.eprintf "%i documents in the TextCollection\n" (Tree.text_size v); - Printf.eprintf "Global count is %i, using " r; - if r < !Options.tc_threshold then begin - Printf.eprintf "TextCollection contains\nCalling global contains : "; - time (Tree.init_contains v) s; - end - else begin - Printf.eprintf "Naive contains\nCalling global contains : "; - time (Tree.init_naive_contains v) s - end;(r,`CONTAINS(s)) - in - let test_list = jump_to in - (* + let _ = Printf.eprintf "Timing //keyword :" in + let r = time (test_loop v) (Tag.tag "keyword") in + let _ = Printf.eprintf "Count is %i\n%!" r in + let _ = Printf.eprintf "Timing //keyword 2:" in + let r = time (test_loop2 v) (Tag.tag "keyword") in + let _ = Printf.eprintf "Count is %i\n%!" r in + XPath.Ast.print Format.err_formatter query; + Format.fprintf Format.err_formatter "\n%!"; + Printf.eprintf "Compiling query : "; + let auto,ltags,contains = time (XPath.Compile.compile ~querystring:query_string) query in + let _ = Ata.dump Format.err_formatter auto in + let _ = Printf.eprintf "%!" in + let jump_to = + match contains with + None -> (max_int,`NOTHING) + | Some s -> + let r = Tree.count v s + in + Printf.eprintf "%i documents in the TextCollection\n" (Tree.text_size v); + Printf.eprintf "Global count is %i, using " r; + if r < !Options.tc_threshold then begin + Printf.eprintf "TextCollection contains\nCalling global contains : "; + time (Tree.init_contains v) s; + end + else begin + Printf.eprintf "Naive contains\nCalling global contains : "; + time (Tree.init_naive_contains v) s + end;(r,`CONTAINS(s)) + in + let test_list = jump_to in + (* let test_list = - if (!Options.backward) then begin - Printf.eprintf "Finding min occurences : "; - time - ( List.fold_left (fun ((min_occ,kind)as acc) (tag,_) -> + if (!Options.backward) then begin + Printf.eprintf "Finding min occurences : "; + time + ( List.fold_left (fun ((min_occ,kind)as acc) (tag,_) -> let numtags = Tree.subtree_tags v tag Tree.root in if ((numtags < min_occ) && numtags >= 2) then (numtags,`TAG(tag)) @@ -78,7 +105,7 @@ let main v query_string output = begin let _ = Gc.full_major();Gc.compact() in let _ = Printf.eprintf "%!" in - (* let _ = Gc.set (disabled_gc) in *) + let _ = Gc.set (disabled_gc) in if !Options.backward && ((snd test_list) != `NOTHING )then let r = time (bottom_up_count auto v )(snd test_list) in diff --git a/tree.ml b/tree.ml index 24fca56..a1ddcac 100644 --- a/tree.ml +++ b/tree.ml @@ -481,8 +481,6 @@ let array_find a i j = then begin let tid = tree_my_text tree.doc t in - let _ = Printf.eprintf "my_text %i returned %i\n%!" (inode t) (inode tid) - in output_string outc (text_get_cached_text tree.doc tid); if print_right then loop (next_sibling tree t); @@ -525,8 +523,6 @@ let array_find a i j = let attname = String.sub s 3 ((String.length s) -3) in let fsa = first_child tree a in let tid = tree_my_text tree.doc fsa in - let _ = Printf.eprintf "my_text %i returned %i\n%!" (inode fsa) (inode tid) - in output_char outc ' '; output_string outc attname; output_string outc "=\"";