From ecec752325cb3d207894a4f8d772936bd7ad9f4a Mon Sep 17 00:00:00 2001 From: =?utf8?q?Kim=20Nguy=E1=BB=85n?= Date: Mon, 19 Mar 2012 15:19:28 +0100 Subject: [PATCH] Temporary commit --- src/ata.ml | 15 ++ src/ata.mli | 1 + src/cache.ml | 24 ++- src/main.ml | 2 +- src/resJIT.ml | 6 +- src/runtime.ml | 230 +++++++++++++++++++++++--- src/runtime.mli | 2 + src/tag.ml | 2 +- src/tracer.ml | 2 +- src/transition.ml | 6 +- src/translist.ml | 10 +- src/tree.ml | 12 ++ src/tree.mli | 5 + tests/perf_tests/xmark_10.xml.queries | 14 +- 14 files changed, 287 insertions(+), 44 deletions(-) diff --git a/src/ata.ml b/src/ata.ml index f0882d5..8f7529f 100644 --- a/src/ata.ml +++ b/src/ata.ml @@ -255,3 +255,18 @@ let top_down_approx auto states tree = +let get_trans ?(attributes=TagSet.empty) auto states tag = + StateSet.fold (fun q acc -> + List.fold_left (fun ((tr_acc, l_acc, r_acc) as acc) (ts, tr) -> + let ts = if ts == TagSet.star then TagSet.diff ts attributes else ts + in + if TagSet.mem tag ts then + let _, _, _, f = Transition.node tr in + let l, r = Formula.st f in + (Translist.cons tr tr_acc, + StateSet.union l l_acc, + StateSet.union r r_acc) + else acc) acc (Hashtbl.find auto.trans q)) + states + (Translist.nil, StateSet.empty, StateSet.empty) + diff --git a/src/ata.mli b/src/ata.mli index d249e9b..9fa9d67 100644 --- a/src/ata.mli +++ b/src/ata.mli @@ -24,3 +24,4 @@ type jump_kind = val top_down_approx : t -> StateSet.t -> Tree.t -> jump_kind val init : unit -> unit +val get_trans : ?attributes:TagSet.t -> t -> StateSet.t -> Tag.t -> Translist.t * StateSet.t * StateSet.t diff --git a/src/cache.ml b/src/cache.ml index 3efe3dc..60301fe 100644 --- a/src/cache.ml +++ b/src/cache.ml @@ -20,11 +20,21 @@ struct } + let print fmt a = + Format.fprintf fmt "{ offset=%i;\n dummy=_;line=%a \n}\n%!" + a.offset + (Pretty.print_array ~sep:", " (fun fmt x -> + if x==a.dummy then + Format.fprintf fmt "%s" "D" + else + Format.fprintf fmt "%s" "E")) a.line let add a i v = + TRACE("twopass", 2, __ "Before add (%i): %a\n%!" i print a); if a.offset == ~-1 then a.offset <- i; let offset = a.offset in let len = Array.length a.line in + let () = if i >= offset && i < offset + len then a.line.(i - offset) <- v else @@ -45,9 +55,11 @@ struct for j = 0 to len - 1 do narray.(j) <- a.line.(j); done; - narray.(i - offset + 1) <- v; + narray.(i - offset) <- v; a.line <- narray end + in + TRACE("twopass", 2, __ "After add (%i): %a\n%!" i print a) let find a i = let offset = a.offset in @@ -82,11 +94,13 @@ struct let add a i j v = + TRACE("twopass", 2, __ "Adding %i %i\n%!" i j); let line = Lvl1.find a i in if line == a.Lvl1.dummy then - let nline = { line with Lvl1.offset = ~-1 } in - Lvl1.add nline j v; - Lvl1.add a i nline + let nline = Lvl1.create 0 line.Lvl1.dummy in + TRACE("twopass", 2, __ "Reallocating\n%!"); + Lvl1.add a i nline; + Lvl1.add nline j v else Lvl1.add line j v @@ -113,7 +127,7 @@ struct type 'a t = 'a Lvl2.t Lvl1.t let create n a = - let dummy1 = Lvl2.create 0 a in + let dummy1 = Lvl2.create 512 a in { Lvl1.line = Array.create n dummy1; Lvl1.offset = ~-1; Lvl1.dummy = dummy1; diff --git a/src/main.ml b/src/main.ml index 42c8822..2349ad1 100644 --- a/src/main.ml +++ b/src/main.ml @@ -78,7 +78,7 @@ let main v query_string output = let module R = ResJIT.Count in let module M = Runtime.Make(R) in (* mk_runtime run auto doc arg count print outfile *) - mk_runtime M.top_down_run auto v Tree.root R.NS.length R.NS.serialize None + mk_runtime M.twopass_top_down_run auto v Tree.root R.NS.length R.NS.serialize None else let module R = ResJIT.Mat in let module M = Runtime.Make(R) in diff --git a/src/resJIT.ml b/src/resJIT.ml index 64c9624..25502b6 100644 --- a/src/resJIT.ml +++ b/src/resJIT.ml @@ -292,11 +292,11 @@ DEFINE EXEC_REC_TEMPLATE = end) DEFINE EXEC_TEMPLATE = - (TRACE("res-jit", 3, __ "Node %i:\n" (Node.to_int t)); +(* (TRACE("res-jit", 3, __ "Node %i:\n" (Node.to_int t)); TRACE("res-jit", 3, __ " LEFT : %a\n" pr_slot slot1); - TRACE("res-jit", 3, __ " RIGHT : %a\n" pr_slot slot2); + TRACE("res-jit", 3, __ " RIGHT : %a\n" pr_slot slot2); *) exec slot slot1 slot2 t code; - TRACE("res-jit", 3, __ " RES : %a\n\n%!" pr_slot slot)) +(* TRACE("res-jit", 3, __ " RES : %a\n\n%!" pr_slot slot))*) module type S = diff --git a/src/runtime.ml b/src/runtime.ml index 5657f20..b4de9cf 100644 --- a/src/runtime.ml +++ b/src/runtime.ml @@ -9,7 +9,8 @@ module type S = sig val top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set val bottom_up_run : Ata.t -> Tree.t -> Compile.text_query * string -> result_set val grammar_run : Ata.t -> Grammar2.t -> unit -> result_set - + val naive_top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set + val twopass_top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set end module Make (U : ResJIT.S) : S with type result_set = U.NS.t = @@ -42,6 +43,8 @@ module Make (U : ResJIT.S) : S with type result_set = U.NS.t = let eval_trans auto s1 s2 trans = + TRACE("top-down-run", 2, __ "Evaluating transition list:\n%!"); + TRACE("top-down-run", 2, __ "%a\n%!" Translist.print trans); Translist.fold (fun t ((a_st, a_op, a_todo) as acc)-> let q, _, m, f = Transition.node t in @@ -101,7 +104,7 @@ module Make (U : ResJIT.S) : S with type result_set = U.NS.t = in let ns1 = StateSet.inter s1 orig_s1 and ns2 = StateSet.inter s2 orig_s2 in - let res, ops, todo = eval_trans auto ns1 ns2 trl in + let res, ops, todo = eval_trans auto orig_s1 orig_s2 trl in let code, not_marking = ResJIT.compile ops in let todo_code, todo_notmarking = List.fold_left (fun (l, b) (p, q, o) -> let c, b' = ResJIT.compile o in @@ -127,6 +130,7 @@ module Make (U : ResJIT.S) : S with type result_set = U.NS.t = else sl1 else sl2 in + eprintf "Here 1\n%!"; U.exec sl sl1 sl2 node code; res, sl end @@ -138,6 +142,7 @@ module Make (U : ResJIT.S) : S with type result_set = U.NS.t = else sl1 else sl2 in + eprintf "Here 2\n%!"; U.exec sl sl1 sl2 node code; res, sl end @@ -198,9 +203,6 @@ DEFINE LOOP_TAG (t, states, tag, ctx) = ( l2jit_dispatch _t (tag) (states) (ctx) (L2JIT.find cache2 (tag) (states))) -DEFINE LOOP(t, states, ctx) = loop (t) (states) (ctx) -DEFINE LOOP_TAG(t, states, tag, ctx) = loop_tag (t) (states) (ctx) (tag) - let top_down_run auto tree root states ctx = let res_len = StateSet.max_elt auto.states + 1 in let empty_slot = Array.create res_len U.NS.empty in @@ -222,23 +224,13 @@ DEFINE LOOP_TAG(t, states, tag, ctx) = loop_tag (t) (states) (ctx) (tag) in let cache2 = L2JIT.create () in - let rec loop t states ctx = - if t == Tree.nil then nil_res - else - let tag = Tree.tag tree t in - l2jit_dispatch - t tag (states) (ctx) (L2JIT.find cache2 tag (states)) - and loop_tag t states ctx tag = - if t == Tree.nil then nil_res - else - l2jit_dispatch - t (tag) (states) (ctx) (L2JIT.find cache2 (tag) (states)) - and l2jit_dispatch t tag states ctx opcode = + let rec l2jit_dispatch t tag states ctx opcode = match opcode with | L2JIT.RETURN -> nil_res | L2JIT.CACHE -> - let opcode = L2JIT.compile cache2 auto tree tag states in - l2jit_dispatch t tag states ctx opcode + eprintf "New configuration\n%!"; + let opcode = L2JIT.compile cache2 auto tree tag states in + l2jit_dispatch t tag states ctx opcode | L2JIT.LEFT (tr_list, instr) -> let res1, slot1 = @@ -656,6 +648,206 @@ let dispatch_param1 conf id2 y0 y1 = ;; + (* Slow reference top-down implementation *) + let naive_top_down auto tree root states ctx = + let res_len = StateSet.max_elt auto.states + 1 in + let empty_slot = Array.create res_len U.NS.empty in + let nil_res = auto.bottom_states, empty_slot in + let cache3 = L3JIT.create () in + let l3jit_dispatch trl s1 s2 t sl1 sl2 = + let f = L3JIT.find cache3 trl s1 s2 in + if f == L3JIT.dummy then (L3JIT.cache_apply cache3 auto trl s1 s2) empty_slot sl1 sl2 tree t + else f empty_slot sl1 sl2 tree t + in + let dummy = Translist.nil, StateSet.singleton State.dummy, StateSet.singleton State.dummy in + let cache2 = Cache.Lvl2.create 512 dummy in + let rec loop t states ctx = + if states == StateSet.empty then nil_res + else if t == Tree.nil then (*StateSet.inter states auto.bottom_states, empty_slot *) nil_res + else + let tag = Tree.tag tree t in + + let trans, lstates, rstates = + let c = Cache.Lvl2.find cache2 (Uid.to_int states.StateSet.Node.id) tag in + if c == dummy then + let c = Ata.get_trans auto states tag in + Cache.Lvl2.add cache2 (Uid.to_int states.StateSet.Node.id) tag c; + c + else c + in + let s1, res1 = loop (Tree.first_child tree t) lstates ctx + and s2, res2 = loop (Tree.next_sibling tree t) rstates ctx in + l3jit_dispatch trans s1 s2 t res1 res2 + in + loop root states ctx + + + + + let naive_top_down_run auto tree root = + let res, slot = naive_top_down auto tree root auto.init (Tree.closing tree root) in + slot.(StateSet.min_elt auto.topdown_marking_states) + + + + let eval_form auto s1 s2 f = + let rec loop f = + match Formula.expr f with + | Formula.False | Formula.True | Formula.Pred _ -> f + | Formula.Atom(`Left, b, q) -> + Formula.of_bool (b == (StateSet.mem q s1)) + | Formula.Atom (`Right, b, q) -> + Formula.of_bool(b == (StateSet.mem q s2)) + | Formula.Atom (`Epsilon, _, _) -> assert false + + | Formula.Or(f1, f2) -> + let b1 = loop f1 in + let b2 = loop f2 in + Formula.or_pred b1 b2 + | Formula.And(f1, f2) -> + let b1 = loop f1 in + let b2 = loop f2 in + Formula.and_pred b1 b2 + in + loop f + + let eval_trans auto s1 s2 trans = + Translist.fold + (fun t ((a_st, mark) as acc)-> + let q, _, m, f = Transition.node t in + let form = eval_form auto s1 s2 f in + match Formula.expr form with + | Formula.True -> StateSet.add q a_st, mark || m + | Formula.False -> acc + | _ -> assert false + ) trans (StateSet.empty, false) + + + let set a i v = + TRACE("twopass", 2, __ "Setting node %i to state %a\n%!" + i StateSet.print v); + a.(i) <- v + + let twopass_top_down states_array auto tree root states ctx = + let dummy3 = StateSet.singleton State.dummy in + let cache3 = Cache.Lvl3.create 512 dummy3 in + let dummy2 = Translist.nil, StateSet.singleton State.dummy, StateSet.singleton State.dummy in + let cache2 = Cache.Lvl2.create 512 dummy2 in + let attributes = TagSet.inj_positive (Tree.attribute_tags tree) in + let rec loop t states ctx = + if t == Tree.nil then auto.bottom_states + else if states == StateSet.empty then + let () = set states_array (Node.to_int t) auto.bottom_states in + auto.bottom_states + else + let tag = Tree.tag tree t in + TRACE("twopass", 2, __ "Traversing node %i (tag %s) in states %a\n%!" (Node.to_int t) (Tag.to_string tag) + StateSet.print states + ); + let trans, lstates, rstates = + let c = Cache.Lvl2.find cache2 (Uid.to_int states.StateSet.Node.id) tag in + if c == dummy2 then + let c = Ata.get_trans ~attributes:attributes auto states tag in + Cache.Lvl2.add cache2 (Uid.to_int states.StateSet.Node.id) tag c; + c + else c + in + TRACE("twopass", 2, __ "\nTransitions are:\n%!"); + TRACE("twopass", 2, __ "\nTransitions are:\n%a\n%!" + Translist.print trans + ); + let s1 = loop (Tree.first_child tree t) lstates ctx + and s2 = loop (Tree.next_sibling tree t) rstates ctx in + let st = + let c = Cache.Lvl3.find cache3 + (Uid.to_int s1.StateSet.Node.id) + (Uid.to_int s2.StateSet.Node.id) + (Uid.to_int trans.Translist.Node.id) + in + if c == dummy3 then + let c, _ = eval_trans auto s1 s2 trans in + Cache.Lvl3.add cache3 + (Uid.to_int s1.StateSet.Node.id) + (Uid.to_int s2.StateSet.Node.id) + (Uid.to_int trans.Translist.Node.id) c;c + else c + in + set states_array (Node.to_int t) st; + st + in + loop root states ctx, (dummy2, cache2) + + + type action = Nop | Mark | Dummy + + let twopass_top_down_scan states_array (dummy2, cache2) auto tree root states ctx = + let attributes = TagSet.inj_positive (Tree.attribute_tags tree) in + let cache3 = Cache.Lvl3.create 512 Dummy in + let rec loop t states acc = + if states == StateSet.empty || t = Tree.nil then acc + else + let tag = Tree.tag tree t in + let trans, _, _ = + let c = Cache.Lvl2.find cache2 (Uid.to_int states.StateSet.Node.id) tag in + if c == dummy2 then + let c = Ata.get_trans ~attributes:attributes auto states tag in + Cache.Lvl2.add cache2 (Uid.to_int states.StateSet.Node.id) tag c; + c + else c + in + let fs = Tree.first_child tree t in + let ns = Tree.next_sibling tree t in + let s1 = if fs != Tree.nil then states_array.(Node.to_int fs) else auto.bottom_states + and s2 = if ns != Tree.nil then states_array.(Node.to_int ns) else auto.bottom_states + in + let mark = + let c = Cache.Lvl3.find cache3 + (Uid.to_int s1.StateSet.Node.id) + (Uid.to_int s2.StateSet.Node.id) + (Uid.to_int trans.Translist.Node.id) + in + if c == Dummy then + let _, c = eval_trans auto s1 s2 trans in + let c = if c then Mark else Nop in + Cache.Lvl3.add cache3 + (Uid.to_int s1.StateSet.Node.id) + (Uid.to_int s2.StateSet.Node.id) + (Uid.to_int trans.Translist.Node.id) c;c + else c + in + TRACE("twopass", 2, __ "Evaluating node %i (tag %s).\n%!States=%a\n%!" + (Node.to_int t) + (Tag.to_string tag) + StateSet.print states + ); + TRACE("twopass", 2, __ "Translist=%a\nLeft=%a\nRight=%a\nMark=%s\n\n%!" + Translist.print trans + StateSet.print s1 + StateSet.print s2 + (match mark with + Dummy -> "Dummy" + | Mark -> "Mark" + | Nop -> "Nop")); + if mark == Mark then + loop ns s2 (loop fs s1 (U.NS.snoc acc t)) + else + loop ns s2 (loop fs s1 acc) + in + loop root states U.NS.empty + + let twopass_top_down_run auto tree root = + let len = Node.to_int (Tree.closing tree root) + 1 in + TRACE("twopass", 2, __ "Creating array of size: %i\n%!" len); + let states_array = Array.make len StateSet.empty in + let _, cache = + twopass_top_down states_array auto tree root auto.init Tree.nil + in + twopass_top_down_scan states_array cache auto tree root auto.init Tree.nil + + + + + diff --git a/src/runtime.mli b/src/runtime.mli index 0ce04ef..c499017 100644 --- a/src/runtime.mli +++ b/src/runtime.mli @@ -3,6 +3,8 @@ module type S = sig val top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set val bottom_up_run : Ata.t -> Tree.t -> Compile.text_query * string -> result_set val grammar_run : Ata.t -> Grammar2.t -> unit -> result_set + val naive_top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set + val twopass_top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set end module Make (U : ResJIT.S) : S with type result_set = U.NS.t diff --git a/src/tag.ml b/src/tag.ml index effe9c3..ddaa89d 100644 --- a/src/tag.ml +++ b/src/tag.ml @@ -11,7 +11,7 @@ type t = int external to_int : t -> int = "%identity" type operations = { tag : string -> t; - to_string : t ->string; + to_string : t -> string; translate : t -> t } diff --git a/src/tracer.ml b/src/tracer.ml index aaa38d3..17e0540 100644 --- a/src/tracer.ml +++ b/src/tracer.ml @@ -3,7 +3,7 @@ open Format type tracer = string type level = int -let tracers = [ "top-down-run"; "top-down-approx"; "result-set"; "level2-jit"; "res-jit"; "grammar" ] +let tracers = [ "top-down-run"; "top-down-approx"; "result-set"; "level2-jit"; "res-jit"; "grammar"; "twopass" ] let active_tracers : (tracer, int) Hashtbl.t = Hashtbl.create 17 let available () = tracers diff --git a/src/transition.ml b/src/transition.ml index e05fd42..9f03aa0 100644 --- a/src/transition.ml +++ b/src/transition.ml @@ -48,8 +48,10 @@ let format_list l = let make_str f x = let b = Buffer.create 10 in let fmt = formatter_of_buffer b in - fprintf fmt "@[%a@]@?" f x; - Buffer.contents b + pp_print_flush fmt (); + fprintf fmt "%a" f x; + pp_print_flush fmt (); + Buffer.contents b in let str_trans t = let lhs = make_str print_lhs t diff --git a/src/translist.ml b/src/translist.ml index 77c4cb5..f3bcccd 100644 --- a/src/translist.ml +++ b/src/translist.ml @@ -1,5 +1,5 @@ - include Hlist.Make(Transition) - let print ppf fl = - let l = fold (fun t acc -> t :: acc) fl [] in - let strings = Transition.format_list l in - List.iter (fun s -> Format.fprintf ppf "%s\n%!" s) strings +include Hlist.Make(Transition) +let print ppf fl = + let l = fold (fun t acc -> t :: acc) fl [] in + let strings = Transition.format_list l in + List.iter (fun s -> Format.fprintf ppf "%s\n%!" s) strings diff --git a/src/tree.ml b/src/tree.ml index 9c6eff3..9b3d4bc 100644 --- a/src/tree.ml +++ b/src/tree.ml @@ -170,10 +170,22 @@ end type bit_vector = string external bool_of_int : int -> bool = "%identity" +external int_of_bool : bool -> int = "%identity" let bit_vector_unsafe_get v i = bool_of_int (((Char.code (String.unsafe_get v (i lsr 3))) lsr (i land 7)) land 1) +let chr (c:int) : char = Obj.magic (c land 0xff) +let bit_vector_unsafe_set v i b = + let j = i lsr 3 in + let c = Char.code v.[j] in + let bit = int_of_bool b in + let mask = bit lsl (i land 7) in + if b then v.[j] <- chr (c lor mask) else v.[j] <- (chr (c land (lnot mask))) + +let bit_vector_create n = + let len = if n <= 0 then 0 else (n - 1) / 8 + 1 in + String.make len '\000' type t = { doc : tree; diff --git a/src/tree.mli b/src/tree.mli index d329b5a..0946a4c 100644 --- a/src/tree.mli +++ b/src/tree.mli @@ -86,3 +86,8 @@ val full_text_suffix : t -> string -> node array val full_text_equals : t -> string -> node array val full_text_contains : t -> string -> node array + +type bit_vector +val bit_vector_create : int -> bit_vector +val bit_vector_unsafe_set : bit_vector -> int -> bool -> unit +val bit_vector_unsafe_get : bit_vector -> int -> bool diff --git a/tests/perf_tests/xmark_10.xml.queries b/tests/perf_tests/xmark_10.xml.queries index 73d4a2a..1d44242 100644 --- a/tests/perf_tests/xmark_10.xml.queries +++ b/tests/perf_tests/xmark_10.xml.queries @@ -4,7 +4,7 @@ /child::site/child::closed_auctions/child::closed_auction/child::annotation/child::description/child::text/child::keyword /descendant::listitem/descendant::keyword /descendant::closed_auction/descendant::keyword -/site/closed_auctions/closed_auction/descendant::keyword +/child::site/child::closed_auctions/child::closed_auction/descendant::keyword /child::site/child::closed_auctions/child::closed_auction[child::annotation/child::description/child::text/child::keyword]/child::date /child::site/child::closed_auctions/child::closed_auction[descendant::keyword]/child::date /child::site/child::people/child::person[child::profile/child::gender and child::profile/child::age]/child::name @@ -13,13 +13,13 @@ /descendant::listitem[not(descendant::keyword/child::emph)]/descendant::parlist /descendant::listitem[ (descendant::keyword or descendant::emph) and (descendant::emph or descendant::bold)]/child::parlist /descendant::people[ descendant::person[not(child::address)] and descendant::person[not(child::watches)]]/child::person[child::watches] -/site/regions/europe/item/mailbox/mail/text/keyword -/site/closed_auctions/closed_auction/annotation/description/parlist/listitem -/site/closed_auctions/closed_auction/annotation/description/parlist/listitem/parlist/listitem/*/descendant::keyword -/site/regions/*/item/descendant::keyword -/site/regions/*/person[ address and (phone or homepage) ] +/child::site/child::regions/child::europe/child::item/child::mailbox/child::mail/child::text/child::keyword +/child::site/child::closed_auctions/child::closed_auction/child::annotation/child::description/child::parlist/child::listitem +/child::site/child::closed_auctions/child::closed_auction/child::annotation/child::description/child::parlist/child::listitem/child::parlist/child::listitem/child::*/descendant::keyword +/child::site/child::regions/child::*/child::item/descendant::keyword +/child::site/child::regions/child::*/child::person[ child::address and (child::phone or child::homepage) ] /descendant::listitem[ descendant::keyword and descendant::emph]/descendant::parlist -/site/regions/*/item[ mailbox/mail/date ]/mailbox/mail +/child::site/child::regions/child::*/child::item[ child::mailbox/child::mail/child::date ]/child::mailbox/child::mail /child::*[ descendant::* ] /descendant::* /descendant::*/descendant::* -- 2.17.1