INCLUDE "debug.ml"
INCLUDE "trace.ml"
INCLUDE "utils.ml"

open Format
open Ata

(* Interface of the automaton runtime: three ways of running an automaton,
   each returning a [result_set]. *)
module type S = sig
  type result_set
  val top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set
  val bottom_up_run : Ata.t -> Tree.t -> Compile.text_query * string -> result_set
  val grammar_run : Ata.t -> Grammar2.t -> unit -> result_set
end

(* Runtime parameterised by a result-set implementation [U]; the exposed
   [result_set] is [U.NS.t].

   NOTE(review): several if/then/else branches below return unparenthesised
   tuples (for instance [then res,sl2 else ...]). This presumably relies on
   the camlp4o grammar used for the INCLUDE / DEFINE / TRACE macros - confirm
   before parenthesising differently or porting to the stock OCaml parser. *)
module Make (U : ResJIT.S) : S with type result_set = U.NS.t =
struct

  type result_set = U.NS.t;;

  (* Evaluates formula [f] against the state sets [s1] (tested by `Left
     atoms) and [s2] (tested by `Right atoms).
     Returns a pair: the resulting formula and the list of ResJIT
     instructions collected on the way. An atom contributes a LEFT / RIGHT
     instruction only when it is positive ([b] is true) and its state belongs
     to [auto.topdown_marking_states]. Constants and predicates are returned
     unchanged with no instruction. `Epsilon atoms are not expected here. *)
  let eval_form auto s1 s2 f =
    let rec loop f =
      match Formula.expr f with
      | Formula.False | Formula.True | Formula.Pred _ -> f, []
      | Formula.Atom(`Left, b, q) ->
          Formula.of_bool (b == (StateSet.mem q s1)),
          if b && StateSet.mem q auto.topdown_marking_states then [ResJIT.LEFT q] else []
      | Formula.Atom (`Right, b, q) ->
          Formula.of_bool(b == (StateSet.mem q s2)),
          if b && StateSet.mem q auto.topdown_marking_states then [ResJIT.RIGHT q] else []
      | Formula.Atom (`Epsilon, _, _) -> assert false
      | Formula.Or(f1, f2) ->
          let b1, i1 = loop f1 in
          let b2, i2 = loop f2 in
          Formula.or_pred b1 b2, i1 @ i2
      | Formula.And(f1, f2) ->
          let b1, i1 = loop f1 in
          let b2, i2 = loop f2 in
          Formula.and_pred b1 b2, i1 @ i2
    in
    loop f

  (* Evaluates every transition of [trans] with [eval_form] and accumulates a
     triple:
     - the set of states whose formula evaluated to True,
     - for those states, the pair (state, instructions), with a SELF
       instruction prepended when the transition flag [m] is set,
     - a todo list for formulas that reduced to a predicate: the predicate
       node, the state and its instructions, to be decided later.
     Transitions whose formula is False are dropped; any other residual
     formula is considered impossible. *)
  let eval_trans auto s1 s2 trans =
    Translist.fold
      (fun t ((a_st, a_op, a_todo) as acc)->
         let q, _, m, f = Transition.node t in
         let form, ops = eval_form auto s1 s2 f in
         match Formula.expr form with
         | Formula.True ->
             StateSet.add q a_st,
             (q, (if m then (ResJIT.SELF() :: ops) else ops)):: a_op,
             a_todo
         | Formula.False -> acc
         | Formula.Pred p ->
             a_st, a_op,
             (p.Tree.Predicate.node, q, [(q,(if m then (ResJIT.SELF() :: ops) else ops))]) :: a_todo
         | _ -> assert false
      ) trans (StateSet.empty, [], [])

  (* Third-level cache: maps (transition list, s1, s2), through their
     integer ids, to a closure that applies the compiled transitions. *)
  module L3JIT =
  struct
    type opcode = (t -> t -> t -> Tree.t -> Tree.node -> StateSet.t * t)
    type t = opcode Cache.Lvl3.t

    (* Sentinel stored in empty cache cells; recognised by physical equality
       and never meant to be called. *)
    let dummy _ _ _ _ _ = failwith "Uninitialized L3JIT"

    let create () = Cache.Lvl3.create 1024 dummy

    (* Lookup; the key order is (id of s2, id of s1, id of tlist). *)
    let find t tlist s1 s2 =
      Cache.Lvl3.find t
        (Uid.to_int s2.StateSet.Node.id)
        (Uid.to_int s1.StateSet.Node.id)
        (Uid.to_int tlist.Translist.Node.id)

    (* Insertion, same key order as [find]. *)
    let add t tlist s1 s2 v =
      Cache.Lvl3.add t
        (Uid.to_int s2.StateSet.Node.id)
        (Uid.to_int s1.StateSet.Node.id)
        (Uid.to_int tlist.Translist.Node.id)
        v

    (* Compiles the transition list [trl] for the state sets [s1] and [s2].
       Both sets are first intersected with the state sets reported by
       [Formula.st] for the formulas of [trl] (presumably the states each
       formula mentions on the left / right - confirm against Formula).
       Returns (resulting states, compiled code, not-marking flag covering the
       main code and every todo entry, compiled todo list). *)
    let compile auto trl s1 s2 =
      let orig_s1, orig_s2 =
        Translist.fold (fun t (a1, a2) ->
          let _, _, _, f = Transition.node t in
          let (_, _, fs1), (_, _, fs2) = Formula.st f in
          (StateSet.union a1 fs1, StateSet.union a2 fs2)
        ) trl (StateSet.empty, StateSet.empty)
      in
      let ns1 = StateSet.inter s1 orig_s1
      and ns2 = StateSet.inter s2 orig_s2 in
      let res, ops, todo = eval_trans auto ns1 ns2 trl in
      let code, not_marking = ResJIT.compile ops in
      let todo_code, todo_notmarking =
        List.fold_left (fun (l, b) (p, q, o) ->
          let c, b' = ResJIT.compile o in
          (p, q, c)::l, b && b')
          ([], not_marking) todo
      in
      let opcode = res, code, todo_notmarking, todo_code in
      opcode

    (* Builds the closure stored in the cache. The closure receives the shared
       empty slot (used as a physical-equality sentinel), the two input slots,
       the tree and the current node, and returns (states, output slot).
       The output slot reuses sl2 if it is not the sentinel, else sl1 if it is
       not the sentinel, else a fresh copy of the empty slot.
       Three variants are generated:
       - no pending predicate, not marking: when both inputs are the sentinel
         nothing is executed and the sentinel is returned as is;
       - no pending predicate, marking: the code is always executed;
       - pending predicates: the code is executed, then each predicate is
         tested on the node and, when it holds, its code is executed and its
         state added to the result. *)
    let gen_code auto tlist s1 s2 =
      let res, code, not_marking, todo_code = compile auto tlist s1 s2 in
      let f =
        if todo_code == [] then
          if not_marking then begin fun empty_slot sl1 sl2 _ node ->
            let slot1_empty = sl1 == empty_slot
            and slot2_empty = sl2 == empty_slot in
            if slot1_empty && slot2_empty then res,sl2
            else
              (* Both-empty was handled above, so the Array.copy branch
                 below cannot be reached in this variant. *)
              let sl =
                if slot2_empty then
                  if slot1_empty then Array.copy empty_slot
                  else sl1
                else sl2
              in
              U.exec sl sl1 sl2 node code;
              res, sl
          end
          else (* marking *) begin fun empty_slot sl1 sl2 _ node ->
            let sl =
              if sl2 == empty_slot then
                if sl1 == empty_slot then Array.copy empty_slot
                else sl1
              else sl2
            in
            U.exec sl sl1 sl2 node code;
            res, sl
          end
        else (* todo != [] *) begin fun empty_slot sl1 sl2 tree node ->
          let sl =
            if sl2 == empty_slot then
              if sl1 == empty_slot then Array.copy empty_slot
              else sl1
            else sl2
          in
          U.exec sl sl1 sl2 node code;
          List.fold_left
            (fun ares (p, q, code) ->
               if !p tree node then begin
                 if code != ResJIT.Nil then U.exec sl sl1 sl2 node code;
                 StateSet.add q ares
               end
               else ares) res todo_code, sl
        end
      in
      f

    (* Generates the closure for (tlist, s1, s2), stores it in [cache] and
       returns it. *)
    let cache_apply cache auto tlist s1 s2 =
      let f = gen_code auto tlist s1 s2 in
      TRACE("grammar", 2, __ "Inserting: %i, %a, %a\n%!"
        (Uid.to_int tlist.Translist.Node.id)
        StateSet.print s1
        StateSet.print s2);
      add cache tlist s1 s2 f; f
  end

(* Macro: visit node [t] with [states] in context [ctx]. Returns nil_res on
   the nil node, otherwise dispatches on the level-2 cache entry for the tag
   of the node. Expects tree, nil_res, cache2 and l2jit_dispatch to be in
   scope at the expansion site. *)
DEFINE LOOP (t, states, ctx) = (
  let _t = (t) in
  TRACE("top-down-run", 3,
        __ "Entering node %i with loop (tag %s, context %i) with states %a\n%!"
          (Node.to_int _t)
          (Tag.to_string (Tree.tag tree _t))
          (Node.to_int (ctx))
          (StateSet.print) (states));
  if _t == Tree.nil then nil_res
  else
    let tag = Tree.tag tree _t in
    l2jit_dispatch
      _t tag (states) (ctx) (L2JIT.find cache2 tag (states))
)

(* Macro: same as LOOP but the tag of the target node is supplied by the
   caller instead of being read from the tree. *)
DEFINE LOOP_TAG (t, states, tag, ctx) = (
  let _t = (t) in (* to avoid duplicating expression t *)
  TRACE("top-down-run", 3,
        __ "Entering node %i with loop_tag (tag %s, context %i) with states %a\n%!"
          (Node.to_int _t)
          (Tag.to_string (tag))
          (Node.to_int (ctx))
          (StateSet.print) (states));
  if _t == Tree.nil then nil_res
  else
    l2jit_dispatch
      _t (tag) (states) (ctx) (L2JIT.find cache2 (tag) (states)))

  (* Core top-down evaluation of [auto] over [tree], starting at [root] with
     [states] and bounded by the context node [ctx].
     Returns (states, slot array); the slot array has one cell per state
     number up to the largest state of the automaton.
     Level-2 opcodes select how to move in the tree; level-3 closures combine
     the results of the left and right sub-runs. Both caches are local to one
     call. *)
  let top_down_run auto tree root states ctx =
    let res_len = StateSet.max_elt auto.states + 1 in
    (* Shared sentinel slot; compared by physical equality downstream. *)
    let empty_slot = Array.make res_len U.NS.empty in
    let nil_res = auto.bottom_states, empty_slot in
    let cache3 = L3JIT.create () in
    (* Apply the cached closure for (trl, s1, s2), generating it on a miss. *)
    let l3jit_dispatch trl s1 s2 t sl1 sl2 =
      let f = L3JIT.find cache3 trl s1 s2 in
      if f == L3JIT.dummy then (L3JIT.cache_apply cache3 auto trl s1 s2) empty_slot sl1 sl2 tree t
      else f empty_slot sl1 sl2 tree t
    in
    let cache2 = L2JIT.create () in
    let rec l2jit_dispatch t tag states ctx opcode =
      match opcode with
      | L2JIT.RETURN -> nil_res
      | L2JIT.CACHE ->
          (* Cache miss: compile the opcode, then dispatch on it. *)
          let opcode = L2JIT.compile cache2 auto tree tag states in
          l2jit_dispatch t tag states ctx opcode
      | L2JIT.LEFT (tr_list, instr) ->
          (* The first sub-run is bounded by the closing position of [t]. *)
          let res1, slot1 = l2jit_dispatch_instr t tag states (Tree.closing tree t) instr in
          l3jit_dispatch tr_list res1 auto.bottom_states t slot1 empty_slot
      | L2JIT.RIGHT (tr_list, instr) ->
          let res2, slot2 = l2jit_dispatch_instr t tag states ctx instr in
          l3jit_dispatch tr_list auto.bottom_states res2 t empty_slot slot2
      | L2JIT.BOTH (tr_list, instr1, instr2) ->
          let res1, slot1 = l2jit_dispatch_instr t tag states (Tree.closing tree t) instr1 in
          let res2, slot2 = l2jit_dispatch_instr t tag states ctx instr2 in
          l3jit_dispatch tr_list res1 res2 t slot1 slot2
    and l2jit_dispatch_instr t tag states ctx instr =
      match instr with
      | L2JIT.NOP () -> nil_res
      | L2JIT.FIRST_CHILD s -> LOOP ((Tree.first_child tree t), s, ctx)
      | L2JIT.NEXT_SIBLING s -> LOOP ((Tree.next_sibling tree t), s, ctx)
      | L2JIT.FIRST_ELEMENT s -> LOOP ((Tree.first_element tree t), s, ctx)
      | L2JIT.NEXT_ELEMENT s -> LOOP ((Tree.next_element tree t), s, ctx)
      | L2JIT.TAGGED_DESCENDANT (s, tag) ->
          LOOP_TAG ((Tree.tagged_descendant tree t tag), s, tag, ctx)
      | L2JIT.TAGGED_FOLLOWING (s, tag) ->
          LOOP_TAG((Tree.tagged_following_before tree t tag ctx), s, tag, ctx)
      | L2JIT.SELECT_DESCENDANT (s, _, us) ->
          LOOP((Tree.select_descendant tree t us), s, ctx)
      | L2JIT.SELECT_FOLLOWING (s, pt, us) ->
          LOOP ((Tree.select_following_before tree t us ctx), s, ctx)
      | L2JIT.TAGGED_CHILD (s, tag) ->
          LOOP_TAG((Tree.tagged_child tree t tag), s, tag, ctx)
      | L2JIT.TAGGED_FOLLOWING_SIBLING (s, tag) ->
          LOOP_TAG((Tree.tagged_following_sibling tree t tag), s, tag, ctx)
      | L2JIT.SELECT_CHILD (s, _, us) ->
          LOOP ((Tree.select_child tree t us), s, ctx)
      | L2JIT.SELECT_FOLLOWING_SIBLING (s, _, us) ->
          LOOP ((Tree.select_following_sibling tree t us), s, ctx)
      | L2JIT.TAGGED_SUBTREE(s, tag) ->
          (* No traversal: the result for the subtree is obtained directly
             and stored in the cell of state [auto.last]. *)
          let count = U.NS.subtree_tags tree t tag in
          if count != U.NS.empty then
            let r = Array.copy empty_slot in
            r.(auto.last) <- count;
            s,r
          else s,empty_slot
      | L2JIT.ELEMENT_SUBTREE(s) ->
          let count = U.NS.subtree_elements tree t in
          if count != U.NS.empty then
            let r = Array.copy empty_slot in
            r.(auto.last) <- count;
            s,r
          else s,empty_slot
    in
    let r = LOOP (root, states, ctx) in
    (*L3JIT.stats err_formatter cache3; *)
    r

  (* Runs the core evaluation on the whole subtree of [root]: the context is
     the closing position of [root]. Returns (states, slot array). *)
  let full_top_down_run auto states tree root =
    (*Ata.init (); *)
    top_down_run auto tree root states (Tree.closing tree root)

  (* Public entry point (shadows the five-argument version above): runs from
     the initial states of [auto] and returns the slot of the smallest
     top-down marking state. *)
  let top_down_run auto tree root =
    (*Ata.init (); *)
    let res, slot = full_top_down_run auto auto.init tree root in
    slot.(StateSet.min_elt auto.topdown_marking_states)

  (*** Bottom-up evaluation function **)

  (* Prints the nodes of result set [t] on [fmt], between braces. *)
  let ns_print fmt t =
    Format.fprintf fmt "{ ";
    U.NS.iter begin fun node ->
      Format.fprintf fmt "%a " Node.print node;
    end t;
    Format.fprintf fmt "}"

  (* Prints every cell of slot array [t] on [fmt], one line per state index.
     Fix: the output previously always went to stderr, ignoring [fmt]. *)
  let slot_print fmt t =
    Array.iteri begin fun state ns ->
      Format.fprintf fmt "%a -> %a\n" State.print state ns_print ns;
    end t

  (* Placeholder. It shadows the earlier [eval_trans] for the code below, none
     of which calls it; [L3JIT] already captured the real one. *)
  let eval_trans auto tree parent res1 res2 = assert false

  (* Bottom-up evaluation: starts from the nodes returned by
     [Tree.full_text_query query tree pat] and climbs towards the root,
     running the top-down evaluation on subtrees hanging off the path.
     Returns the slot of the smallest top-down marking state. *)
  let bottom_up_run auto tree (query, pat) =
    let leaves = Array.to_list (Tree.full_text_query query tree pat) in
    let states = auto.states in
    let res_len = (StateSet.max_elt states) + 1 in
    let empty_slot = Array.make res_len U.NS.empty in
    let nil_res = auto.bottom_states, empty_slot in
    (* Closures are cached per (tag, id of s1, id of s2). *)
    let cache = Cache.Lvl3.create 1024 L3JIT.dummy in
    (* Only the first leaf is started explicitly; the remaining ones are
       expected to be consumed while climbing. A warning is printed on stderr
       if some are left over. *)
    let rec loop_leaves l acc =
      match l with
        [] -> acc
      | node :: ll ->
          let res, lll = bottom_up_next node ll Tree.nil in
          if (lll <> []) then Printf.eprintf "Leftover elements\n%!";
          res
    (* Evaluates below [node] top-down (if it has a first child), then climbs
       from [node] until [stop]. Returns the result and the unconsumed
       leaves. *)
    and bottom_up_next node rest stop =
      let fs = Tree.first_child tree node in
      let res1 =
        if fs == Tree.nil then nil_res
        else full_top_down_run auto states tree fs
      in
      move_up node res1 true rest stop
    (* One climbing step. [is_left] tells whether [res] is the left-hand
       result for [node]; in that case the next pending leaf is evaluated
       first when [Tree.is_right_descendant tree node next] holds, and its
       result becomes the right-hand one. *)
    and move_up node res is_left rest stop =
      if node == stop then res, rest
      else
        let prev_sibling = Tree.prev_sibling tree node in
        let is_left' = prev_sibling == Tree.nil in
        let real_parent = Tree.parent tree node in
        let parent =
          if is_left' then real_parent
          else max (Tree.first_child tree real_parent) stop
        in
        (* let parent = if is_left' then Tree.parent tree node else prev_sibling in *)
        let (s1, sl1), (s2, sl2), rest' =
          if is_left then
            match rest with
              [] -> res, nil_res, rest
            | next :: rest' ->
                if Tree.is_right_descendant tree node next then
                  let res2, rest' = bottom_up_next next rest' node in
                  res, res2, rest'
                else res, nil_res, rest
          else nil_res, res, rest
        in
        let tag = Tree.tag tree node in
        let id1 = Uid.to_int s1.StateSet.Node.id in
        let id2 = Uid.to_int s2.StateSet.Node.id in
        let code =
          let code = Cache.Lvl3.find cache tag id1 id2 in
          if code == L3JIT.dummy then
            (* Miss: gather, over all automaton states, the transitions whose
               label set is [TagSet.any] or contains [tag]. *)
            let trl =
              StateSet.fold
                (fun q acc ->
                   List.fold_left (fun acc' (labels, tr) ->
                     if labels == TagSet.any || TagSet.mem tag labels
                     then Translist.cons tr acc' else acc')
                     acc
                     (Hashtbl.find auto.trans q)
                )
                states
                Translist.nil
            in
            let code = L3JIT.gen_code auto trl s1 s2 in
            Cache.Lvl3.add cache tag id1 id2 code; code
          else code
        in
        let res' = code empty_slot sl1 sl2 tree node in
        move_up parent res' is_left' rest' stop
    in
    let _, slot = loop_leaves leaves (nil_res) in
    slot.(StateSet.min_elt auto.topdown_marking_states)

  (* Collects, for every state of [states], the transitions of [auto] whose
     tag set contains [Tag.translate tag]. A transition is skipped when [tag]
     is an attribute of grammar [g] and its tag set does not contain
     [Tag.attribute].
     Returns (union of the third component of the first triple of
     [Formula.st], same for the second triple, selected transitions). *)
  let get_trans g auto tag states =
    StateSet.fold (fun q tr_acc ->
      List.fold_left
        (fun ((lstates, rstates, tacc) as acc) (ts, trs) ->
           if TagSet.mem (Tag.translate tag) ts then
             if not (TagSet.mem Tag.attribute ts) && Grammar2.is_attribute g tag
             then acc
             else
               let _, _, _, phi = Transition.node trs in
               let (_,_,l), (_,_,r) = Formula.st phi in
               (StateSet.union l lstates,
                StateSet.union r rstates,
                Translist.cons trs tacc)
           else acc)
        tr_acc (Hashtbl.find auto.trans q)
    ) states (StateSet.empty, StateSet.empty, Translist.nil)

  (* Grammar run *)

  (* First parameter to pass to the symbol [id1] of a rule, chosen from the
     rule configuration [conf], the second symbol [id2] and the incoming
     parameters [y0] and [y1]. *)
  let dispatch_param0 conf id2 y0 y1 =
    match conf with
    | Grammar2.C0 | Grammar2.C2 -> Grammar2.Node0 id2
    | Grammar2.C1 | Grammar2.C5 -> Grammar2.Node1(id2,y0)
    | Grammar2.C3 | Grammar2.C6 -> y0
    | Grammar2.C4 -> Grammar2.Node2(id2, y0, y1)

  (* Second parameter, same inputs as [dispatch_param0]; configurations not
     listed yield [Grammar2.dummy_param]. *)
  let dispatch_param1 conf id2 y0 y1 =
    match conf with
    | Grammar2.C2 -> y0
    | Grammar2.C3 -> Grammar2.Node0 id2
    | Grammar2.C5 -> y1
    | Grammar2.C6 -> Grammar2.Node1(id2, y1)
    | _ -> Grammar2.dummy_param

  (* Hash key (non-terminal, state set), compared by physical equality. *)
  module K_down = struct
    type t = Grammar2.n_symbol * StateSet.t
    let hash (x,y) = HASHINT2(Node.to_int x, Uid.to_int y.StateSet.Node.id)
    let equal (x1,y1) (x2,y2) = x1 == x2 && y1 == y2
  end

  (* Hash key (non-terminal, three state sets), compared by physical
     equality. *)
  module K_up = struct
    type t = Grammar2.n_symbol * StateSet.t * StateSet.t * StateSet.t
    let hash (a,b,c,d) =
      HASHINT4 (Node.to_int a,
                Uid.to_int b.StateSet.Node.id,
                Uid.to_int c.StateSet.Node.id,
                Uid.to_int d.StateSet.Node.id)
    let equal (a1, b1, c1, d1) (a2, b2, c2, d2) =
      a1 == a2 && b1 == b2 && c1 == c2 && d1 == d2
  end

  (* Table keyed by [K_down] whose [find] never fails: a missing key is bound
     on the spot to a fresh two-cell array filled with the [dummy] marker.
     [notfound] recognises such an array as long as neither cell has been
     overwritten. *)
  module DCache =
  struct
    include Hashtbl.Make(K_down)
    let dummy = StateSet.singleton State.dummy
    let notfound l = l.(0) == dummy && l.(1) == dummy
    let find h k =
      try
        find h k
      with
        Not_found ->
          let a = [| dummy; dummy |] in
          add h k a;
          a
  end

  module UCache = Hashtbl.Make(K_up)

  (* Result record of the grammar run. [in0] / [in1] are the state sets given
     to the two parameters, [out0] / [out1] the results obtained from them and
     [main] the result of the symbol itself. *)
  type result = {
    in0 : StateSet.t;
    in1 : StateSet.t;
    out0 : StateSet.t * U.t;
    out1 : StateSet.t * U.t;
    main : StateSet.t * U.t
  }

  (* Record with empty inputs and [e] for both outputs and the main result. *)
  let mk_empty e =
    { in0 = StateSet.empty; in1 = StateSet.empty; out0 = e; out1 = e; main = e }

  (* Record whose main result is (s, v) and whose outputs carry the empty
     state set with [v]. *)
  let mk_nil s v =
    { mk_empty (s,v) with
      out0 = StateSet.empty,v;
      out1 = StateSet.empty,v; }

  (* Evaluates [auto] over the grammar [g]. Returns the slot of the smallest
     top-down marking state.
     Results of rules are memoised in two tables: [dcache] records, per
     (non-terminal, states), the state sets handed to the two parameters;
     [ucache] records, per (non-terminal, states, states of both parameter
     results), the main result computed with the parameter results replaced
     by [U.var 0] and [U.var 1], which are substituted back with [U.close]. *)
  let grammar_run auto g () =
    let dummy_leaf = Grammar2.dummy_param in
    let dummy_set = StateSet.singleton State.dummy in
    let res_len = (StateSet.max_elt auto.states) + 1 in
    let empty_slot = Array.make res_len U.NS.empty in
    let nil_res = mk_nil auto.bottom_states empty_slot in
    let empty_res = mk_empty (StateSet.empty, empty_slot) in
    let cache3 = L3JIT.create () in
    (* Sentinel for empty cells of [cache2], recognised by physical
       equality. *)
    let dummy2 = (StateSet.empty, StateSet.empty, Translist.nil) in
    let cache2 = Cache.Lvl2.create 512 dummy2 in
    let rule_counter = ref 0 in
    let preorder_counter = ref 0 in
    let dcache = DCache.create 1023 in
    let ucache = UCache.create 1023 in
    let term_array = [| StateSet.empty; StateSet.empty |] in
    (* Cached front end of the module-level [get_trans]. *)
    let get_trans tag states =
      let c = Cache.Lvl2.find cache2 tag (Uid.to_int states.StateSet.Node.id) in
      if c == dummy2 then
        let c = get_trans g auto tag states in
        begin
          Cache.Lvl2.add cache2 tag (Uid.to_int states.StateSet.Node.id) c;
          c
        end
      else c
    in
    let lambda = ref 0 in
    (* Evaluates the start-rule position [idx] with [states]. *)
    let rec start_loop idx states =
      TRACE("grammar", 2, __ "Node %i\n%!" (Node.to_int idx));
      if states == dummy_set then nil_res else
      if idx < Node.null then nil_res
      else begin
        let symbol = Grammar2.start_tag g idx in
        let fc = Grammar2.start_first_child g idx in
        let ns = Grammar2.start_next_sibling g fc in
        if Grammar2.is_terminal g symbol then
          let t = Grammar2.terminal symbol in
          terminal_loop t states
            (Grammar2.Leaf (~-1,0,term_array, fc))
            (Grammar2.Leaf (~-1,1,term_array, ns))
        else
          let nt = Grammar2.non_terminal symbol in
          incr lambda;
          let lmbd = !lambda in
          let y0 = (Grammar2.Leaf (lmbd,0, term_array, fc))
          and y1 = (Grammar2.Leaf (lmbd,1, term_array, ns)) in
          rule_loop nt states y0 y1
      end

    (* Evaluates non-terminal [t] with [states] and parameters [y0], [y1]. *)
    and rule_loop (t : Grammar2.n_symbol) states y0 y1 =
      if t = Node.nil || states == dummy_set then nil_res else
      let () = incr rule_counter in
      (* A minor collection is forced every 65536 rule evaluations. *)
      if !rule_counter land 65535 == 0 then begin Gc.minor() end;
      let k = (t, states) in
      let pstates = DCache.find dcache k in
      let notfound = DCache.notfound pstates in
      let rhs = Grammar2.get_rule g t in
      let id1 = Grammar2.get_id1 rhs in
      let id2 = Grammar2.get_id2 rhs in
      let conf = Grammar2.get_conf rhs in
      if notfound then
        (* First visit of (t, states): evaluate the rule in full and record
           which state sets reached the two parameters. *)
        let ny0 = dispatch_param0 conf id2 y0 y1 in
        let ny1 = dispatch_param1 conf id2 y0 y1 in
        let res = dispatch_loop id1 states ny0 ny1 in
        pstates.(0) <- res.in0;
        pstates.(1) <- res.in1;
        res
        (*
           UCache.add ucache (t, states, fst res.out0, fst res.out1) res.main;
           let h = Hashtbl.create 7 in
           for i = 0 to res_len - 1 do
           Hashtbl.add h (0, i) (snd res.out0).(i);
           Hashtbl.add h (1, i) (snd res.out1).(i);
           done;
           { res with
           main = ((fst res.main), (U.close h (snd res.main)));
           } *)
      else
        (* Known (t, states): evaluate the parameters first, then reuse or
           compute the rule body with the parameters abstracted away. *)
        let res0 = partial_loop y0 pstates.(0) in
        let res1 = partial_loop y1 pstates.(1) in
        let k2 = (t, states, fst res0.main, fst res1.main) in
        let s, r =
          try
            UCache.find ucache k2
          with
            Not_found ->
              let ores0 = { res0 with main = fst res0.main, U.var 0 (snd res0.main) }
              and ores1 = { res1 with main = fst res1.main, U.var 1 (snd res1.main) }
              in
              let res = dispatch_loop id1 states (Grammar2.Cache (0,ores0)) (Grammar2.Cache (1, ores1)) in
              UCache.add ucache k2 res.main;
              res.main
        in
        (* Bind (parameter index, state index) to the actual parameter
           results and substitute them into the cached result. *)
        let h = Hashtbl.create 7 in
        for i = 0 to res_len - 1 do
          Hashtbl.add h (0, i) (snd res0.main).(i);
          Hashtbl.add h (1, i) (snd res1.main).(i);
        done;
        { in0 = pstates.(0);
          in1 = pstates.(1);
          out0 = res0.main;
          out1 = res1.main;
          main = s, U.close h r;
        }

    (* Routes symbol [id1] to the non-terminal or terminal evaluator. *)
    and dispatch_loop id1 states ny0 ny1 =
      if Grammar2.is_non_terminal g id1 then
        rule_loop (Grammar2.non_terminal id1) states ny0 ny1
      else
        terminal_loop (Grammar2.terminal id1) states ny0 ny1

    (* Evaluates terminal [symbol]: evaluates both parameters with the state
       sets given by [get_trans], then applies the level-3 closure for the
       selected transitions. Nodes are numbered with a running counter. The
       tree argument of the closure is a dummy [Obj.magic ()] value. *)
    and terminal_loop (symbol : Grammar2.t_symbol) states y0 y1 =
      if symbol == Grammar2.nil_symbol || symbol = Node.nil || states == dummy_set then nil_res else begin
        let tag = Grammar2.tag symbol in
        let lst, rst, trans = get_trans tag states in
        let res0 = partial_loop y0 lst in
        let res1 = partial_loop y1 rst in
        let s1, slot1 = res0.main
        and s2, slot2 = res1.main in
        let opcode = L3JIT.find cache3 trans s1 s2 in
        let node = Node.of_int !preorder_counter in
        incr preorder_counter;
        let res =
          if opcode == L3JIT.dummy then
            (L3JIT.cache_apply cache3 auto trans s1 s2) empty_slot slot1 slot2 (Obj.magic ()) node
          else
            opcode empty_slot slot1 slot2 (Obj.magic()) (node)
        in
        { in0 = lst; in1 = rst; out0 = res0.main; out1 = res1.main; main = res }
      end

    (* Evaluates parameter [l] with [states], according to its shape. *)
    and partial_loop l states =
      if l == dummy_leaf then nil_res else
      match l with
      | Grammar2.Cache (_, r) -> r
      | Grammar2.Leaf (_,_, _, id) -> start_loop id states
      | Grammar2.Node0 id ->
          if (Grammar2.terminal id) == Grammar2.nil_symbol then nil_res
          else rule_loop (Grammar2.non_terminal id) states dummy_leaf dummy_leaf
      | Grammar2.Node1 (id, y0) ->
          rule_loop (Grammar2.non_terminal id) states y0 dummy_leaf
      | Grammar2.Node2 (id, y0, y1) ->
          if Grammar2.is_terminal g id then
            terminal_loop (Grammar2.terminal id) states y0 y1
          else
            rule_loop (Grammar2.non_terminal id) states y0 y1
    in
    let (_, slot) = (start_loop (Node.null) auto.init).main in
    slot.(StateSet.min_elt auto.topdown_marking_states)
  ;;

end