(** Top-down run of the automaton over a tree, starting from the given node. *)
9 val top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set
(** Bottom-up run seeded by a text query and its pattern string. *)
10 val bottom_up_run : Ata.t -> Tree.t -> Compile.text_query * string -> result_set
(** Run of the automaton over a [Grammar2.t] value rather than a [Tree.t]. *)
11 val grammar_run : Ata.t -> Grammar2.t -> unit -> result_set
(* Functor building the evaluator from a ResJIT.S implementation U.
   The exported result_set type is the node-set type U.NS.t. *)
15 module Make (U : ResJIT.S) : S with type result_set = U.NS.t =
18 type result_set = U.NS.t;;
(* eval_form auto s1 s2 f simplifies the formula f against two state sets and
   returns a pair: the simplified formula and a list of ResJIT instructions.
   s1 is consulted for `Left atoms and s2 for `Right atoms.
   NOTE(review): the recursive helper loop used by the Or/And arms is defined
   on a line that is not visible in this excerpt. *)
20 let eval_form auto s1 s2 f =
22 match Formula.expr f with
(* Constants and predicates are returned unchanged, with no instruction. *)
23 | Formula.False | Formula.True | Formula.Pred _ -> f, []
(* A `Left atom is true when membership of q in s1 equals the polarity b.
   A LEFT q instruction is emitted only for a positive atom whose state
   belongs to auto.topdown_marking_states. *)
24 | Formula.Atom(`Left, b, q) ->
25 Formula.of_bool (b == (StateSet.mem q s1)),
26 if b && StateSet.mem q auto.topdown_marking_states then [ResJIT.LEFT q] else []
(* Same rule for `Right atoms, checked against s2 and emitting RIGHT q. *)
27 | Formula.Atom (`Right, b, q) ->
28 Formula.of_bool(b == (StateSet.mem q s2)),
29 if b && StateSet.mem q auto.topdown_marking_states then [ResJIT.RIGHT q] else []
(* `Epsilon atoms are treated as impossible at this point. *)
30 | Formula.Atom (`Epsilon, _, _) -> assert false
(* Both operands are always evaluated; their instruction lists are
   concatenated left then right. *)
32 | Formula.Or(f1, f2) ->
33 let b1, i1 = loop f1 in
34 let b2, i2 = loop f2 in
35 Formula.or_pred b1 b2, i1 @ i2
36 | Formula.And(f1, f2) ->
37 let b1, i1 = loop f1 in
38 let b2, i2 = loop f2 in
39 Formula.and_pred b1 b2, i1 @ i2
(* eval_trans auto s1 s2 trans evaluates every transition of trans with
   eval_form and accumulates a triple (a_st, a_op, a_todo), starting from
   (StateSet.empty, [], []).
   NOTE(review): the line carrying the fold call itself and the header of the
   first match arm are not visible in this excerpt. *)
44 let eval_trans auto s1 s2 trans =
46 (fun t ((a_st, a_op, a_todo) as acc)->
(* q is the first component of the transition, m the third (a boolean that
   adds a SELF instruction below) and f its formula. *)
47 let q, _, m, f = Transition.node t in
48 let form, ops = eval_form auto s1 s2 f in
49 match Formula.expr form with
(* Entry pushed on a_op: state q with its instructions, SELF first when m. *)
52 (q, (if m then (ResJIT.SELF() :: ops) else ops)):: a_op,
(* A false formula contributes nothing. *)
54 | Formula.False -> acc
(* A residual predicate cannot be decided here: it is recorded in a_todo
   together with the state and the instructions to apply if it holds. *)
55 | Formula.Pred p -> a_st, a_op,
56 (p.Tree.Predicate.node, q, [(q,(if m then (ResJIT.SELF() :: ops) else ops))]) :: a_todo
58 ) trans (StateSet.empty, [], [])
(* A compiled entry: a function of three U.t values, the tree and a node,
   returning a state set paired with a U.t value. gen_code names the three
   U.t arguments empty_slot, sl1 and sl2. *)
63 type opcode = (U.t -> U.t -> U.t -> Tree.t -> Tree.node -> StateSet.t * U.t)
(* Three-level cache of compiled entries. *)
65 type t = opcode Cache.Lvl3.t
(* Placeholder stored in unused cache cells; callers compare against it with
   physical equality to detect a miss. Calling it raises Failure. *)
66 let dummy _ _ _ _ _ = failwith "Uninitialized L3JIT"
(* Prints on stderr the number of cache entries counted by the iteration.
   NOTE(review): the binding of count is on a line not visible here; the
   boolean b passed by Cache.Lvl3.iteri presumably flags unused cells since
   only entries with b false are counted - confirm against Cache.Lvl3. *)
69 let show_stats (a : t) =
71 Cache.Lvl3.iteri (fun _ _ _ _ b -> if not b then incr count) a;
72 eprintf "%!L3JIT: %i used entries\n%!" !count
(* Allocates a cache filled with the dummy placeholder (size argument 1024)
   and, in verbose mode, registers show_stats to run at program exit.
   NOTE(review): the enclosing definition header and its result expression
   are not visible in this excerpt; presumably this is the body of create. *)
75 let v = Cache.Lvl3.create 1024 dummy in
76 if !Options.verbose then at_exit (fun () -> show_stats v);
(* Cache lookup keyed by the integer ids of s2, s1 and tlist, in that order.
   NOTE(review): the line holding the actual Cache.Lvl3 call is not visible. *)
79 let find (t : t) tlist s1 s2 =
81 (Uid.to_int s2.StateSet.Node.id)
82 (Uid.to_int s1.StateSet.Node.id)
83 (Uid.to_int tlist.Translist.Node.id)
(* Cache insertion of v using the same key order as find: ids of s2, s1 and
   tlist. NOTE(review): the line holding the actual Cache.Lvl3 call and the
   use of v is not visible in this excerpt. *)
85 let add (t : t) tlist s1 s2 v =
87 (Uid.to_int s2.StateSet.Node.id)
88 (Uid.to_int s1.StateSet.Node.id)
89 (Uid.to_int tlist.Translist.Node.id)
(* compile auto trl s1 s2 evaluates the transition list trl against s1 and s2
   and compiles the resulting instruction lists with ResJIT.compile.
   The value built at the end is the tuple
   (res, code, todo_notmarking, todo_code).
   NOTE(review): a few lines, including the final result expression, are not
   visible in this excerpt. *)
92 let compile auto trl s1 s2 =
(* Union, over all transitions, of the two state sets returned by Formula.st
   for each formula. *)
93 let orig_s1, orig_s2 =
94 Translist.fold (fun t (a1, a2) ->
95 let _, _, _, f = Transition.node t in
96 let fs1, fs2 = Formula.st f in
97 (StateSet.union a1 fs1, StateSet.union a2 fs2)
98 ) trl (StateSet.empty, StateSet.empty)
(* Restrict s1 and s2 to the states that the formulas actually mention. *)
100 let ns1 = StateSet.inter s1 orig_s1
101 and ns2 = StateSet.inter s2 orig_s2 in
102 let res, ops, todo = eval_trans auto ns1 ns2 trl in
103 let code, not_marking = ResJIT.compile ops in
(* Compile each predicate-guarded entry; the boolean is the conjunction of
   not_marking with the flag returned for every entry. *)
104 let todo_code, todo_notmarking =
105 List.fold_left (fun (l, b) (p, q, o) -> let c, b' = ResJIT.compile o in
106 (p, q, c)::l, b && b')
107 ([], not_marking) todo
109 let opcode = res, code, todo_notmarking, todo_code in
(* gen_code auto tlist s1 s2 compiles the transition list and returns a
   closure taking empty_slot, the two child slots sl1 and sl2, the tree and
   the current node. Three closure shapes are produced depending on whether
   predicate-guarded entries exist and on the not_marking flag.
   NOTE(review): many lines of each closure are not visible in this excerpt,
   so the comments below only describe what the visible lines show. *)
112 let gen_code auto tlist s1 s2 =
113 let res, code, not_marking, todo_code = compile auto tlist s1 s2 in
(* Case without predicate-guarded entries. *)
115 if todo_code == [] then
(* Non-marking variant: when both child slots are physically the shared
   empty slot, the result is res paired with sl2 and nothing is executed. *)
116 if not_marking then begin fun empty_slot sl1 sl2 _ node ->
117 let slot1_empty = sl1 == empty_slot
118 and slot2_empty = sl2 == empty_slot in
119 if slot1_empty && slot2_empty then res,sl2
124 Array.copy empty_slot
128 U.exec sl sl1 sl2 node code;
131 else (* marking *) begin fun empty_slot sl1 sl2 _ node ->
133 if sl2 == empty_slot then
134 if sl1 == empty_slot then
135 Array.copy empty_slot
139 U.exec sl sl1 sl2 node code;
142 else (* todo != [] *)
(* Variant with predicates: this is the only closure that uses the tree
   argument, to test each predicate on the current node. *)
143 begin fun empty_slot sl1 sl2 tree node ->
145 if sl2 == empty_slot then
146 if sl1 == empty_slot then
147 Array.copy empty_slot
151 U.exec sl sl1 sl2 node code;
153 (fun ares (p, q, code) ->
(* The guarded code runs only when the predicate holds and the code is not
   the empty program ResJIT.Nil. *)
154 if !p tree node then begin
155 if code != ResJIT.Nil then U.exec sl sl1 sl2 node code;
158 else ares) res todo_code, sl
(* cache_apply cache auto tlist s1 s2 compiles the closure for the key
   (tlist, s1, s2) with gen_code, records it in cache and returns it.
   The caller is expected to have checked beforehand that the key is absent. *)
let cache_apply cache auto tlist s1 s2 =
  let compiled = gen_code auto tlist s1 s2 in
  add cache tlist s1 s2 compiled;
  compiled
(* Macro form of the traversal step: traces the node being entered, returns
   nil_res on Tree.nil, otherwise looks up the level-2 opcode for the node
   tag and the state set.
   NOTE(review): several lines of this macro, including the binding of _t and
   the dispatch call name, are not visible in this excerpt. *)
169 DEFINE LOOP (t, states, ctx) = (
171 TRACE("top-down-run", 3,
172 __ "Entering node %i with loop (tag %s, context %i) with states %a\n%!"
174 (Tag.to_string (Tree.tag tree _t))
176 (StateSet.print) (states));
177 if _t == Tree.nil then nil_res
179 let tag = Tree.tag tree _t in
181 _t tag (states) (ctx) (L2JIT.find cache2 tag (states))
(* Same as LOOP but the tag of the node is supplied by the caller instead of
   being read from the tree.
   NOTE(review): some lines of this macro are not visible in this excerpt. *)
184 DEFINE LOOP_TAG (t, states, tag, ctx) = (
185 let _t = (t) in (* to avoid duplicating expression t *)
186 TRACE("top-down-run", 3,
187 __ "Entering node %i with loop_tag (tag %s, context %i) with states %a\n%!"
189 (Tag.to_string (tag))
191 (StateSet.print) (states));
192 if _t == Tree.nil then nil_res
195 _t (tag) (states) (ctx) (L2JIT.find cache2 (tag) (states)))
(* Function-call variants of the two macros: they expand to plain calls to
   loop and loop_tag. Note that loop_tag takes the tag as its last argument.
   NOTE(review): presumably selected instead of the inlined variants by a
   conditional-compilation directive on lines not visible here - confirm. *)
197 DEFINE LOOP(t, states, ctx) = loop (t) (states) (ctx)
198 DEFINE LOOP_TAG(t, states, tag, ctx) = loop_tag (t) (states) (ctx) (tag)
(* top_down_run auto tree root states ctx runs the automaton top-down from
   root with the given state set. ctx is passed down the traversal and used
   as the bound of the *_following_before moves.
   Two caches are used: cache2 (L2JIT) maps a tag and a state set to a
   traversal opcode, cache3 (L3JIT) maps a transition list and the two child
   state sets to a compiled closure.
   NOTE(review): many lines of this function are not visible in this excerpt
   (match headers, let bindings of intermediate results, the final result). *)
200 let top_down_run auto tree root states ctx =
(* Slots are arrays indexed by state, hence one cell per state up to the
   largest one. empty_slot is shared and recognised by physical equality. *)
201 let res_len = StateSet.max_elt auto.states + 1 in
202 let empty_slot = Array.create res_len U.NS.empty in
203 let nil_res = auto.bottom_states, empty_slot in
204 let cache3 = L3JIT.create () in
(* Helper used by the *_SUBTREE instructions: when the node set is not the
   empty set, it is stored in a fresh slot at index auto.last.
   NOTE(review): the header and the end of this helper are not visible. *)
206 fun s subtree -> if subtree != U.NS.empty then
207 let r = Array.copy empty_slot in
208 r.(auto.last) <- subtree;
(* Level-3 dispatch: fetch the compiled closure, compiling and caching it on
   a miss (detected by physical equality with L3JIT.dummy), then apply it. *)
213 let l3jit_dispatch trl s1 s2 t sl1 sl2 =
214 let f = L3JIT.find cache3 trl s1 s2 in
215 if f == L3JIT.dummy then (L3JIT.cache_apply cache3 auto trl s1 s2) empty_slot sl1 sl2 tree t
216 else f empty_slot sl1 sl2 tree t
219 let cache2 = L2JIT.create () in
(* loop reads the tag from the tree, loop_tag receives it from the caller;
   both return nil_res on Tree.nil. *)
221 let rec loop t states ctx =
222 if t == Tree.nil then nil_res
224 let tag = Tree.tag tree t in
226 t tag (states) (ctx) (L2JIT.find cache2 tag (states))
227 and loop_tag t states ctx tag =
228 if t == Tree.nil then nil_res
231 t (tag) (states) (ctx) (L2JIT.find cache2 (tag) (states))
(* Level-2 dispatch on the opcode found for (tag, states). *)
232 and l2jit_dispatch t tag states ctx opcode =
234 | L2JIT.RETURN -> nil_res
(* Compile the opcode and dispatch again with the result. *)
236 let opcode = L2JIT.compile cache2 auto tree tag states in
237 l2jit_dispatch t tag states ctx opcode
(* Only the first move is explored; the other side is the bottom state set
   with the empty slot. The first move runs with Tree.closing tree t as its
   context. *)
239 | L2JIT.LEFT (tr_list, instr) ->
241 l2jit_dispatch_instr t (Tree.closing tree t) instr
243 l3jit_dispatch tr_list res1 auto.bottom_states t slot1 empty_slot
(* Only the second move is explored, with the current context. *)
245 | L2JIT.RIGHT (tr_list, instr) ->
247 l2jit_dispatch_instr t ctx instr
249 l3jit_dispatch tr_list auto.bottom_states res2 t empty_slot slot2
(* Both moves are explored and combined. *)
251 | L2JIT.BOTH (tr_list, instr1, instr2) ->
253 l2jit_dispatch_instr t (Tree.closing tree t) instr1
256 l2jit_dispatch_instr t ctx instr2
258 l3jit_dispatch tr_list res1 res2 t slot1 slot2
(* Executes one traversal instruction: each case moves to another node with
   the matching Tree primitive and continues the run there with state set s. *)
260 and l2jit_dispatch_instr t ctx instr =
262 | L2JIT.FIRST_CHILD s -> LOOP ((Tree.first_child tree t), s, ctx)
263 | L2JIT.NEXT_SIBLING s -> LOOP ((Tree.next_sibling tree t), s, ctx)
265 | L2JIT.FIRST_ELEMENT s -> LOOP ((Tree.first_element tree t), s, ctx)
266 | L2JIT.NEXT_ELEMENT s -> LOOP ((Tree.next_element tree t), s, ctx)
268 | L2JIT.TAGGED_DESCENDANT (s, tag) ->
269 LOOP_TAG ((Tree.tagged_descendant tree t tag), s, tag, ctx)
(* The two *_FOLLOWING cases pass ctx to the Tree primitive as well. *)
271 | L2JIT.TAGGED_FOLLOWING (s, tag) ->
272 LOOP_TAG((Tree.tagged_following_before tree t tag ctx), s, tag, ctx)
274 | L2JIT.SELECT_DESCENDANT (s, _, us) ->
275 LOOP((Tree.select_descendant tree t us), s, ctx)
277 | L2JIT.SELECT_FOLLOWING (s, pt, us) ->
278 LOOP ((Tree.select_following_before tree t us ctx), s, ctx)
280 | L2JIT.TAGGED_CHILD (s, tag) ->
281 LOOP_TAG((Tree.tagged_child tree t tag), s, tag, ctx)
283 | L2JIT.TAGGED_FOLLOWING_SIBLING (s, tag) ->
284 LOOP_TAG((Tree.tagged_following_sibling tree t tag), s, tag, ctx)
286 | L2JIT.SELECT_CHILD (s, _, us) ->
287 LOOP ((Tree.select_child tree t us), s, ctx)
289 | L2JIT.SELECT_FOLLOWING_SIBLING (s, _, us) ->
290 LOOP ((Tree.select_following_sibling tree t us), s, ctx)
(* Subtree instructions do not traverse: the node set is obtained directly
   from U.NS and handed to mark_subtree. *)
292 | L2JIT.TAGGED_SUBTREE(s, tag) ->
293 mark_subtree s (U.NS.subtree_tags tree t tag)
295 | L2JIT.ELEMENT_SUBTREE(s) ->
296 mark_subtree s (U.NS.subtree_elements tree t)
298 let r = LOOP (root, states, ctx) in
299 (*L3JIT.stats err_formatter cache3; *)
(* Runs top_down_run from root with an explicit state set, using the closing
   position of root as the context.
   NOTE(review): one line between the header and the call is not visible. *)
302 let full_top_down_run auto states tree root =
304 top_down_run auto tree root states (Tree.closing tree root)
(* Public entry point: runs from the initial states auto.init and returns the
   slot cell of the smallest state in auto.topdown_marking_states.
   This definition shadows the five-argument top_down_run defined earlier.
   NOTE(review): the lines between the visible ones are not shown here. *)
306 let top_down_run auto tree root =
308 let res, slot = full_top_down_run auto auto.init tree root in
310 slot.(StateSet.min_elt auto.topdown_marking_states)
313 (*** Bottom-up evaluation function **)
(* Prints a node set on the formatter fmt as its nodes between braces, each
   node followed by a space.
   NOTE(review): the header of this definition and the end of the iteration
   are not visible in this excerpt; slot_print refers to it as ns_print. *)
316 Format.fprintf fmt "{ ";
317 U.NS.iter begin fun node ->
318 Format.fprintf fmt "%a " Node.print node;
320 Format.fprintf fmt "}"
(* Prints one line per cell of the slot array t: the state index, an arrow
   and the node set stored in that cell.
   NOTE(review): the visible line prints with Format.eprintf, i.e. on stderr,
   and does not use the fmt argument - confirm whether that is intended.
   The end of the iteration is not visible in this excerpt. *)
322 let slot_print fmt t =
323 Array.iteri begin fun state ns ->
324 Format.eprintf "%a -> %a\n" State.print state ns_print ns;
(* uniq l removes consecutive duplicates from l, keeping the first element of
   every run. Elements are compared with physical equality (==), as in the
   rest of this file.

   The previous version emitted two elements at once when they differed and
   resumed after the second one, so a run starting at the second element was
   not collapsed: [1; 2; 2] was returned unchanged. Every element is now
   compared with its successor. The traversal is tail-recursive so long
   lists do not overflow the stack. *)
let uniq l =
  let rec loop acc = function
    | [] -> List.rev acc
    | [ last ] -> List.rev (last :: acc)
    | e1 :: ((e2 :: _) as rest) ->
      (* Drop e1 when it repeats in e2; e2 is re-examined on the next step. *)
      if e1 == e2 then loop acc rest else loop (e1 :: acc) rest
  in
  loop [] l;;
(* bottom_up_run auto tree (query, pat) starts from the nodes returned by the
   text query and evaluates the automaton upwards from them. The result is
   the slot cell of the smallest state in auto.topdown_marking_states.
   NOTE(review): many lines of this function are not visible in this excerpt
   (match headers, some let bindings and branches), so the comments below
   only describe the visible lines. *)
332 let bottom_up_run auto tree (query, pat) =
(* The text query is timed; its result array is turned into a list. *)
333 let array = time ~msg:"Timing text query" (Tree.full_text_query query tree) pat in
334 let leaves = Array.to_list array in
335 let states = auto.states in
336 let res_len = (StateSet.max_elt states) + 1 in
337 let empty_slot = Array.create res_len U.NS.empty in
338 let nil_res = auto.bottom_states, empty_slot in
(* Local cache of compiled closures, keyed below by tag and the ids of the
   two child state sets. *)
339 let cache = Cache.Lvl3.create 1024 L3JIT.dummy in
(* Consumes the list of leaves, running bottom_up_next on a node with no
   stop node (Tree.nil); the number of nodes left over is reported on
   stderr. *)
340 let rec loop_leaves l acc =
344 let res, lll = bottom_up_next node ll Tree.nil in
347 eprintf "Leftover nodes: %i\n" (List.length lll);
(* Evaluates the subtree under the first child of node top-down with all the
   states (nil_res when there is no child), then climbs with move_up. *)
351 and bottom_up_next node rest stop =
352 let fs = Tree.first_child tree node in
354 if fs == Tree.nil then nil_res
355 else full_top_down_run auto states tree fs
357 move_up node res1 true rest stop
(* Climbs from node until stop is reached. is_left tells whether res comes
   from the left side of node. *)
359 and move_up node res is_left rest stop =
360 if node == stop then res, rest
362 let prev_sibling = Tree.prev_sibling tree node in
363 let is_left' = prev_sibling == Tree.nil in
364 let real_parent = Tree.parent tree node in
(* Next node to visit: the parent when node has no previous sibling,
   otherwise the larger of the parent first child and stop.
   NOTE(review): max is the polymorphic comparison applied to nodes. *)
366 if is_left' then real_parent else max (Tree.first_child tree real_parent) stop
368 (* let parent = if is_left' then Tree.parent tree node else prev_sibling in *)
(* Results for the two sides of node. When coming from the left, the next
   pending leaf is processed first if it is a right descendant of node. *)
369 let (s1, sl1), (s2, sl2), rest' =
370 if is_left then match rest with
371 [] -> res, nil_res, rest
373 if Tree.is_right_descendant tree node next
375 let res2, rest' = bottom_up_next next rest' node in
377 else res, nil_res, rest
381 let tag = Tree.tag tree node in
382 let id1 = Uid.to_int s1.StateSet.Node.id in
383 let id2 = Uid.to_int s2.StateSet.Node.id in
(* On a cache miss, collect the transitions whose label set accepts the tag
   and compile them with L3JIT.gen_code. *)
385 let code = Cache.Lvl3.find cache tag id1 id2 in
386 if code == L3JIT.dummy then
390 List.fold_left (fun acc' (labels, tr) ->
391 if labels == TagSet.any || TagSet.mem tag labels
392 then Translist.cons tr acc' else acc')
394 (Hashtbl.find auto.trans q)
399 let code = L3JIT.gen_code auto trl s1 s2 in
400 Cache.Lvl3.add cache tag id1 id2 code; code
403 let res' = code empty_slot sl1 sl2 tree node in
404 move_up parent res' is_left' rest' stop
406 let _, slot = loop_leaves leaves (nil_res) in
407 slot.(StateSet.min_elt auto.topdown_marking_states)
(* get_trans g auto tag states collects, for every state q of states, the
   transitions of q whose label set contains the translated tag. The result
   is a triple: union of the first state sets returned by Formula.st, union
   of the second ones, and the list of selected transitions.
   NOTE(review): the inner fold header and the branches taken for attribute
   tags and for non-matching label sets are not visible in this excerpt. *)
409 let get_trans g auto tag states =
410 StateSet.fold (fun q tr_acc ->
412 (fun ((lstates, rstates, tacc) as acc) (ts, trs) ->
413 if TagSet.mem (Tag.translate tag) ts then
(* Special case: the tag is an attribute of the grammar but the label set
   does not contain Tag.attribute. *)
414 if not (TagSet.mem Tag.attribute ts) && Grammar2.is_attribute g tag
417 let _, _, _, phi = Transition.node trs in
418 let l, r = Formula.st phi in
419 (StateSet.union l lstates,
420 StateSet.union r rstates,
421 Translist.cons trs tacc)
423 tr_acc (Hashtbl.find auto.trans q)
424 ) states (StateSet.empty, StateSet.empty, Translist.nil)
(* Builds the first parameter handed to the callee of a rule from the rule
   configuration conf, the symbol id2 and the caller parameters y0 and y1.
   NOTE(review): the match header is not visible in this excerpt; the
   meaning of the configurations C0 to C6 is defined in Grammar2. *)
427 let dispatch_param0 conf id2 y0 y1 =
429 | Grammar2.C0 | Grammar2.C2 -> Grammar2.Node0 id2
430 | Grammar2.C1 | Grammar2.C5 -> Grammar2.Node1(id2,y0)
431 | Grammar2.C3 | Grammar2.C6 -> y0
432 | Grammar2.C4 -> Grammar2.Node2(id2, y0, y1)
(* Builds the second parameter handed to the callee of a rule; any
   configuration without a dedicated case yields Grammar2.dummy_param.
   NOTE(review): the match header and some cases are not visible in this
   excerpt. *)
434 let dispatch_param1 conf id2 y0 y1 =
437 | Grammar2.C3 -> Grammar2.Node0 id2
439 | Grammar2.C6 -> Grammar2.Node1(id2, y1)
440 | _ -> Grammar2.dummy_param
(* Hash-table key made of a non-terminal symbol and a state set.
   Hashing combines the integer value of the symbol with the id of the state
   set; equality is physical on both components.
   NOTE(review): the closing of this module is not visible in this excerpt. *)
442 module K_down = struct
443 type t = Grammar2.n_symbol * StateSet.t
444 let hash (x,y) = HASHINT2(Node.to_int x, Uid.to_int y.StateSet.Node.id)
445 let equal (x1,y1) (x2,y2) = x1 == x2 && y1 == y2
(* Hash-table key made of a non-terminal symbol and three state sets.
   Hashing combines the integer value of the symbol with the ids of the
   three sets; equality is physical on all four components.
   NOTE(review): the module header, the header of hash and the closing of the
   module are not visible in this excerpt; UCache refers to it as K_up. *)
449 type t = Grammar2.n_symbol * StateSet.t * StateSet.t * StateSet.t
451 HASHINT4 (Node.to_int a,
452 Uid.to_int b.StateSet.Node.id,
453 Uid.to_int c.StateSet.Node.id,
454 Uid.to_int d.StateSet.Node.id)
455 let equal (a1, b1, c1, d1) (a2, b2, c2, d2) =
456 a1 == a2 && b1 == b2 && c1 == c2 && d1 == d2
(* Hash table keyed by K_down. Values are two-cell arrays of state sets;
   an array whose two cells are both the dummy singleton means not found.
   NOTE(review): the module header and the definition that allocates the
   two-cell array are only partly visible in this excerpt. *)
461 include Hashtbl.Make(K_down)
462 let dummy = StateSet.singleton State.dummy
463 let notfound l = l.(0) == dummy && l.(1) == dummy
469 let a = [| dummy; dummy |] in
(* Hash table keyed by K_up. *)
473 module UCache = Hashtbl.Make(K_up)
(* Fields of the result record used by grammar_run: out0, out1 and main are
   each a state set paired with a U.t value.
   NOTE(review): the record header, the in0 and in1 field declarations and
   the headers of the two constructor functions below are not visible in
   this excerpt; grammar_run refers to them as mk_nil and mk_empty. *)
477 out0 : StateSet.t * U.t;
478 out1 : StateSet.t * U.t;
479 main : StateSet.t * U.t
482 { in0 = StateSet.empty;
483 in1 = StateSet.empty;
491 out0 = StateSet.empty,v;
492 out1 = StateSet.empty,v;
(* grammar_run auto g () runs the automaton over the grammar g, starting
   from Node.null with the initial states auto.init. The result is the cell
   of the smallest state of auto.topdown_marking_states in the slot of the
   main component of the final result record.
   NOTE(review): many lines of this function are not visible in this excerpt
   (conditionals, match headers, record constructions), so the comments
   below only describe the visible lines. *)
495 let grammar_run auto g () =
(* Sentinels compared with physical equality further down. *)
496 let dummy_leaf = Grammar2.dummy_param in
497 let dummy_set = StateSet.singleton State.dummy in
498 let res_len = (StateSet.max_elt auto.states) + 1 in
499 let empty_slot = Array.create res_len U.NS.empty in
500 let nil_res = mk_nil auto.bottom_states empty_slot in
501 let empty_res = mk_empty (StateSet.empty, empty_slot) in
502 let cache3 = L3JIT.create () in
503 let dummy2 = (StateSet.empty, StateSet.empty, Translist.nil) in
504 let cache2 = Cache.Lvl2.create 512 dummy2 in
(* rule_counter counts calls to rule_loop; preorder_counter numbers the
   terminal nodes in the order terminal_loop reaches them. *)
505 let rule_counter = ref 0 in
506 let preorder_counter = ref 0 in
507 let dcache = DCache.create 1023 in
508 let ucache = UCache.create 1023 in
509 let term_array = [| StateSet.empty; StateSet.empty |] in
(* Cached version of the top-level get_trans, keyed by tag and by the id of
   the state set. This local definition shadows the top-level one after its
   own body. *)
510 let get_trans tag states =
511 let c = Cache.Lvl2.find cache2 tag (Uid.to_int states.StateSet.Node.id) in
513 let c = get_trans g auto tag states in
515 Cache.Lvl2.add cache2 tag (Uid.to_int states.StateSet.Node.id) c;
520 let lambda = ref 0 in
(* Entry for a start symbol at position idx: returns nil_res for the dummy
   state set or a position below Node.null, otherwise dispatches to
   terminal_loop or rule_loop with leaf parameters for the first child and
   the next sibling. *)
521 let rec start_loop idx states =
522 TRACE("grammar", 2, __ "Node %i\n%!" (Node.to_int idx));
523 if states == dummy_set then nil_res else
524 if idx < Node.null then nil_res
526 let symbol = Grammar2.start_tag g idx in
527 let fc = Grammar2.start_first_child g idx in
528 let ns = Grammar2.start_next_sibling g fc in
529 if Grammar2.is_terminal g symbol then
530 let t = Grammar2.terminal symbol in
531 terminal_loop t states (Grammar2.Leaf (~-1,0,term_array, fc)) (Grammar2.Leaf (~-1,1,term_array, ns))
533 let nt = Grammar2.non_terminal symbol in
535 let lmbd = !lambda in
536 let y0 = (Grammar2.Leaf (lmbd,0, term_array, fc))
537 and y1 = (Grammar2.Leaf (lmbd,1, term_array, ns)) in
538 rule_loop nt states y0 y1
(* Evaluates the rule of non-terminal t with parameters y0 and y1. *)
540 and rule_loop (t : Grammar2.n_symbol) states y0 y1 =
541 if t = Node.nil || states == dummy_set then nil_res else
(* A minor collection is forced every 65536 calls. *)
542 let () = incr rule_counter in
543 if !rule_counter land 65535 == 0 then begin Gc.minor() end;
544 (* let k = (t, states) in*)
545 (* let pstates = DCache.find dcache k in
546 let notfound = DCache.notfound pstates in *)
547 let rhs = Grammar2.get_rule g t in
548 let id1 = Grammar2.get_id1 rhs in
549 let id2 = Grammar2.get_id2 rhs in
550 let conf = Grammar2.get_conf rhs in
551 (* if notfound then*)
552 let ny0 = dispatch_param0 conf id2 y0 y1 in
553 let ny1 = dispatch_param1 conf id2 y0 y1 in
554 let res = dispatch_loop id1 states ny0 ny1 in
555 (* pstates.(0) <- res.in0;
556 pstates.(1) <- res.in1; *)
558 UCache.add ucache (t, states, fst res.out0, fst res.out1)
(* Table mapping (parameter index, state index) to the matching slot cell,
   handed to U.close together with the slot of the main result. *)
560 let h = Hashtbl.create 7 in
561 for i = 0 to res_len - 1 do
562 Hashtbl.add h (0, i) (snd res.out0).(i);
563 Hashtbl.add h (1, i) (snd res.out1).(i);
566 main = ((fst res.main), (U.close h (snd res.main)));
(* NOTE(review): pstates is only bound inside the commented-out lines above;
   the lines from here to the end of rule_loop look like the cached branch
   of that disabled code - confirm against the full file. *)
570 let res0 = partial_loop y0 pstates.(0) in
571 let res1 = partial_loop y1 pstates.(1) in
572 let k2 = (t, states, fst res0.main, fst res1.main) in
575 UCache.find ucache k2
578 let ores0 = { res0 with main = fst res0.main, U.var 0 (snd res0.main) }
579 and ores1 = { res1 with main = fst res1.main, U.var 1 (snd res1.main) }
581 let res = dispatch_loop id1 states (Grammar2.Cache (0,ores0)) (Grammar2.Cache (1, ores1)) in
582 UCache.add ucache k2 res.main;
585 let h = Hashtbl.create 7 in
586 for i = 0 to res_len - 1 do
587 Hashtbl.add h (0, i) (snd res0.main).(i);
588 Hashtbl.add h (1, i) (snd res1.main).(i);
594 main = s, U.close h r;
(* Routes a symbol to rule_loop or terminal_loop depending on its kind. *)
597 and dispatch_loop id1 states ny0 ny1 =
598 if Grammar2.is_non_terminal g id1 then
599 rule_loop (Grammar2.non_terminal id1) states ny0 ny1
601 terminal_loop (Grammar2.terminal id1) states ny0 ny1
(* Evaluates a terminal symbol: both parameters are evaluated with the state
   sets given by get_trans, then the level-3 closure for the transitions and
   the two resulting state sets is applied. *)
603 and terminal_loop (symbol : Grammar2.t_symbol) states y0 y1 =
605 if symbol == Grammar2.nil_symbol || symbol = Node.nil || states == dummy_set then nil_res else begin
606 let tag = Grammar2.tag symbol in
607 let lst, rst, trans = get_trans tag states in
608 let res0 = partial_loop y0 lst in
609 let res1 = partial_loop y1 rst in
610 let s1, slot1 = res0.main
611 and s2, slot2 = res1.main in
612 let opcode = L3JIT.find cache3 trans s1 s2 in
(* The node passed to the closure is the current preorder number. *)
613 let node = Node.of_int !preorder_counter in
614 incr preorder_counter;
(* NOTE(review): Obj.magic () stands in for the tree argument. The closures
   built by gen_code only use the tree to test predicates, so this presumably
   relies on no predicate being present here - confirm. *)
616 if opcode == L3JIT.dummy then
617 (L3JIT.cache_apply cache3 auto trans s1 s2) empty_slot slot1 slot2 (Obj.magic ()) node
619 opcode empty_slot slot1 slot2 (Obj.magic()) (node)
(* Evaluates a parameter value: a cached result is returned as is, a leaf
   restarts from its start position, and node parameters call back into
   rule_loop or terminal_loop with their own parameters. *)
628 and partial_loop l states =
629 if l == dummy_leaf then nil_res else
631 | Grammar2.Cache (_, r) -> r
632 | Grammar2.Leaf (_,_, _, id) -> start_loop id states
633 | Grammar2.Node0 id ->
634 if (Grammar2.terminal id) == Grammar2.nil_symbol then nil_res
636 rule_loop (Grammar2.non_terminal id) states dummy_leaf dummy_leaf
638 | Grammar2.Node1 (id, y0) ->
639 rule_loop (Grammar2.non_terminal id) states y0 dummy_leaf
640 | Grammar2.Node2 (id, y0, y1) ->
641 if Grammar2.is_terminal g id then
642 terminal_loop (Grammar2.terminal id) states y0 y1
644 rule_loop (Grammar2.non_terminal id) states y0 y1
647 let (_, slot) = (start_loop (Node.null) auto.init).main in
648 slot.(StateSet.min_elt auto.topdown_marking_states)