From: Kim Nguyễn Date: Thu, 16 Feb 2012 18:08:19 +0000 (+0100) Subject: First attempt at having a grammar runtime. X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;h=0578813dc7114276daf382d47661f027c973eb35;p=SXSI%2Fxpathcomp.git First attempt at having a grammar runtime. Needs more work to optimize array allocation. --- diff --git a/src/cache.ml b/src/cache.ml index ed8af8c..71eb4fb 100644 --- a/src/cache.ml +++ b/src/cache.ml @@ -1,3 +1,5 @@ +INCLUDE "trace.ml" + let realloc l old_size new_size dummy = let l' = Array.create new_size dummy in Array.blit l 0 l' 0 (min old_size new_size); @@ -93,9 +95,10 @@ struct dummy_line1 = dummy_line1; dummy_line2 = dummy_line2 } - let find t i j k = t.line.(i).(j).(k) + let find t k j i = t.line.(i).(j).(k) + - let add t i j k v = + let add t k j i v = let line = t.line in let line1 = let l1 = line.(i) in diff --git a/src/grammar.ml b/src/grammar.ml index cf167fc..bca97aa 100644 --- a/src/grammar.ml +++ b/src/grammar.ml @@ -56,6 +56,7 @@ let is_parameter (n : [< any_type ] Node.t) = let symbol_tag (n : t_symbol) = (Node.to_int n) lsr 2 ;; +let tag = symbol_tag let get_tag g (n : t_symbol) = to_string g (symbol_tag n) @@ -173,7 +174,6 @@ let load filename bp = | e -> (Unix.close fd; raise e) in Unix.close fd; - traversal g; Tag.init (tag_operations g); g diff --git a/src/grammar.mli b/src/grammar.mli index 29fc43c..5d5e8ab 100644 --- a/src/grammar.mli +++ b/src/grammar.mli @@ -1,4 +1,43 @@ type t + type node = [ `Grammar ] Node.t +type p_type = [ `Parameter ] +type n_type = [ `NonTerminal ] +type t_type = [ `Terminal ] +type any_type = [ p_type | n_type | t_type ] +type symbol = [ any_type ] Node.t + +type p_symbol = p_type Node.t +type n_symbol = n_type Node.t +type t_symbol = t_type Node.t +type tn_symbol = [ n_type | t_type ] Node.t + +type partial = Node of tn_symbol * partial array | Leaf of node + + + + +val is_terminal : [< any_type ] Node.t -> bool +val is_non_terminal : [< any_type ] Node.t -> bool +val is_parameter : [< any_type ] Node.t -> bool + +external get_symbol_at : t -> symbol -> node -> symbol = "caml_grammar_get_symbol_at" +external first_child : t -> symbol -> node -> node = "caml_grammar_first_child" +external next_sibling : t -> symbol -> node -> node = "caml_grammar_next_sibling" +external parameter : [< any_type ] Node.t -> p_symbol = "%identity" +external terminal : [< any_type ] Node.t -> t_symbol = "%identity" +external non_terminal : [< any_type ] Node.t -> n_symbol = "%identity" +external get_id1 : t -> n_symbol -> tn_symbol = "caml_grammar_get_id1" +external get_id2 : t -> n_symbol -> tn_symbol = "caml_grammar_get_id2" +external get_param_pos : t -> n_symbol -> int = "caml_grammar_get_param_pos" +val num_params : n_symbol -> int +val num_children : [< t_type | n_type ] Node.t -> int +external is_nil : t -> t_symbol -> bool = "caml_grammar_is_nil" + +val tag : t_symbol -> Tag.t +val tag_operations : t -> Tag.operations + + + val load : string -> bool -> t diff --git a/src/main.ml b/src/main.ml index cb7585f..b364275 100644 --- a/src/main.ml +++ b/src/main.ml @@ -94,8 +94,30 @@ let document = if Filename.check_suffix !Options.input_file ".g.bin" then let g = time ~msg:"Loading grammar" (Grammar.load !Options.input_file) true in begin - ignore(g); - Unix.sleep 10; (* Leave monitoring process the time to read the HWM *) + (* Todo Factorise with main *) + Tag.init (Grammar.tag_operations g); + let query = + time ~msg:"Parsing query" XPath.parse !Options.query + in + if !Options.verbose then begin + Printf.eprintf "Parsed query:\n%!"; + XPath.Ast.print Format.err_formatter query; + Format.fprintf Format.err_formatter "\n%!" + end; + let auto, bu_info = + time ~msg:"Compiling query" (Compile.compile) query + in + if !Options.verbose then Ata.print Format.err_formatter auto; + Gc.full_major(); + Gc.compact(); + Gc.set (tuned_gc); + let runtime = + let module R = ResJIT.Count in + let module M = Runtime.Make(R) in + (* mk_runtime run auto doc arg count print outfile *) + mk_runtime M.grammar_run auto (Obj.magic g) () R.NS.length (Obj.magic R.NS.serialize) None + in + runtime (); exit 0 end else if Filename.check_suffix !Options.input_file ".srx" diff --git a/src/ocaml.ml b/src/ocaml.ml index b8acb29..a0d9cc2 100644 --- a/src/ocaml.ml +++ b/src/ocaml.ml @@ -50,4 +50,4 @@ let size_w o = let size_b o = (size_w o) * (Sys.word_size / 8) -let size_kb o = (size_w o) / (8192 / Sys.word_size) +let size_kb o = (size_b o) / 1024 diff --git a/src/runtime.ml b/src/runtime.ml index 8337d2a..0be30a7 100644 --- a/src/runtime.ml +++ b/src/runtime.ml @@ -8,6 +8,8 @@ module type S = sig type result_set val top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set val bottom_up_run : Ata.t -> Tree.t -> Compile.text_query * string -> result_set + val grammar_run : Ata.t -> Grammar.t -> unit -> result_set + end module Make (U : ResJIT.S) : S with type result_set = U.NS.t = @@ -85,11 +87,12 @@ module Make (U : ResJIT.S) : S with type result_set = U.NS.t = in let lvl3 = Array.fold_left (fun acc a -> - Array.fold_left (fun acc2 a2 -> - Array.fold_left - (fun acc3 a3 -> if a3 == dummy then acc3 else acc3+1) acc2 a2) - acc a) 0 d - in + Array.fold_left (fun acc2 a2 -> + Array.fold_left + (fun acc3 a3 -> if a3 != dummy then acc3+1 else acc3) + acc2 a2) + acc a) 0 d + in fprintf fmt "L3JIT Statistics: \t%i entries \t%i used L1 lines @@ -121,7 +124,7 @@ module Make (U : ResJIT.S) : S with type result_set = U.NS.t = in let ns1 = StateSet.inter s1 orig_s1 and ns2 = StateSet.inter s2 orig_s2 in - let res, ops, todo = eval_trans auto ns1 ns2 trl in + let res, ops, todo = eval_trans auto s1 s2 trl in let code, not_marking = ResJIT.compile ops in let todo_code, todo_notmarking = List.fold_left (fun (l, b) (p, q, o) -> let c, b' = ResJIT.compile o in @@ -185,6 +188,8 @@ module Make (U : ResJIT.S) : S with type result_set = U.NS.t = let cache_apply cache auto tlist s1 s2 = let f = gen_code auto tlist s1 s2 in + TRACE("grammar", 2, __ "Inserting: %i, %a, %a\n%!" + (Uid.to_int tlist.Translist.Node.id) StateSet.print s1 StateSet.print s2); add cache tlist s1 s2 f; f end @@ -264,7 +269,7 @@ DEFINE LOOP_TAG (t, states, tag, ctx) = ( (* | L2JIT.NEXT_SIBLING s -> LOOP ((Tree.next_node_before tree t ctx), s, ctx) *) | L2JIT.FIRST_ELEMENT s -> LOOP ((Tree.first_element tree t), s, ctx) - | L2JIT.NEXT_ELEMENT s -> LOOP ((Tree.next_element tree t), s, ctx) + | L2JIT.NEXT_ELEMENT s -> LOOP ((Tree.next_element tree t), s, ctx) (* | L2JIT.NEXT_ELEMENT s -> LOOP ((Tree.next_node_before tree t ctx), s, ctx) *) | L2JIT.TAGGED_DESCENDANT (s, tag) -> @@ -312,7 +317,9 @@ DEFINE LOOP_TAG (t, states, tag, ctx) = ( s,empty_slot in - LOOP (root, states, ctx) + let r = LOOP (root, states, ctx) in + (*L3JIT.stats err_formatter cache3; *) + r let full_top_down_run auto states tree root = (*Ata.init (); *) @@ -321,6 +328,7 @@ DEFINE LOOP_TAG (t, states, tag, ctx) = ( let top_down_run auto tree root = (*Ata.init (); *) let res, slot = full_top_down_run auto auto.init tree root in + slot.(StateSet.min_elt auto.topdown_marking_states) @@ -416,5 +424,165 @@ DEFINE LOOP_TAG (t, states, tag, ctx) = ( slot.(StateSet.min_elt auto.topdown_marking_states) +(* Grammar run *) + module ArrayPool = + struct + let pool = Queue.create () + let create dummy = + if Queue.is_empty pool then + Array.create 16 dummy + else + Queue.take pool + let create dummy = Array.create 16 dummy + let free p = Queue.add p pool + end + + + let grammar_run auto g () = + + let start_symbol = (Node.of_int 0) in + let dummy_leaf = Grammar.Leaf (Node.nil) in + + let res_len = (StateSet.max_elt auto.states) + 1 in + let empty_slot = Array.create res_len U.NS.empty in + let nil_res = auto.bottom_states, empty_slot in + let empty_res = StateSet.empty, empty_slot in + let cache3 = L3JIT.create () in + let dummy2 = (StateSet.empty, StateSet.empty, Translist.nil) in + let cache2 = Cache.Lvl2.create 512 dummy2 in + let tmp1 = Array.create 16 dummy_leaf in + let tmp2 = Array.create 16 dummy_leaf in + let get_trans tag states = + let c = Cache.Lvl2.find cache2 tag (Uid.to_int states.StateSet.Node.id) in + if c == dummy2 then + let c = + StateSet.fold (fun q tr_acc -> + List.fold_left + (fun ((lstates, rstates, tacc) as acc) (ts, trs) -> + if TagSet.mem (Tag.translate tag) ts then + let _, _, _, phi = Transition.node trs in + let (_,_,l),(_,_,r) = Formula.st phi in + (StateSet.union l lstates, + StateSet.union r rstates, + Translist.cons trs tacc) + else acc) + tr_acc (Hashtbl.find auto.trans q) + ) states (StateSet.empty, StateSet.empty, Translist.nil) + in + begin + Cache.Lvl2.add cache2 tag (Uid.to_int states.StateSet.Node.id) c; + c + end + else c + in + let rec start_loop idx states = + TRACE("grammar", 2, __ "Node %i\n%!" (Node.to_int idx)); + if idx < Node.null then nil_res + else if StateSet.is_empty states then empty_res + else begin + let symbol = Grammar.get_symbol_at g start_symbol idx in + if Grammar.is_terminal symbol then + let symbol = Grammar.terminal symbol in + let tag = Grammar.tag symbol in + let lst, rst, trans = get_trans tag states in + let fs = Grammar.first_child g start_symbol idx in + let s1, slot1 = start_loop fs lst in + let s2, slot2 = start_loop (Grammar.next_sibling g start_symbol fs) rst in + let opcode = L3JIT.find cache3 trans s1 s2 in + if opcode == L3JIT.dummy then (L3JIT.cache_apply cache3 auto trans s1 s2) empty_slot slot1 slot2 (Obj.magic ()) (Obj.magic ()) + else opcode empty_slot slot1 slot2 (Obj.magic ()) (Obj.magic()) + else + let tn = Grammar.non_terminal symbol in + let nparam = Grammar.num_params tn in + let a_param = tmp1 (*ArrayPool.create dummy_leaf*) in + let child = ref (Grammar.first_child g start_symbol idx) in + for i = 0 to nparam - 1 do + let c = !child in + a_param.(i) <- Grammar.Leaf c; + child := Grammar.next_sibling g start_symbol c; + done; + (*let a_param = Array.init nparam + (fun _ -> let c = !child in + child := Grammar.next_sibling g start_symbol c; + Grammar.Leaf c) + in *) + rule_loop tn a_param states + + end + and counter = ref 0 + and rule_loop (t : Grammar.n_symbol) a_param states = + + incr counter; + if !counter land 8191 == 0 then Gc.minor(); + + let id1 = Grammar.get_id1 g t in + let id2 = Grammar.get_id2 g t in + let param_pos = Grammar.get_param_pos g t in + let nparam1 = Grammar.num_children id1 in + let nparam2 = + if Grammar.is_terminal id2 && Grammar.is_nil g (Grammar.terminal id2) then 0 + else Grammar.num_children id2 + in + let a_param1 = (*ArrayPool.create dummy_leaf*) tmp2 (* Array.create nparam1 dummy_leaf *) in + let a_param2 = Array.create nparam2 dummy_leaf (* Array.create nparam2 dummy_leaf *) in + let i = param_pos - 2 in + (*Array.blit a_param 0 a_param1 0 (i+1); (* Pass parameters before id2 *) *) + (* Array.blit is too slow *) + for k = 0 to i do + a_param1.(k) <- a_param.(k); + done; + a_param1.(i+1) <- Grammar.Node(id2, a_param2); (* id2( ... ) *) + (*Array.blit a_param (i + nparam2 + 1) a_param1 (i+2) (nparam1 - i - 2); (* Pass parameters after id2 *) *) + for k = 0 to nparam1 - i -3 do + a_param1.(i+2+k) <- a_param.(i + nparam2 + 1 + k); + done; + (*Array.blit a_param (i + 1) a_param2 0 nparam2; (* parameters below id2 *) *) + for k = 0 to nparam2 - 1 do + a_param2.(k) <- a_param.(i+1+k) + done; + for i = 0 to nparam1 do + a_param.(i) <- a_param1.(i) + done; + if Grammar.is_non_terminal id1 then + let id1 = Grammar.non_terminal id1 in + rule_loop id1 a_param states + else + let id1 = Grammar.terminal id1 in + terminal_loop id1 a_param states + + and terminal_loop (symbol : Grammar.t_symbol) a_param states = + if Grammar.is_nil g symbol then nil_res else begin + (* todo factor in from start_loop *) + let tag = Grammar.tag symbol in + let lst, rst, trans = get_trans tag states in + let s1, slot1 = partial_loop a_param.(0) lst in + let s2, slot2 = partial_loop a_param.(1) rst in + let opcode = L3JIT.find cache3 trans s1 s2 in + if opcode == L3JIT.dummy then (L3JIT.cache_apply cache3 auto trans s1 s2) empty_slot slot1 slot2 (Obj.magic ()) (Obj.magic ()) + else + opcode empty_slot slot1 slot2 (Obj.magic()) (Obj.magic()) + + (* End: TODO refactor *) + + end + + and partial_loop l states = + match l with + | Grammar.Leaf id -> start_loop id states + | Grammar.Node (id, a_param) -> + if Grammar.is_terminal id then terminal_loop (Grammar.terminal id) a_param states + else rule_loop (Grammar.non_terminal id) a_param states + in + (*L3JIT.stats err_formatter cache3; *) + let _, slot = start_loop (Node.null) auto.init in + slot.(StateSet.min_elt auto.topdown_marking_states) + ;; + + + + + + + end diff --git a/src/runtime.mli b/src/runtime.mli index a30db0d..eec5980 100644 --- a/src/runtime.mli +++ b/src/runtime.mli @@ -2,6 +2,7 @@ module type S = sig type result_set val top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set val bottom_up_run : Ata.t -> Tree.t -> Compile.text_query * string -> result_set + val grammar_run : Ata.t -> Grammar.t -> unit -> result_set end module Make (U : ResJIT.S) : S with type result_set = U.NS.t diff --git a/utils/alarm.ml b/utils/alarm.ml index 8874756..d1b7b30 100644 --- a/utils/alarm.ml +++ b/utils/alarm.ml @@ -2,7 +2,7 @@ let read_procmem pid = let cin = open_in (Printf.sprintf "/proc/%i/status" pid) in let matchline s = try - Scanf.sscanf s " VmHWM: %i kB" (fun i -> Some i) + Scanf.sscanf s " VmRSS: %i kB" (fun i -> Some i) with | _ -> None in @@ -12,6 +12,7 @@ let read_procmem pid = | None -> loop () in let s = try loop() with _ -> -1 in + Printf.eprintf "Memory: %i\n%!" s; close_in cin; s ;; @@ -21,10 +22,14 @@ let rec monitor pid timeout mem = if p == 0 then let current_mem = read_procmem pid in if current_mem >= !max_mem then max_mem := current_mem; - if (Unix.gettimeofday() > timeout || current_mem >= mem) - then Unix.kill pid Sys.sigkill + if (Unix.gettimeofday() > timeout) + then let () = Printf.eprintf "Timeout reached, killing child process\n%!" in + Unix.kill pid Sys.sigkill + else if !max_mem >= mem + then let () = Printf.eprintf "Memory limit reached, killing child process\n%!" in + Unix.kill pid Sys.sigkill else - let () = Unix.sleep 1 in + let () = Unix.sleep 1 in monitor pid timeout mem ;;