From: Kim Nguyễn Date: Wed, 22 Feb 2012 16:54:15 +0000 (+0100) Subject: New run function based on rank2 grammars: X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;h=960e4daabcffa00ab73a9b0edb7366f16df284ba;p=SXSI%2Fxpathcomp.git New run function based on rank2 grammars: - hard-code the fact that there are 7 possible shapes for bCNF rules of rank 2 - avoid allocating arrays to pass parameters around - run the Gc less often. --- diff --git a/src/grammar2.ml b/src/grammar2.ml index d2d497e..a7ccb09 100644 --- a/src/grammar2.ml +++ b/src/grammar2.ml @@ -205,11 +205,21 @@ struct let tag2, pos2 = pos_id2 params in let id1 = h_find ~msg:"7" tag_to_id tag1 and id2 = h_find ~msg:"8" tag_to_id tag2 in + let conf = + if List.length args = 0 then 0 + else + if List.length args = 1 then + if List.length params = 1 then 1 + else if pos2 = 1 then 2 + else 3 + else (* 2 parameters *) + if List.length params = 1 then 4 + else if pos2 = 1 then 5 + else 6 + in let rule_ = id2 lsl 27 in - let rule_ = (rule_ lor id1) lsl 2 in - let rule_ = (rule_ lor pos2) lsl 2 in - let rule_ = (rule_ lor (List.length params)) lsl 2 in - let rule_ = rule_ lor (List.length args) in + let rule_ = (rule_ lor id1) lsl 3 in + let rule_ = rule_ lor conf in r_array.((h_find ~msg:"9" tag_to_id head) - rules_offset ) <- rule_ ) rules; (*let l = Array.length renum_tags in *) @@ -238,7 +248,7 @@ let parse file = g let _GRAMMAR_MAGIC = 0xaabbcc -let _GRAMMAR_VERSION = 2 +let _GRAMMAR_VERSION = 3 let save g f = let cout = open_out f in @@ -271,14 +281,17 @@ let load f = ignore(Unix.lseek fd pos Unix.SEEK_SET); let bp = Bp.load fd in close_in cin; - { + let g = { start = bp; tags = tags; rules = rules; rules_offset = rules_offset; tag_to_id = tag_to_id; tag_of_id = tag_of_id; - } + } in + Printf.eprintf "Grammar size:%i kb\n%!" 
+ ((Ocaml.size_b g + Bp.alloc_stats ())/1024); + g type node = [ `Start ] Node.t @@ -296,7 +309,9 @@ type tn_symbol = [ any_type ] Node.t type partial = Leaf of node - | Node of tn_symbol * partial array + | Node0 of tn_symbol (* No parameters *) + | Node1 of tn_symbol * partial + | Node2 of tn_symbol * partial * partial let is_nil (t : t_symbol) = @@ -348,20 +363,42 @@ let get_rule g (r : n_symbol) : rhs = Node.of_int (g.rules.((Node.to_int r) - g.rules_offset)) let get_id1 (r : rhs) : tn_symbol = - Node.of_int( - ((Node.to_int r) lsr 6) land 0x7ffffff) + Node.of_int(((Node.to_int r) lsr 3) land 0x7ffffff) let get_id2 (r : rhs) : tn_symbol = - Node.of_int((Node.to_int r) lsr 33) + Node.of_int((Node.to_int r) lsr 30) + +type conf = | C0 (* B(C) *) + | C1 (* B(C(y0)) *) + | C2 (* B(C, y0) *) + | C3 (* B(y0, C) *) + | C4 (* B(C(y0, y1)) *) + | C5 (* B(C(y0), y1) *) + | C6 (* B(y0, C(y1)) *) + +let get_conf (r : rhs) : conf = + (Obj.magic ((Node.to_int r) land 0b111)) + let get_rank (r : rhs) : int = - (Node.to_int r) land 0b11 + match get_conf r with + | C0 -> 0 + | C1 | C2 | C3 -> 1 + | C4 | C5 | C6 -> 2 let get_id1_rank (r : rhs) : int = - ((Node.to_int r) lsr 2) land 0b11 + match get_conf r with + | C0 | C1 | C4 -> 1 + | _ -> 2 let get_id2_pos (r : rhs) : int = - ((Node.to_int r) lsr 4) land 0b11 + match get_conf r with + | C0 | C1 |C2 | C4 | C5 -> 1 + | _ -> 2 let get_id2_rank (r : rhs) : int = - get_rank r + 1 - get_id1_rank r + match get_conf r with + | C0 | C2 | C3 -> 0 + | C1 | C5 | C6 -> 1 + | C4 -> 2 + diff --git a/src/grammar2.mli b/src/grammar2.mli index a775780..e2efc85 100644 --- a/src/grammar2.mli +++ b/src/grammar2.mli @@ -15,8 +15,11 @@ type tn_symbol = [ any_type ] Node.t type partial = Leaf of node - | Node of tn_symbol * partial array + | Node0 of tn_symbol + | Node1 of tn_symbol * partial + | Node2 of tn_symbol * partial * partial +type conf = C0 | C1 | C2 | C3 | C4 | C5 | C6 val parse : string -> t val save : t -> string -> unit @@ -33,6 +36,7 
@@ val tag : t_symbol -> Tag.t val start_first_child : t -> node -> node val start_next_sibling : t -> node -> node val get_rule : t -> n_symbol -> rhs +val get_conf : rhs -> conf val get_rank : rhs -> int val get_id1_rank : rhs -> int val get_id2_rank : rhs -> int diff --git a/src/runtime.ml b/src/runtime.ml index 782293f..3bc3256 100644 --- a/src/runtime.ml +++ b/src/runtime.ml @@ -455,8 +455,6 @@ DEFINE LOOP_TAG (t, states, tag, ctx) = ( let cache3 = L3JIT.create () in let dummy2 = (StateSet.empty, StateSet.empty, Translist.nil) in let cache2 = Cache.Lvl2.create 512 dummy2 in - let parameters = Array.create 2 dummy_leaf in - let parameters_tmp = Array.create 2 dummy_leaf in let rule_counter = ref 0 in let start_counter = ref 0 in let () = at_exit (fun () -> Printf.eprintf "start_couter=%i, rule_counter=%i\n%!" @@ -507,67 +505,70 @@ DEFINE LOOP_TAG (t, states, tag, ctx) = ( let nt = Grammar2.non_terminal symbol in let rhs = Grammar2.get_rule g nt in let nparam = Grammar2.get_rank rhs in - let child = ref (Grammar2.start_first_child g idx) in - for i = 0 to nparam - 1 do - let c = !child in - parameters.(i) <- Grammar2.Leaf c; - child := Grammar2.start_next_sibling g c; - done; - rule_loop nt states parameters - + match nparam with + | 0 -> rule_loop nt states 0 dummy_leaf dummy_leaf + | 1 -> rule_loop nt states 1 (Grammar2.Leaf(Grammar2.start_first_child g idx)) dummy_leaf + | 2 -> + let fc = Grammar2.start_first_child g idx in + let ns = Grammar2.start_next_sibling g fc in + rule_loop nt states 2 (Grammar2.Leaf fc) (Grammar2.Leaf ns) + | _ -> assert false end - and rule_loop (t : Grammar2.n_symbol) states a_param = + and rule_loop (t : Grammar2.n_symbol) states rank y0 y1 = incr rule_counter; - if !rule_counter land (4095) == 0 then begin Gc.minor() end; + if !rule_counter land (65535) == 0 then begin Gc.minor() end; let rhs = Grammar2.get_rule g t in let id1 = Grammar2.get_id1 rhs in let id2 = Grammar2.get_id2 rhs in - let param_pos = Grammar2.get_id2_pos rhs 
in - let nparam1 = Grammar2.get_id1_rank rhs in - let nparam2 = Grammar2.get_id2_rank rhs in - let a_param2 = if nparam2 == 0 then [||] else Array.create nparam2 dummy_leaf in - let i = param_pos - 2 in - let ip1 = i + 1 in - let offset2d = i+2 in - let offset2s = i+nparam2 + 1 in - blit a_param 0 parameters_tmp 0 (i+1); - parameters_tmp.(ip1) <- Grammar2.Node(id2, a_param2); (* id2( ... ) *) - blit a_param offset2s parameters_tmp offset2d (nparam1 - i - 2); - blit a_param ip1 a_param2 0 nparam2; - - blit parameters_tmp 0 parameters 0 nparam1; + let conf = Grammar2.get_conf rhs in if Grammar2.is_non_terminal g id1 then let id1 = Grammar2.non_terminal id1 in - rule_loop id1 states parameters + match conf with + | Grammar2.C0 -> rule_loop id1 states 1 (Grammar2.Node0 id2) dummy_leaf + | Grammar2.C1 -> rule_loop id1 states 1 (Grammar2.Node1(id2,y0)) dummy_leaf + | Grammar2.C2 -> rule_loop id1 states 2 (Grammar2.Node0 id2) y0 + | Grammar2.C3 -> rule_loop id1 states 2 y0 (Grammar2.Node0 id2) + | Grammar2.C4 -> rule_loop id1 states 1 (Grammar2.Node2(id2, y0, y1)) dummy_leaf + | Grammar2.C5 -> rule_loop id1 states 2 (Grammar2.Node1(id2, y0)) y1 + | Grammar2.C6 -> rule_loop id1 states 2 y0 (Grammar2.Node1(id2, y1)) else let id1 = Grammar2.terminal id1 in - terminal_loop id1 states parameters - - and terminal_loop (symbol : Grammar2.t_symbol) states a_param = + match conf with + | Grammar2.C0 | Grammar2.C1 -> assert false + | Grammar2.C2 -> terminal_loop id1 states (Grammar2.Node0 id2) y0 + | Grammar2.C3 -> terminal_loop id1 states y0 (Grammar2.Node0 id2) + | Grammar2.C4 -> assert false + | Grammar2.C5 -> terminal_loop id1 states (Grammar2.Node1(id2, y0)) y1 + | Grammar2.C6 -> terminal_loop id1 states y0 (Grammar2.Node1(id2, y1)) + + and terminal_loop (symbol : Grammar2.t_symbol) states y0 y1 = if symbol == Grammar2.nil_symbol then nil_res else begin (* todo factor in from start_loop *) let tag = Grammar2.tag symbol in let lst, rst, trans = get_trans tag states in - let 
next = a_param.(1) in - let s1, slot1 = partial_loop a_param.(0) lst in - let s2, slot2 = partial_loop next rst in + let s1, slot1 = partial_loop y0 lst in + let s2, slot2 = partial_loop y1 rst in let opcode = L3JIT.find cache3 trans s1 s2 in if opcode == L3JIT.dummy then (L3JIT.cache_apply cache3 auto trans s1 s2) empty_slot slot1 slot2 (Obj.magic ()) (Obj.magic ()) else opcode empty_slot slot1 slot2 (Obj.magic()) (Obj.magic()) - end and partial_loop l states = match l with | Grammar2.Leaf id -> start_loop id states - | Grammar2.Node (id, a_param) -> - let is_term = Grammar2.is_terminal g id in - if is_term then - terminal_loop (Grammar2.terminal id) states a_param + | Grammar2.Node0 id -> + if (Grammar2.terminal id) == Grammar2.nil_symbol then nil_res + else + rule_loop (Grammar2.non_terminal id) states 0 dummy_leaf dummy_leaf + | Grammar2.Node1 (id, y0) -> + rule_loop (Grammar2.non_terminal id) states 1 y0 dummy_leaf + | Grammar2.Node2 (id, y0, y1) -> + if Grammar2.is_terminal g id then + terminal_loop (Grammar2.terminal id) states y0 y1 else - rule_loop (Grammar2.non_terminal id) states a_param + rule_loop (Grammar2.non_terminal id) states 1 y0 y1 in let _, slot = start_loop (Node.null) auto.init in