X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=src%2Fgrammar2.ml;h=a7ccb09d532f1e9d47d40e10721f589bac3488eb;hb=d3af6abaa7f12850a9edce0b6091700538e52f9c;hp=0c509bfc9bccbc071d0b4c907c6245dba1f8876d;hpb=6bc104a1034ba2d526c44b853836e3b42dd0ab2c;p=SXSI%2Fxpathcomp.git diff --git a/src/grammar2.ml b/src/grammar2.ml index 0c509bf..a7ccb09 100644 --- a/src/grammar2.ml +++ b/src/grammar2.ml @@ -200,16 +200,26 @@ struct loop 1 l in Hashtbl.iter (fun lhs rhs -> - let Node( head, _ ) = lhs in + let Node( head, args ) = lhs in let Node( tag1, params) = rhs in let tag2, pos2 = pos_id2 params in let id1 = h_find ~msg:"7" tag_to_id tag1 - and id2 = h_find ~msg:"8" tag_to_id tag2 + and id2 = h_find ~msg:"8" tag_to_id tag2 in + let conf = + if List.length args = 0 then 0 + else + if List.length args = 1 then + if List.length params = 1 then 1 + else if pos2 = 1 then 2 + else 3 + else (* 2 parameters *) + if List.length params = 1 then 4 + else if pos2 = 1 then 5 + else 6 in let rule_ = id2 lsl 27 in - let rule_ = (rule_ lor id1) lsl 2 in - let rule_ = (rule_ lor pos2) lsl 2 in - let rule_ = rule_ lor (List.length params) in + let rule_ = (rule_ lor id1) lsl 3 in + let rule_ = rule_ lor conf in r_array.((h_find ~msg:"9" tag_to_id head) - rules_offset ) <- rule_ ) rules; (*let l = Array.length renum_tags in *) @@ -238,7 +248,7 @@ let parse file = g let _GRAMMAR_MAGIC = 0xaabbcc -let _GRAMMAR_VERSION = 2 +let _GRAMMAR_VERSION = 3 let save g f = let cout = open_out f in @@ -258,36 +268,30 @@ let save g f = let load f = let cin = open_in f in - let pr_pos () = - Printf.eprintf "Position: %i kiB\n" (pos_in cin / 1024) - in let read () = Marshal.from_channel cin in if read () != _GRAMMAR_MAGIC then failwith "Invalid grammar file"; if read () != _GRAMMAR_VERSION then failwith "Deprecated grammar format"; - pr_pos(); let tags : int array = read () in - pr_pos(); let rules : int array = read () in - pr_pos(); let rules_offset : int = read () in - pr_pos(); let tag_to_id : (string, int) Hashtbl.t = read () in - pr_pos(); let tag_of_id : string array = read () in - pr_pos(); let fd = Unix.descr_of_in_channel cin in let pos = pos_in cin in ignore(Unix.lseek fd pos Unix.SEEK_SET); let bp = Bp.load fd in close_in cin; - { + let g = { start = bp; tags = tags; rules = rules; rules_offset = rules_offset; tag_to_id = tag_to_id; tag_of_id = tag_of_id; - } + } in + Printf.eprintf "Grammar size:%i kb\n%!" + ((Ocaml.size_b g + Bp.alloc_stats ())/1024); + g type node = [ `Start ] Node.t @@ -303,6 +307,13 @@ type t_symbol = t_type Node.t type tn_symbol = [ any_type ] Node.t +type partial = + Leaf of node + | Node0 of tn_symbol (* No parameters *) + | Node1 of tn_symbol * partial + | Node2 of tn_symbol * partial * partial + + let is_nil (t : t_symbol) = (Node.to_int t) == 4 @@ -312,7 +323,7 @@ let nil_symbol : t_symbol = let translate_tag _ t = if t == 4 then ~-1 else t let to_string t tag = if tag < Array.length t.tag_of_id then t.tag_of_id.(Tag.to_int tag) - else "" + else "" let register_tag t s = try Hashtbl.find t.tag_to_id s with @@ -325,14 +336,16 @@ let tag_operations t = { } let start_root : node = Node.of_int 0 -let start_tag t (idx : node) = - t.tags.(Bp.preorder_rank t.start (Node.to_int idx)) +let start_tag g (idx : node) : [ t_symbol = "%identity" -external non_terminal : [< any_type ] Node.t -> t_symbol = "%identity" +external non_terminal : [< any_type ] Node.t -> n_symbol = "%identity" let tag (n : t_symbol) : Tag.t = Obj.magic n @@ -350,15 +363,42 @@ let get_rule g (r : n_symbol) : rhs = Node.of_int (g.rules.((Node.to_int r) - g.rules_offset)) let get_id1 (r : rhs) : tn_symbol = - Node.of_int( - ((Node.to_int r) lsr 4) land 0x7ffffff) + Node.of_int(((Node.to_int r) lsr 3) land 0x7ffffff) let get_id2 (r : rhs) : tn_symbol = - Node.of_int((Node.to_int r) lsr 31) - -let get_param_pos (r : rhs) : int = - ((Node.to_int r) lsr 2) land 0b11 - -let num_params (r : rhs) : int = - (Node.to_int r) land 0b11 + Node.of_int((Node.to_int r) lsr 30) + +type conf = | C0 (* B(C) *) + | C1 (* B(C(y0)) *) + | C2 (* B(C, y0) *) + | C3 (* B(y0, C) *) + | C4 (* B(C(y0, y1)) *) + | C5 (* B(C(y0), y1) *) + | C6 (* B(y0, C(y1)) *) + +let get_conf (r : rhs) : conf = + (Obj.magic ((Node.to_int r) land 0b111)) + + +let get_rank (r : rhs) : int = + match get_conf r with + | C0 -> 0 + | C1 | C2 | C3 -> 1 + | C4 | C5 | C6 -> 2 + +let get_id1_rank (r : rhs) : int = + match get_conf r with + | C0 | C1 | C4 -> 1 + | _ -> 2 + +let get_id2_pos (r : rhs) : int = + match get_conf r with + | C0 | C1 |C2 | C4 | C5 -> 1 + | _ -> 2 + +let get_id2_rank (r : rhs) : int = + match get_conf r with + | C0 | C2 | C3 -> 0 + | C1 | C5 | C6 -> 1 + | C4 -> 2