X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=src%2Fgrammar2.ml;h=ca21d0a99df0b1df65531b5827117be89dd8c8a5;hb=7270a43a59e6ff4b27ab0a339723ed8a24d0a091;hp=82cf036481133be80fb83adc0c8a37c0f969a69a;hpb=3dc9065cb7e4b38bf25e6fb50017efa5b11de4ff;p=SXSI%2Fxpathcomp.git diff --git a/src/grammar2.ml b/src/grammar2.ml index 82cf036..ca21d0a 100644 --- a/src/grammar2.ml +++ b/src/grammar2.ml @@ -200,30 +200,40 @@ struct loop 1 l in Hashtbl.iter (fun lhs rhs -> - let Node( head, _ ) = lhs in + let Node( head, args ) = lhs in let Node( tag1, params) = rhs in let tag2, pos2 = pos_id2 params in let id1 = h_find ~msg:"7" tag_to_id tag1 - and id2 = h_find ~msg:"8" tag_to_id tag2 + and id2 = h_find ~msg:"8" tag_to_id tag2 in + let conf = + if List.length args = 0 then 0 + else + if List.length args = 1 then + if List.length params = 1 then 1 + else if pos2 = 1 then 2 + else 3 + else (* 2 parameters *) + if List.length params = 1 then 4 + else if pos2 = 1 then 5 + else 6 in let rule_ = id2 lsl 27 in - let rule_ = (rule_ lor id1) lsl 2 in - let rule_ = (rule_ lor pos2) lsl 2 in - let rule_ = rule_ lor (List.length params) in + let rule_ = (rule_ lor id1) lsl 3 in + let rule_ = rule_ lor conf in r_array.((h_find ~msg:"9" tag_to_id head) - rules_offset ) <- rule_ ) rules; - let l = Array.length renum_tags in - let tag32 = Array32.create l 0 in + (*let l = Array.length renum_tags in *) + (*let tag32 = Array32.create l 0 in for i = 0 to l - 1 do Array32.set tag32 i (renum_tags.(i) land 0x7ffffff); - done; + done; *) (* Remove the non-terminal names from the hash tables *) let tag_to_id2 = Hashtbl.create 31 in Hashtbl.iter (fun s i -> if i < rules_offset then Hashtbl.add tag_to_id2 s i) tag_to_id; { start = bv; - tags = tag32; - rules = renum_tags; + tags = renum_tags; + rules = r_array; rules_offset = rules_offset; tag_to_id = tag_to_id2; tag_of_id = Array.sub old_new_mapping 0 rules_offset @@ -238,7 +248,7 @@ let parse file = g let _GRAMMAR_MAGIC = 0xaabbcc -let _GRAMMAR_VERSION = 2 +let _GRAMMAR_VERSION = 3 let save g f = let cout = open_out f in @@ -258,62 +268,66 @@ let save g f = let load f = let cin = open_in f in - let pr_pos () = - Printf.eprintf "Position: %i kiB\n" (pos_in cin / 1024) - in let read () = Marshal.from_channel cin in if read () != _GRAMMAR_MAGIC then failwith "Invalid grammar file"; if read () != _GRAMMAR_VERSION then failwith "Deprecated grammar format"; - pr_pos(); let tags : int array = read () in - pr_pos(); let rules : int array = read () in - pr_pos(); let rules_offset : int = read () in - pr_pos(); let tag_to_id : (string, int) Hashtbl.t = read () in - pr_pos(); let tag_of_id : string array = read () in - pr_pos(); let fd = Unix.descr_of_in_channel cin in let pos = pos_in cin in ignore(Unix.lseek fd pos Unix.SEEK_SET); let bp = Bp.load fd in close_in cin; - { + let g = { start = bp; tags = tags; rules = rules; rules_offset = rules_offset; tag_to_id = tag_to_id; tag_of_id = tag_of_id; - } + } in + Printf.eprintf "Grammar size:%i kb\n%!" + ((Ocaml.size_b g + Bp.alloc_stats ())/1024); + g -type node = [ `Grammar ] Node.t +type node = [ `Start ] Node.t -type p_type = [ `Parameter ] type n_type = [ `NonTerminal ] type t_type = [ `Terminal ] -type any_type = [ p_type | n_type | t_type ] -type symbol = [ any_type ] Node.t +type r_type = [ `Rule ] +type any_type = [ n_type | t_type ] +type rhs = [ r_type ] Node.t -type p_symbol = p_type Node.t type n_symbol = n_type Node.t type t_symbol = t_type Node.t -type tn_symbol = [ n_type | t_type ] Node.t +type tn_symbol = [ any_type ] Node.t + + +type 'a partial = + | Cache of 'a + | Leaf of int*int * StateSet.t array * node + | Node0 of tn_symbol (* No parameters *) + | Node1 of tn_symbol * 'a partial + | Node2 of tn_symbol * 'a partial * 'a partial -let is_nil : (t:t_symbol) = +let is_nil (t : t_symbol) = (Node.to_int t) == 4 let nil_symbol : t_symbol = (Node.of_int 4) let translate_tag _ t = if t == 4 then ~-1 else t -let to_string t tag = tag_of_id.(Tag.to_int tag) -let register_tag t tag = - try Hashtbl.find t.tag_to_id (Tag.to_int tag) with +let to_string t tag = + if tag < Array.length t.tag_of_id then t.tag_of_id.(Tag.to_int tag) + else "" + +let register_tag t s = + try Hashtbl.find t.tag_to_id s with Not_found -> 4 let tag_operations t = { @@ -322,21 +336,171 @@ let tag_operations t = { Tag.translate = (fun s -> translate_tag t s); } +let start_root : node = Node.of_int 0 +let start_tag g (idx : node) : [= t.rules_offset -let is_terminal t (n : [< any_type ] Node.t) = not(is_terminal t n) +let is_terminal t (n : [< any_type ] Node.t) = not(is_non_terminal t n) + +external terminal : [< any_type ] Node.t -> t_symbol = "%identity" +external non_terminal : [< any_type ] Node.t -> n_symbol = "%identity" + let tag (n : t_symbol) : Tag.t = Obj.magic n +let get_rule g (r : n_symbol) : rhs = + Node.of_int (g.rules.((Node.to_int r) - g.rules_offset)) + +let get_id1 (r : rhs) : tn_symbol = + Node.of_int(((Node.to_int r) lsr 3) land 0x7ffffff) + +let get_id2 (r : rhs) : tn_symbol = + Node.of_int((Node.to_int r) lsr 30) + +type conf = | C0 (* B(C) *) + | C1 (* B(C(y0)) *) + | C2 (* B(C, y0) *) + | C3 (* B(y0, C) *) + | C4 (* B(C(y0, y1)) *) + | C5 (* B(C(y0), y1) *) + | C6 (* B(y0, C(y1)) *) + +let get_conf (r : rhs) : conf = + (Obj.magic ((Node.to_int r) land 0b111)) + + +let get_rank (r : rhs) : int = + match get_conf r with + | C0 -> 0 + | C1 | C2 | C3 -> 1 + | C4 | C5 | C6 -> 2 + +let get_id1_rank (r : rhs) : int = + match get_conf r with + | C0 | C1 | C4 -> 1 + | _ -> 2 + +let get_id2_pos (r : rhs) : int = + match get_conf r with + | C0 | C1 |C2 | C4 | C5 -> 1 + | _ -> 2 + +let get_id2_rank (r : rhs) : int = + match get_conf r with + | C0 | C2 | C3 -> 0 + | C1 | C5 | C6 -> 1 + | C4 -> 2 + +let is_attribute g tag = + tag > 4 && (to_string g tag).[0] == '2' + +let dummy_param : 'a partial = Leaf (~-1,~-1, [||], Node.nil) + +(* +let rec start_skip g idx count = + if idx < Node.null then count else + let symbol = start_tag g idx in + if is_terminal g symbol then + let symbol = terminal symbol in + if symbol == nil_symbol then count else + let count = count+1 in + let fs = start_first_child g idx in + let countl = start_skip g fs count in + start_skip g fs countl + else + let nt = non_terminal symbol in + let rhs = get_rule g nt in + let nparam = get_rank rhs in + match nparam with + | 0 -> rule_skip g nt dummy_param dummy_param count + | 1 -> rule_skip g nt (Leaf(0,StateSet.empty, Node.nil,start_first_child g idx)) dummy_param count + | 2 -> + let fc = start_first_child g idx in + let ns = start_next_sibling g fc in + rule_skip g nt (Leaf (0,[||],fc)) (Leaf (1,[||],ns)) count + | _ -> assert false + +and rule_skip g t y0 y1 count = + let rhs = get_rule g t in + let id1 = get_id1 rhs in + let id2 = get_id2 rhs in + let conf = get_conf rhs in + if is_non_terminal g id1 then + let id1 = non_terminal id1 in + match conf with + | C0 ->rule_skip g id1 (Node0 id2) dummy_param count + | C1 -> rule_skip g id1 (Node1(id2,y0)) dummy_param count + | C2 -> rule_skip g id1 (Node0 id2) y0 count + | C3 -> rule_skip g id1 y0 (Node0 id2) count + | C4 -> rule_skip g id1 (Node2(id2, y0, y1)) dummy_param count + | C5 -> rule_skip g id1 (Node1(id2, y0)) y1 count + | C6 -> rule_skip g id1 y0 (Node1(id2, y1)) count + else + let id1 = terminal id1 in + match conf with + | C0 | C1 -> assert false + | C2 -> terminal_skip g id1 (Node0 id2) y0 count + | C3 -> terminal_skip g id1 y0 (Node0 id2) count + | C4 -> assert false + | C5 -> terminal_skip g id1 (Node1(id2, y0)) y1 count + | C6 -> terminal_skip g id1 y0 (Node1(id2, y1)) count + +and terminal_skip g (symbol : t_symbol) y0 y1 count = + if symbol == nil_symbol then count else + let count = count + 1 in + let countl = partial_skip g y0 count in + partial_skip g y1 countl + +and partial_skip g l count = + match l with + | Cache _ -> assert false + | Leaf (_,_,_, id) -> start_skip g id count + | Node0 id -> + if (terminal id) == nil_symbol then count + else + rule_skip g (non_terminal id) dummy_param dummy_param count + + | Node1 (id, y0) -> + rule_skip g (non_terminal id) y0 dummy_param count + + | Node2 (id, y0, y1) -> + + if is_terminal g id then + terminal_skip g (terminal id) y0 y1 count + else + rule_skip g (non_terminal id) y0 y1 count + + +let dispatch_param0 conf id2 y0 y1 = + match conf with + | C0 -> Node0 id2 + | C1 -> Node1(id2,y0) + | C2 -> Node0 id2 + | C3 -> Node0 id2 + | C4 -> Node2(id2, y0, y1) + | C5 -> Node1(id2, y0) + | C6 -> y0 + +let dispatch_param1 conf id2 y0 y1 = + match conf with + | C0 -> dummy_param + | C1 -> dummy_param + | C2 -> y0 + | C3 -> Node0 id2 + | C4 -> dummy_param + | C5 -> y1 + | C6 -> Node1(id2, y1) + +*)