+++ /dev/null
-type t = { (* in-memory grammar, as built by Parse.parse or read back by load *)
- start : Bp.t; (* Bp structure of the start tree, created from the open/close bitmap filled by Parse.parse_big_tree *)
- tags : int array; (* renumbered tag id of each start-tree node, in the order the nodes were opened; start_tag indexes it by preorder rank *)
- rules : int array; (* packed right-hand sides, indexed by (non-terminal id - rules_offset); decoded by get_id1, get_id2 and get_conf *)
- rules_offset : int; (* symbol ids >= rules_offset are non-terminals (see is_non_terminal) *)
- tag_to_id : (string, int) Hashtbl.t; (* name -> id, restricted by Parse.parse to ids < rules_offset *)
- tag_of_id : string array (* id -> name for ids < rules_offset *)
-}
-
-
-
-module Parse =
-struct
-
- (* Scratch buffer holding the characters of the tag name being read. *)
- let buffer = Buffer.create 512
-
- (* [parse_tree cin open_tag close_tag] reads one parenthesised tree from
-    [cin], stopping after a newline or a greater-than sign.
-    [open_tag name flag] is called each time a tag name is complete; [flag]
-    is true exactly when the name starts with a capital A.
-    [close_tag ()] is called each time a node ends. A name followed by a
-    comma or a dash is opened and closed immediately; a name followed by a
-    closing parenthesis closes both itself and its parent.
-    Spaces are skipped everywhere; commas and dashes are skipped between
-    tags.
-    Raises [Failure] on any other character and lets [End_of_file] from
-    [input_char] escape. *)
- let parse_tree cin open_tag close_tag =
-   let reject c = failwith ("Invalid character: " ^ (String.make 1 c)) in
-   (* Restart the scratch buffer with the first character of a new name. *)
-   let begin_tag c =
-     Buffer.clear buffer;
-     Buffer.add_char buffer c
-   in
-   let rec between_tags () =
-     match input_char cin with
-     | '\n' | '>' -> ()
-     | ' ' | ',' | '-' -> between_tags ()
-     | ')' ->
-       close_tag ();
-       between_tags ()
-     | 'A' as c ->
-       begin_tag c;
-       inside_tag true
-     | ('a'..'z' | 'B'..'Z' | '0'..'9' | '_') as c ->
-       begin_tag c;
-       inside_tag false
-     | c -> reject c
-
-   and inside_tag flag =
-     match input_char cin with
-     | ('a'..'z' | 'A'..'Z' | '0'..'9' | '_') as c ->
-       Buffer.add_char buffer c;
-       inside_tag flag
-     | ' ' -> inside_tag flag
-     | '(' ->
-       open_tag (Buffer.contents buffer) flag;
-       Buffer.clear buffer;
-       between_tags ()
-     | ',' | '-' ->
-       open_tag (Buffer.contents buffer) flag;
-       close_tag ();
-       Buffer.clear buffer;
-       between_tags ()
-     | ')' ->
-       open_tag (Buffer.contents buffer) flag;
-       Buffer.clear buffer;
-       close_tag ();
-       close_tag ();
-       between_tags ()
-     | c -> reject c
-   in
-   between_tags ()
-
-
- (* Symbol tables shared by the parsing functions of this module.
-    [tag_info] maps a tag name to (id, count, is_non_terminal);
-    [tag_of_id] maps an id back to its name;
-    [current_id] is the last id handed out. *)
- let tag_info = Hashtbl.create 1023
- let tag_of_id = Hashtbl.create 1023
- let current_id = ref 4
-
- (* Empties both tables and registers the five reserved tags under ids
-    0 to 4, each with a count of -1 and marked as not a non-terminal. *)
- let init () =
-   Hashtbl.clear tag_info;
-   Hashtbl.clear tag_of_id;
-   current_id := 4;
-   let reserved = [| "_ROOT"; "_A"; "_T"; "_AT"; "_" |] in
-   Array.iteri
-     (fun id name -> Hashtbl.add tag_info name (id, ~-1, false))
-     reserved;
-   Array.iteri (fun id name -> Hashtbl.add tag_of_id id name) reserved
-
-
- (* [add_tag s nterm] records one occurrence of tag [s] and returns its
-    updated (id, count, is_non_terminal) entry.
-    An unknown tag receives the next free id and starts with a count of 0
-    after this call; it is flagged as a non-terminal when [nterm] is true
-    or when it is named START. For a known tag [nterm] is ignored and the
-    stored flag is kept. *)
- let add_tag s nterm =
-   let entry =
-     try Hashtbl.find tag_info s
-     with Not_found ->
-       incr current_id;
-       let fresh = !current_id in
-       Hashtbl.add tag_of_id fresh s;
-       (fresh, ~-1, nterm || s = "START")
-   in
-   let (id, seen, is_nt) = entry in
-   let updated = (id, seen + 1, is_nt) in
-   Hashtbl.replace tag_info s updated;
-   updated
-
-
- (* Explicit tree: a tag name and its children. *)
- type tree = Node of string * tree list
-
- (* [parse_small_tree cin] reads one tree with [parse_tree] and returns it
-    as an explicit [tree]. Every tag except the parameter names y0 and y1
-    is registered through [add_tag].
-    Raises [End_of_file] when the line read does not contain exactly one
-    top-level tree (an empty line, for instance). *)
- let parse_small_tree cin =
-   (* Stack of nodes under construction; the bottom entry is a sentinel
-      that collects the top-level trees. *)
-   let pending = ref [ Node ("", []) ] in
-   let on_open name is_nt =
-     (match name with
-      | "y0" | "y1" -> ()
-      | _ -> ignore (add_tag name is_nt));
-     pending := Node (name, []) :: !pending
-   in
-   let on_close () =
-     match !pending with
-     | Node (child, kids) :: Node (parent, siblings) :: rest ->
-       (* Children were accumulated in reverse; restore their order. *)
-       pending := Node (parent, Node (child, List.rev kids) :: siblings) :: rest
-     | _ -> assert false
-   in
-   parse_tree cin on_open on_close;
-   match !pending with
-   | [ Node (_, [ only ]) ] -> only
-   | _ -> raise End_of_file
-
- (* [parse_big_tree cin] reads one tree with [parse_tree] without building
-    it explicitly: each opened node pushes a 1 bit and its tag id, each
-    closed node pushes a 0 bit. Returns the [Bp] structure created from the
-    bitmap together with the packed array of tag ids. *)
- let parse_big_tree cin =
-   let parens = Bp.bitmap_create () in
-   let labels = IntArray.create () in
-   let on_open name is_nt =
-     let (id, _, _) = add_tag name is_nt in
-     Bp.bitmap_push_back parens 1;
-     IntArray.push_back labels id
-   in
-   let on_close () = Bp.bitmap_push_back parens 0 in
-   parse_tree cin on_open on_close;
-   (Bp.create parens, IntArray.pack labels)
-
- (* Reads one character from [cin] and discards it. *)
- let eat_char cin = ignore (input_char cin : char)
-
- (* [h_find ~msg h i] is [Hashtbl.find h i] with a debugging aid: when the
-    key is missing and is an integer or a string, the key and [msg] are
-    printed on stderr before [Not_found] is raised again. The runtime
-    representation of the key is inspected through [Obj] because the
-    function is polymorphic in the key type. *)
- let h_find ?(msg="") h i =
-   try Hashtbl.find h i
-   with Not_found ->
-     let key = Obj.repr i in
-     if Obj.is_int key then
-       Printf.eprintf "Not_found (%s): %i\n%!" msg (Obj.magic i);
-     if Obj.tag key = Obj.string_tag then
-       Printf.eprintf "Not_found (%s): %s\n%!" msg (Obj.magic i);
-     raise Not_found
- ;;
-
- (* [parse cin] reads a whole grammar from [cin] and returns it as a [t].
-
-    Input, in order: one small tree that is read and discarded, the start
-    tree (read with [parse_big_tree]), then pairs of small trees
-    (left-hand side, right-hand side) until [End_of_file].
-
-    Tags are then renumbered: the five reserved ids 0 to 4 keep their
-    place, terminals follow in order of first appearance, then START,
-    then the other non-terminals by decreasing occurrence count.
-    [rules_offset] is the new id of START plus one.
-
-    Each rule is packed in one int: configuration in the low 3 bits, the
-    id of the root symbol of the right-hand side in the next 27 bits, and
-    the id of its first child that is not a parameter (y0 or y1) above.
-
-    Raises [Failure] on an invalid character and [Not_found] (after a
-    message on stderr) when a lookup fails, e.g. if START never occurs. *)
- let parse cin =
-   let rules = Hashtbl.create 1023 in
-   init ();
-   (* START *)
-   ignore (parse_small_tree cin);
-   (* > *)
-   (* ignore (input_char cin); *)
-   let bv, tags = parse_big_tree cin in
-   let () =
-     try
-       while true do
-         let lhs = parse_small_tree cin in
-         let rhs = parse_small_tree cin in
-         Hashtbl.add rules lhs rhs
-       done
-     with End_of_file -> ()
-   in
-   (* First, re-order the tags: slot i initially holds the name of old id i;
-      after sorting, the index of a name is its new id. *)
-   let old_new_mapping =
-     Array.init (Hashtbl.length tag_of_id)
-       (fun i -> h_find ~msg:"1" tag_of_id i)
-   in
-   Array.fast_sort (fun tag1 tag2 ->
-     let t1, count1, isnterm1 =
-       h_find ~msg:"2" tag_info tag1
-     and t2, count2, isnterm2 =
-       h_find ~msg:"3" tag_info tag2
-     in
-     (* Reserved tags first, in their original order. *)
-     if t1 <= 4 && t2 <= 4 then compare t1 t2
-     else if t1 <= 4 then -1
-     else if t2 <= 4 then 1
-     (* Terminals keep their relative order. *)
-     else if (not isnterm1) && (not isnterm2) then compare t1 t2
-     else if isnterm1 && isnterm2 then begin
-       (* START leads the non-terminals; the rest by decreasing count. *)
-       match tag1, tag2 with
-       | "START", "START" -> 0
-       | "START", _ -> ~-1
-       | _, "START" -> 1
-       | _ -> compare count2 count1
-     end
-     (* Exactly one is a non-terminal: the terminal goes first. *)
-     else if isnterm2 then -1
-     else 1) old_new_mapping;
-   let tag_to_id = Hashtbl.create 503 in
-   Array.iteri (fun i s ->
-     Hashtbl.add tag_to_id s i) old_new_mapping;
-   let renum_tags = Array.copy tags in
-   for i = 0 to Array.length tags - 1 do
-     renum_tags.(i) <-
-       h_find ~msg:"4" tag_to_id (h_find ~msg:"5" tag_of_id (tags.(i)))
-   done;
-   (* Array.make replaces Array.create, which no longer exists in OCaml 5. *)
-   let r_array = Array.make (Hashtbl.length rules) 0 in
-   let rules_offset = h_find ~msg:"6" tag_to_id "START" + 1 in
-   (* Name and 1-based position of the first child that is not y0 or y1. *)
-   let pos_id2 l =
-     let rec loop i l =
-       match l with
-       | [] -> assert false
-       | Node (tag, _) :: ll ->
-         if tag <> "y0" && tag <> "y1" then
-           tag, i
-         else loop (i+1) ll
-     in
-     loop 1 l
-   in
-   Hashtbl.iter (fun lhs rhs ->
-     let Node (head, args) = lhs in
-     let Node (tag1, params) = rhs in
-     let tag2, pos2 = pos_id2 params in
-     let id1 = h_find ~msg:"7" tag_to_id tag1
-     and id2 = h_find ~msg:"8" tag_to_id tag2 in
-     let single_child =
-       match params with
-       | [ _ ] -> true
-       | _ -> false
-     in
-     let conf =
-       match args with
-       | [] -> 0
-       | [ _ ] ->
-         if single_child then 1
-         else if pos2 = 1 then 2
-         else 3
-       | _ -> (* 2 parameters *)
-         if single_child then 4
-         else if pos2 = 1 then 5
-         else 6
-     in
-     let rule_ = id2 lsl 27 in
-     let rule_ = (rule_ lor id1) lsl 3 in
-     let rule_ = rule_ lor conf in
-     r_array.((h_find ~msg:"9" tag_to_id head) - rules_offset) <- rule_
-   ) rules;
-   (*let l = Array.length renum_tags in *)
-   (*let tag32 = Array32.create l 0 in
-     for i = 0 to l - 1 do
-     Array32.set tag32 i (renum_tags.(i) land 0x7ffffff);
-     done; *)
-   (* Remove the non-terminal names from the hash tables *)
-   let tag_to_id2 = Hashtbl.create 31 in
-   Hashtbl.iter (fun s i -> if i < rules_offset then Hashtbl.add tag_to_id2 s i)
-     tag_to_id;
-   { start = bv;
-     tags = renum_tags;
-     rules = r_array;
-     rules_offset = rules_offset;
-     tag_to_id = tag_to_id2;
-     tag_of_id = Array.sub old_new_mapping 0 rules_offset
-   }
-
-end
-
-(* [parse file] opens [file], parses the grammar it contains with
-   [Parse.parse] and returns it. The channel is closed on every path,
-   including when parsing raises; the exception is then re-raised.
-   Raises [Sys_error] if [file] cannot be opened. *)
-let parse file =
-  let cin = open_in file in
-  let g =
-    try Parse.parse cin
-    with e ->
-      close_in_noerr cin;
-      raise e
-  in
-  close_in cin;
-  g
-
-let _GRAMMAR_MAGIC = 0xaabbcc (* first value written by save; load rejects a file whose first value differs *)
-let _GRAMMAR_VERSION = 3 (* second value written by save; load rejects any other version *)
-
-(* [save g f] writes grammar [g] to file [f].
-   Layout: the magic number, the format version, then [tags], [rules],
-   [rules_offset], [tag_to_id] and [tag_of_id], each as a separate Marshal
-   value, followed by the [start] structure written by [Bp.save] directly
-   on the underlying file descriptor.
-   The channel is closed on every path; an exception raised while writing
-   is re-raised after closing.
-   Raises [Sys_error] if [f] cannot be opened or written. *)
-let save g f =
-  (* Marshal requires a channel opened in binary mode. *)
-  let cout = open_out_bin f in
-  try
-    let write a = Marshal.to_channel cout a [] in
-    write _GRAMMAR_MAGIC;
-    write _GRAMMAR_VERSION;
-    write g.tags;
-    write g.rules;
-    write g.rules_offset;
-    write g.tag_to_id;
-    write g.tag_of_id;
-    (* Drain the channel buffer before Bp.save writes through the raw
-       descriptor, so the two outputs land in order. *)
-    flush cout;
-    let fd = Unix.descr_of_out_channel cout in
-    Bp.save g.start fd;
-    close_out cout
-  with e ->
-    close_out_noerr cout;
-    raise e
-
-(* [load f] reads back a grammar written by [save] and prints its memory
-   footprint on stderr.
-   Raises [Failure] when the magic number or the format version does not
-   match, and [Sys_error] if [f] cannot be opened. The channel is closed
-   on every path, including the failure ones. *)
-let load f =
-  (* Marshal requires a channel opened in binary mode. *)
-  let cin = open_in_bin f in
-  let g =
-    try
-      let read () = Marshal.from_channel cin in
-      if (read () : int) <> _GRAMMAR_MAGIC then
-        failwith "Invalid grammar file";
-      if (read () : int) <> _GRAMMAR_VERSION then
-        failwith "Deprecated grammar format";
-      let tags : int array = read () in
-      let rules : int array = read () in
-      let rules_offset : int = read () in
-      let tag_to_id : (string, int) Hashtbl.t = read () in
-      let tag_of_id : string array = read () in
-      (* The channel may have buffered past the marshalled values: move the
-         descriptor to the logical channel position before Bp.load reads
-         from it directly. *)
-      let fd = Unix.descr_of_in_channel cin in
-      let pos = pos_in cin in
-      ignore (Unix.lseek fd pos Unix.SEEK_SET);
-      let bp = Bp.load fd in
-      {
-        start = bp;
-        tags = tags;
-        rules = rules;
-        rules_offset = rules_offset;
-        tag_to_id = tag_to_id;
-        tag_of_id = tag_of_id;
-      }
-    with e ->
-      close_in_noerr cin;
-      raise e
-  in
-  close_in cin;
-  Printf.eprintf "Grammar size:%i kb\n%!"
-    ((Ocaml.size_b g + Bp.alloc_stats ())/1024);
-  g
-
-
-type node = [ `Start ] Node.t (* index of a node in the start tree; consumed by start_tag, start_first_child and start_next_sibling *)
-
-type n_type = [ `NonTerminal ] (* type-level marker for non-terminal symbols *)
-type t_type = [ `Terminal ] (* type-level marker for terminal symbols *)
-type r_type = [ `Rule ] (* type-level marker for packed rule words *)
-type any_type = [ n_type | t_type ] (* either kind of symbol *)
-type rhs = [ r_type ] Node.t (* packed right-hand side as returned by get_rule; decoded by get_id1, get_id2 and get_conf *)
-
-type n_symbol = n_type Node.t (* symbol id typed as a non-terminal *)
-type t_symbol = t_type Node.t (* symbol id typed as a terminal *)
-type tn_symbol = [ any_type ] Node.t (* symbol id of either kind; discriminate with is_terminal or is_non_terminal *)
-
-
-type 'a partial = (* NOTE(review): the consumers of this type are outside this view; the only live use here is dummy_param. Presumably an argument of a rule under evaluation -- confirm against the evaluator *)
- | Cache of 'a
- | Leaf of int*int * StateSet.t array * node (* last component is a start-tree node *)
- | Node0 of tn_symbol (* No parameters *)
- | Node1 of tn_symbol * 'a partial (* symbol carrying one partial argument *)
- | Node2 of tn_symbol * 'a partial * 'a partial (* symbol carrying two partial arguments *)
-
-
-(* [is_nil t] is true when [t] is the terminal with id 4, the id that
-   Parse.init reserves for the underscore tag. Structural equality is used:
-   the operands are plain integers. *)
-let is_nil (t : t_symbol) =
-  Node.to_int t = 4
-
-(* The terminal with reserved id 4 (see is_nil). *)
-let nil_symbol : t_symbol =
-  Node.of_int 4
-
-(* [translate_tag _ t] maps the reserved id 4 to -1 and leaves every other
-   id unchanged. The first argument is ignored. *)
-let translate_tag _ t =
-  match t with
-  | 4 -> ~-1
-  | other -> other
-(* [to_string t tag] is the name of [tag] in grammar [t], or the
-   placeholder string below when [tag] is not a valid index of
-   [t.tag_of_id]. Negative tags (translate_tag yields -1) are treated as
-   invalid instead of raising Invalid_argument, and the bound is checked on
-   the same integer that is used as the index. *)
-let to_string t tag =
-  let id = Tag.to_int tag in
-  if id >= 0 && id < Array.length t.tag_of_id then t.tag_of_id.(id)
-  else "<!INVALID TAG!>"
-
-(* [register_tag t s] is the id of the tag named [s] in grammar [t], or the
-   reserved id 4 when [s] is unknown. Despite its name it never adds an
-   entry to the table. *)
-let register_tag t s =
-  let { tag_to_id = ids; _ } = t in
-  try Hashtbl.find ids s
-  with Not_found -> 4
-
-(* [tag_operations t] packages the tag functions of this module,
-   specialised to grammar [t], as a record of the Tag module. *)
-let tag_operations t =
-  let lookup s = register_tag t s in
-  let show s = to_string t s in
-  let remap s = translate_tag t s in
-  {
-    Tag.tag = lookup;
-    Tag.to_string = show;
-    Tag.translate = remap;
-  }
-
-(* Node 0 of the start tree. *)
-let start_root : node = Node.of_int 0
-
-(* [start_tag g idx] is the symbol labelling node [idx] of the start tree:
-   the entry of [g.tags] at the preorder rank of [idx]. *)
-let start_tag g (idx : node) : [<any_type] Node.t =
-  let rank = Bp.preorder_rank g.start (Node.to_int idx) in
-  Node.of_int g.tags.(rank)
-
-;;
-
-(* [start_first_child t idx] is the result of [Bp.first_child] on node
-   [idx] of the start tree of [t], wrapped back into a node. *)
-let start_first_child t (idx : node) =
-  let child = Bp.first_child t.start (Node.to_int idx) in
-  Node.of_int child
-
-(* [start_next_sibling t idx] is the result of [Bp.next_sibling] on node
-   [idx] of the start tree of [t], wrapped back into a node. *)
-let start_next_sibling t (idx : node) =
-  let sibling = Bp.next_sibling t.start (Node.to_int idx) in
-  Node.of_int sibling
-
-(* [is_non_terminal t n] is true when the id of [n] is at least
-   [t.rules_offset]. *)
-let is_non_terminal t (n : [< any_type ] Node.t) =
-  Node.to_int n >= t.rules_offset
-
-(* [is_terminal t n] is true when the id of [n] is below [t.rules_offset],
-   i.e. exactly when [n] is not a non-terminal. *)
-let is_terminal t (n : [< any_type ] Node.t) =
-  Node.to_int n < t.rules_offset
-
-external terminal : [< any_type ] Node.t -> t_symbol = "%identity" (* identity cast to a terminal symbol; performs no check, callers are expected to test is_terminal first *)
-external non_terminal : [< any_type ] Node.t -> n_symbol = "%identity" (* identity cast to a non-terminal symbol; performs no check, callers are expected to test is_non_terminal first *)
-
-
-let tag (n : t_symbol) : Tag.t = Obj.magic n (* unchecked reinterpretation of a terminal symbol as a Tag.t *)
-
-(* [get_rule g r] is the packed right-hand side stored for non-terminal
-   [r]. Rules are indexed by the non-terminal id minus [g.rules_offset]. *)
-let get_rule g (r : n_symbol) : rhs =
-  let slot = Node.to_int r - g.rules_offset in
-  Node.of_int g.rules.(slot)
-
-(* [get_id1 r] extracts the 27-bit field stored just above the 3
-   configuration bits of a packed rule. *)
-let get_id1 (r : rhs) : tn_symbol =
-  let bits = Node.to_int r in
-  Node.of_int ((bits lsr 3) land 0x7ffffff)
-
-(* [get_id2 r] extracts the field stored above bit 30 of a packed rule. *)
-let get_id2 (r : rhs) : tn_symbol =
-  let bits = Node.to_int r in
-  Node.of_int (bits lsr 30)
-
-(* Shape of a packed right-hand side. B is the symbol returned by get_id1,
-   C the one returned by get_id2, y0 and y1 the parameters of the rule. *)
-type conf = | C0 (* B(C) *)
-            | C1 (* B(C(y0)) *)
-            | C2 (* B(C, y0) *)
-            | C3 (* B(y0, C) *)
-            | C4 (* B(C(y0, y1)) *)
-            | C5 (* B(C(y0), y1) *)
-            | C6 (* B(y0, C(y1)) *)
-
-(* [get_conf r] decodes the low 3 bits of a packed rule.
-   The bits are decoded explicitly rather than cast, so that the value 7,
-   which names no constructor, cannot be turned into an invalid [conf].
-   Raises [Invalid_argument] on that value. *)
-let get_conf (r : rhs) : conf =
-  match (Node.to_int r) land 0b111 with
-  | 0 -> C0
-  | 1 -> C1
-  | 2 -> C2
-  | 3 -> C3
-  | 4 -> C4
-  | 5 -> C5
-  | 6 -> C6
-  | _ -> invalid_arg "get_conf: invalid rule configuration"
-
-
-(* [get_rank r] is the number of parameters (y0, y1) that the rule takes:
-   0, 1 or 2 depending on its configuration. *)
-let get_rank (r : rhs) : int =
-  let conf = get_conf r in
-  match conf with
-  | C4 | C5 | C6 -> 2
-  | C1 | C2 | C3 -> 1
-  | C0 -> 0
-
-(* [get_id1_rank r] is the number of children of the root symbol B of the
-   right-hand side: 1 when its only child is C, 2 when a parameter sits
-   next to C. Every configuration is listed so that the compiler flags this
-   function if a constructor is ever added. *)
-let get_id1_rank (r : rhs) : int =
-  match get_conf r with
-  | C0 | C1 | C4 -> 1
-  | C2 | C3 | C5 | C6 -> 2
-
-(* [get_id2_pos r] is the 1-based position of C among the children of B:
-   2 when a parameter precedes it, 1 otherwise. Every configuration is
-   listed so that the compiler flags this function if a constructor is ever
-   added. *)
-let get_id2_pos (r : rhs) : int =
-  match get_conf r with
-  | C0 | C1 | C2 | C4 | C5 -> 1
-  | C3 | C6 -> 2
-
-(* [get_id2_rank r] is the number of parameters passed to the inner symbol
-   C: 0, 1 or 2 depending on the configuration. *)
-let get_id2_rank (r : rhs) : int =
-  let conf = get_conf r in
-  match conf with
-  | C4 -> 2
-  | C1 | C5 | C6 -> 1
-  | C0 | C2 | C3 -> 0
-
-(* [is_attribute g tag] is true when [tag] is not one of the reserved ids
-   0 to 4 and its name starts with the character 2.
-   NOTE(review): the meaning of the leading 2 is not visible here;
-   presumably the grammar generator encodes attribute names that way --
-   confirm against the producer of the grammar files.
-   An empty name yields false instead of raising. *)
-let is_attribute g tag =
-  tag > 4 &&
-  (let name = to_string g tag in
-   String.length name > 0 && name.[0] = '2')
-
-(* Placeholder parameter: a Leaf with both integers set to -1, no state
-   sets and the nil node. *)
-let dummy_param : 'a partial =
-  Leaf (-1, -1, [||], Node.nil)
-
-(*
-let rec start_skip g idx count =
- if idx < Node.null then count else
- let symbol = start_tag g idx in
- if is_terminal g symbol then
- let symbol = terminal symbol in
- if symbol == nil_symbol then count else
- let count = count+1 in
- let fs = start_first_child g idx in
- let countl = start_skip g fs count in
- start_skip g fs countl
- else
- let nt = non_terminal symbol in
- let rhs = get_rule g nt in
- let nparam = get_rank rhs in
- match nparam with
- | 0 -> rule_skip g nt dummy_param dummy_param count
- | 1 -> rule_skip g nt (Leaf(0,StateSet.empty, Node.nil,start_first_child g idx)) dummy_param count
- | 2 ->
- let fc = start_first_child g idx in
- let ns = start_next_sibling g fc in
- rule_skip g nt (Leaf (0,[||],fc)) (Leaf (1,[||],ns)) count
- | _ -> assert false
-
-and rule_skip g t y0 y1 count =
- let rhs = get_rule g t in
- let id1 = get_id1 rhs in
- let id2 = get_id2 rhs in
- let conf = get_conf rhs in
- if is_non_terminal g id1 then
- let id1 = non_terminal id1 in
- match conf with
- | C0 ->rule_skip g id1 (Node0 id2) dummy_param count
- | C1 -> rule_skip g id1 (Node1(id2,y0)) dummy_param count
- | C2 -> rule_skip g id1 (Node0 id2) y0 count
- | C3 -> rule_skip g id1 y0 (Node0 id2) count
- | C4 -> rule_skip g id1 (Node2(id2, y0, y1)) dummy_param count
- | C5 -> rule_skip g id1 (Node1(id2, y0)) y1 count
- | C6 -> rule_skip g id1 y0 (Node1(id2, y1)) count
- else
- let id1 = terminal id1 in
- match conf with
- | C0 | C1 -> assert false
- | C2 -> terminal_skip g id1 (Node0 id2) y0 count
- | C3 -> terminal_skip g id1 y0 (Node0 id2) count
- | C4 -> assert false
- | C5 -> terminal_skip g id1 (Node1(id2, y0)) y1 count
- | C6 -> terminal_skip g id1 y0 (Node1(id2, y1)) count
-
-and terminal_skip g (symbol : t_symbol) y0 y1 count =
- if symbol == nil_symbol then count else
- let count = count + 1 in
- let countl = partial_skip g y0 count in
- partial_skip g y1 countl
-
-and partial_skip g l count =
- match l with
- | Cache _ -> assert false
- | Leaf (_,_,_, id) -> start_skip g id count
- | Node0 id ->
- if (terminal id) == nil_symbol then count
- else
- rule_skip g (non_terminal id) dummy_param dummy_param count
-
- | Node1 (id, y0) ->
- rule_skip g (non_terminal id) y0 dummy_param count
-
- | Node2 (id, y0, y1) ->
-
- if is_terminal g id then
- terminal_skip g (terminal id) y0 y1 count
- else
- rule_skip g (non_terminal id) y0 y1 count
-
-
-let dispatch_param0 conf id2 y0 y1 =
- match conf with
- | C0 -> Node0 id2
- | C1 -> Node1(id2,y0)
- | C2 -> Node0 id2
- | C3 -> Node0 id2
- | C4 -> Node2(id2, y0, y1)
- | C5 -> Node1(id2, y0)
- | C6 -> y0
-
-let dispatch_param1 conf id2 y0 y1 =
- match conf with
- | C0 -> dummy_param
- | C1 -> dummy_param
- | C2 -> y0
- | C3 -> Node0 id2
- | C4 -> dummy_param
- | C5 -> y1
- | C6 -> Node1(id2, y1)
-
-*)