--- /dev/null
+(* A parsed grammar: the tree of the start rule plus the tag and rule
+   tables.  Values are built by [Parse.parse], written by [save] and read
+   back by [load]. *)
+type t = {
+ (* Tree structure of the start rule.  [Parse.parse] obtains it with
+    [Bp.create] from a bitmap that receives a 1 for every opened tag and a
+    0 for every closed one. *)
+ start : Bp.t;
+ (* Tag ids of the nodes of the start rule; [rhs_tag] indexes this array
+    with [Bp.preorder_rank]. *)
+ tags : int array;
+ (* Rule table.  NOTE(review): presumably one packed entry per
+    non-terminal other than START -- confirm against [Parse.parse]. *)
+ rules : int array;
+ (* Id of the START tag plus one; [is_non_terminal] treats every id at or
+    above this value as a non-terminal. *)
+ rules_offset : int;
+ (* Tag name -> id; [Parse.parse] keeps only the ids below
+    [rules_offset]. *)
+ tag_to_id : (string, int) Hashtbl.t;
+ (* Tag id -> name, for the ids below [rules_offset]. *)
+ tag_of_id : string array
+}
+
+
+
+module Parse =
+struct
+
+ (* Scratch buffer in which [parse_tree] accumulates the characters of the
+    tag name currently being read.  It is module-level state shared by all
+    calls. *)
+ let buffer = Buffer.create 512
+
+ (* [parse_tree cin open_tag close_tag] reads characters from [cin] up to
+    and including the first '\n' or '>' met between two tags, and reports
+    the tree structure it sees through the two callbacks:
+    - [open_tag name flag] is called for each tag name; [flag] is [true]
+      exactly when the name starts with 'A';
+    - [close_tag ()] is called for each closed subtree.
+    Names are made of letters, digits and '_'.  A name followed by '('
+    opens a subtree; a name followed by ',' or '-' is a leaf (opened then
+    closed at once); a name followed by ')' is a leaf that also closes its
+    parent.  Spaces are skipped.
+    Raises [Failure] on any other character and lets the [End_of_file]
+    raised by [input_char] escape. *)
+ let parse_tree cin open_tag close_tag =
+   let invalid c = failwith ("Invalid character: " ^ (String.make 1 c)) in
+   (* Start a fresh name whose first character is [c]. *)
+   let begin_name c =
+     Buffer.clear buffer;
+     Buffer.add_char buffer c
+   in
+   (* Hand the name accumulated so far to [open_tag]. *)
+   let emit is_nterm = open_tag (Buffer.contents buffer) is_nterm in
+   let rec between_tags () =
+     match input_char cin with
+     | '\n' | '>' -> ()
+     | ' ' | ',' | '-' -> between_tags ()
+     | ')' ->
+       close_tag ();
+       between_tags ()
+     | 'A' as c ->
+       begin_name c;
+       in_name true
+     | ('a'..'z' | 'B'..'Z' | '0'..'9' | '_') as c ->
+       begin_name c;
+       in_name false
+     | c -> invalid c
+
+   and in_name is_nterm =
+     match input_char cin with
+     | ' ' -> in_name is_nterm
+     | ('a'..'z' | 'A'..'Z' | '0'..'9' | '_') as c ->
+       Buffer.add_char buffer c;
+       in_name is_nterm
+     | '(' ->
+       emit is_nterm;
+       Buffer.clear buffer;
+       between_tags ()
+     | ',' | '-' ->
+       emit is_nterm;
+       close_tag ();
+       Buffer.clear buffer;
+       between_tags ()
+     | ')' ->
+       emit is_nterm;
+       Buffer.clear buffer;
+       close_tag ();
+       close_tag ();
+       between_tags ()
+     | c -> invalid c
+   in
+   between_tags ()
+
+
+ (* Tag name -> (id, counter, flag).  The counter and the flag are
+    maintained by [add_tag]. *)
+ let tag_info = Hashtbl.create 1023
+ (* Tag id -> tag name. *)
+ let tag_of_id = Hashtbl.create 1023
+ (* Highest tag id handed out so far. *)
+ let current_id = ref 4
+ (* [init ()] empties both tables and registers the five reserved tags
+    "_ROOT", "_A", "_T", "_AT" and "_" under the ids 0 to 4, each with a
+    counter of -1 and a [false] flag. *)
+ let init() =
+   Hashtbl.clear tag_info;
+   Hashtbl.clear tag_of_id;
+   let reserved = [| "_ROOT"; "_A"; "_T"; "_AT"; "_" |] in
+   Array.iteri
+     (fun id name ->
+        Hashtbl.add tag_info name (id, ~-1, false);
+        Hashtbl.add tag_of_id id name)
+     reserved;
+   current_id := 4
+
+
+ (* [add_tag s nterm] records one more occurrence of the tag named [s] and
+    returns its updated [(id, counter, flag)] entry.
+    An unknown name gets the next free id, a counter of 0 and the flag
+    [nterm || s = "START"].  For a known name only the counter changes (it
+    is incremented); the [nterm] argument is then ignored. *)
+ let add_tag s nterm =
+   let known =
+     try Some (Hashtbl.find tag_info s) with Not_found -> None
+   in
+   let entry =
+     match known with
+     | Some (id, count, flag) -> id, count + 1, flag
+     | None ->
+       incr current_id;
+       let fresh = !current_id in
+       Hashtbl.add tag_of_id fresh s;
+       fresh, 0, (nterm || s = "START")
+   in
+   Hashtbl.replace tag_info s entry;
+   entry
+
+
+ (* Labelled tree built by [parse_small_tree]: a tag name and the list of
+    child trees. *)
+ type tree = Node of string * tree list
+
+ (* [parse_small_tree cin] reads one tree from [cin] with [parse_tree] and
+    returns it as a [tree].  Every tag other than "y0" and "y1" is
+    registered with [add_tag].
+    Raises [End_of_file] when the text read does not amount to exactly one
+    complete tree; the exceptions of [parse_tree] are not caught. *)
+ let parse_small_tree cin =
+   (* Stack of subtrees under construction, innermost first; the bottom
+      element is a dummy root that collects the result. *)
+   let pending = ref [ Node ("", []) ] in
+   let close_tag () =
+     match !pending with
+     | Node (child_tag, rev_kids) :: Node (parent_tag, siblings) :: below ->
+       (* Children were consed in reverse order; restore it on close. *)
+       let child = Node (child_tag, List.rev rev_kids) in
+       pending := Node (parent_tag, child :: siblings) :: below
+     | _ -> assert false
+   in
+   let open_tag s isnterm =
+     if not (s = "y0" || s = "y1") then ignore (add_tag s isnterm);
+     pending := Node (s, []) :: !pending
+   in
+   parse_tree cin open_tag close_tag;
+   match !pending with
+   | [ Node (_, [ only ]) ] -> only
+   | _ -> raise End_of_file
+
+ (* [parse_big_tree cin] reads one tree from [cin] with [parse_tree]
+    without materialising it: each opened tag pushes a 1 bit and its id
+    (as given by [add_tag]), each closed tag pushes a 0 bit.  Returns the
+    [Bp] structure created from the bits together with the packed array of
+    tag ids. *)
+ let parse_big_tree cin =
+   let shape = Bp.bitmap_create () in
+   let ids = IntArray.create () in
+   let on_close () = Bp.bitmap_push_back shape 0 in
+   let on_open s isnterm =
+     let id, _, _ = add_tag s isnterm in
+     Bp.bitmap_push_back shape 1;
+     IntArray.push_back ids id
+   in
+   parse_tree cin on_open on_close;
+   Bp.create shape, IntArray.pack ids
+
+ (* [eat_char cin] consumes one character of [cin] and discards it. *)
+ let eat_char cin =
+   let (_ : char) = input_char cin in
+   ()
+
+ (* [h_find ~msg h i] is [Hashtbl.find h i], except that a missing key is
+    reported on stderr before [Not_found] is re-raised.  The key is only
+    printed when its runtime representation is an immediate integer or a
+    string; [msg] labels the call site in the report. *)
+ let h_find ?(msg="") h i =
+   let report_missing () =
+     let boxed = Obj.repr i in
+     begin
+       if Obj.is_int boxed then
+         Printf.eprintf "Not_found (%s): %i\n%!" msg (Obj.magic i)
+     end;
+     if Obj.tag boxed = Obj.string_tag then
+       Printf.eprintf "Not_found (%s): %s\n%!" msg (Obj.magic i)
+   in
+   try Hashtbl.find h i with
+   | Not_found ->
+     report_missing ();
+     raise Not_found
+ ;;
+
+ (* [parse cin] reads a whole grammar from [cin] and returns it as a [t].
+    Expected input, in order: one small tree (the START left-hand side),
+    one big tree (its right-hand side), then pairs of small trees
+    [lhs rhs] until [parse_small_tree] raises [End_of_file].
+    Tags are renumbered so that the five reserved tags keep ids 0 to 4,
+    followed by the other terminals in order of first appearance, then
+    START, then the remaining non-terminals by decreasing occurrence
+    count.
+    Raises [Failure] on an invalid character and [Not_found] (after a
+    message on stderr, see [h_find]) when a tag lookup fails. *)
+ let parse cin =
+   let rules = Hashtbl.create 1023 in
+   init ();
+   (* The first tree is only read to register its tag. *)
+   ignore (parse_small_tree cin);
+   let bv, tags = parse_big_tree cin in
+   let () =
+     try
+       while true do
+         let lhs = parse_small_tree cin in
+         let rhs = parse_small_tree cin in
+         Hashtbl.add rules lhs rhs
+       done
+     with End_of_file -> ()
+   in
+   (* First, re-order the tags: [old_new_mapping.(new_id)] is a tag name. *)
+   let old_new_mapping =
+     Array.init (Hashtbl.length tag_of_id)
+       (fun i -> h_find ~msg:"1" tag_of_id i)
+   in
+   let compare_tags tag1 tag2 =
+     let t1, count1, isnterm1 =
+       h_find ~msg:"2" tag_info tag1
+     and t2, count2, isnterm2 =
+       h_find ~msg:"3" tag_info tag2
+     in
+     if t1 <= 4 && t2 <= 4 then compare t1 t2
+     else if t1 <= 4 then -1
+     else if t2 <= 4 then 1
+     else if (not isnterm1) && (not isnterm2) then compare t1 t2
+     else if isnterm1 && isnterm2 then
+       begin match tag1, tag2 with
+         | "START", "START" -> 0
+         | "START", _ -> ~-1
+         | _, "START" -> 1
+         | _ -> compare count2 count1
+       end
+     else if isnterm2 then -1
+     else 1
+   in
+   Array.fast_sort compare_tags old_new_mapping;
+   let tag_to_id = Hashtbl.create 503 in
+   Array.iteri (fun i s -> Hashtbl.add tag_to_id s i) old_new_mapping;
+   (* Translate the tag ids of the big tree to the new numbering. *)
+   let renum_tags = Array.copy tags in
+   for i = 0 to Array.length tags - 1 do
+     renum_tags.(i) <-
+       h_find ~msg:"4" tag_to_id (h_find ~msg:"5" tag_of_id (tags.(i)))
+   done;
+   let r_array = Array.make (Hashtbl.length rules) 0 in
+   (* START sorts first among the non-terminals, so every other
+      non-terminal has an id of at least [rules_offset]. *)
+   let rules_offset = h_find ~msg:"6" tag_to_id "START" + 1 in
+   (* Tag and 1-based position of the first child that is neither "y0" nor
+      "y1"; fails with an assertion when there is none. *)
+   let pos_id2 l =
+     let rec loop i l =
+       match l with
+       | [] -> assert false
+       | Node (tag, _) :: ll ->
+         if tag <> "y0" && tag <> "y1" then tag, i
+         else loop (i + 1) ll
+     in
+     loop 1 l
+   in
+   (* Pack each rule into one integer, from the low bits up: number of
+      children of the right-hand side (2 bits), position of the nested tag
+      (2 bits), id of the right-hand side root (27 bits), id of the nested
+      tag.  Nothing here checks that the values fit their fields. *)
+   Hashtbl.iter (fun lhs rhs ->
+       let Node (head, _) = lhs in
+       let Node (tag1, params) = rhs in
+       let tag2, pos2 = pos_id2 params in
+       let id1 = h_find ~msg:"7" tag_to_id tag1
+       and id2 = h_find ~msg:"8" tag_to_id tag2
+       in
+       let rule_ = id2 lsl 27 in
+       let rule_ = (rule_ lor id1) lsl 2 in
+       let rule_ = (rule_ lor pos2) lsl 2 in
+       let rule_ = rule_ lor (List.length params) in
+       r_array.((h_find ~msg:"9" tag_to_id head) - rules_offset) <- rule_
+     ) rules;
+   let l = Array.length renum_tags in
+   let tag32 = Array32.create l 0 in
+   for i = 0 to l - 1 do
+     Array32.set tag32 i (renum_tags.(i) land 0x7ffffff)
+   done;
+   (* Remove the non-terminal names from the hash tables *)
+   let tag_to_id2 = Hashtbl.create 31 in
+   Hashtbl.iter
+     (fun s i -> if i < rules_offset then Hashtbl.add tag_to_id2 s i)
+     tag_to_id;
+   { start = bv;
+     (* NOTE(review): assumes the array type of [Array32] is compatible
+        with the [int array] declared for this field -- confirm. *)
+     tags = tag32;
+     (* The packed rule table built above.  The previous code stored
+        [renum_tags] here, which duplicated the tags and dropped
+        [r_array]. *)
+     rules = r_array;
+     rules_offset = rules_offset;
+     tag_to_id = tag_to_id2;
+     tag_of_id = Array.sub old_new_mapping 0 rules_offset
+   }
+
+end
+
+(* [parse file] opens [file], parses the grammar it contains with
+   [Parse.parse] and returns it.  The channel is closed on every path,
+   including when parsing raises; the exception is then re-raised. *)
+let parse file =
+  let cin = open_in file in
+  let g =
+    try Parse.parse cin
+    with e -> (close_in_noerr cin; raise e)
+  in
+  close_in cin;
+  g
+
+(* Magic number: the first value written by [save] and the first one
+   checked by [load]. *)
+let _GRAMMAR_MAGIC = 0xaabbcc
+(* Format version: written by [save] right after the magic number; [load]
+   rejects a file carrying any other value. *)
+let _GRAMMAR_VERSION = 2
+
+(* [save g f] writes the grammar [g] to the file named [f].
+   Layout: the magic number, the format version, then [tags], [rules],
+   [rules_offset], [tag_to_id] and [tag_of_id], each marshalled
+   separately, followed by whatever [Bp.save] writes for [g.start] on the
+   same file descriptor.
+   The file is opened in binary mode, as the Marshal module requires, and
+   the channel is closed even when writing fails (the exception is
+   re-raised). *)
+let save g f =
+  let cout = open_out_bin f in
+  try
+    let write a = Marshal.to_channel cout a [] in
+    write _GRAMMAR_MAGIC;
+    write _GRAMMAR_VERSION;
+    write g.tags;
+    write g.rules;
+    write g.rules_offset;
+    write g.tag_to_id;
+    write g.tag_of_id;
+    (* Empty the channel buffer before handing the raw descriptor over. *)
+    flush cout;
+    let fd = Unix.descr_of_out_channel cout in
+    Bp.save g.start fd;
+    close_out cout
+  with e ->
+    close_out_noerr cout;
+    raise e
+
+(* [load f] reads back a grammar written by [save] to the file named [f].
+   The current channel position is printed on stderr after each section.
+   Raises [Failure "Invalid grammar file"] when the magic number does not
+   match and [Failure "Deprecated grammar format"] when the version does
+   not match.
+   The file is opened in binary mode, as the Marshal module requires, and
+   the channel is closed on every path, failures included (the exception
+   is re-raised). *)
+let load f =
+  let cin = open_in_bin f in
+  try
+    let pr_pos () =
+      Printf.eprintf "Position: %i kiB\n" (pos_in cin / 1024)
+    in
+    let read () = Marshal.from_channel cin in
+    if read () <> _GRAMMAR_MAGIC then failwith "Invalid grammar file";
+    if read () <> _GRAMMAR_VERSION then failwith "Deprecated grammar format";
+    pr_pos ();
+    let tags : int array = read () in
+    pr_pos ();
+    let rules : int array = read () in
+    pr_pos ();
+    let rules_offset : int = read () in
+    pr_pos ();
+    let tag_to_id : (string, int) Hashtbl.t = read () in
+    pr_pos ();
+    let tag_of_id : string array = read () in
+    pr_pos ();
+    (* The channel may have buffered past the marshalled data: move the
+       descriptor back to the logical position before [Bp.load] reads
+       from it directly. *)
+    let fd = Unix.descr_of_in_channel cin in
+    let pos = pos_in cin in
+    ignore (Unix.lseek fd pos Unix.SEEK_SET);
+    let bp = Bp.load fd in
+    close_in cin;
+    {
+      start = bp;
+      tags = tags;
+      rules = rules;
+      rules_offset = rules_offset;
+      tag_to_id = tag_to_id;
+      tag_of_id = tag_of_id;
+    }
+  with e ->
+    close_in_noerr cin;
+    raise e
+
+
+(* A [Node.t] tagged with [`Grammar]. *)
+type node = [ `Grammar ] Node.t
+
+(* Tags for the three kinds of symbols, and their union. *)
+type p_type = [ `Parameter ]
+type n_type = [ `NonTerminal ]
+type t_type = [ `Terminal ]
+type any_type = [ p_type | n_type | t_type ]
+(* A symbol of any kind. *)
+type symbol = [ any_type ] Node.t
+
+(* Symbols restricted to one kind: parameter, non-terminal, terminal, and
+   (for [tn_symbol]) terminal or non-terminal. *)
+type p_symbol = p_type Node.t
+type n_symbol = n_type Node.t
+type t_symbol = t_type Node.t
+type tn_symbol = [ n_type | t_type ] Node.t
+
+
+(* [is_nil t] is [true] when the terminal symbol [t] has id 4, the id that
+   [Parse.init] gives to the reserved tag "_". *)
+let is_nil (t : t_symbol) =
+  Node.to_int t = 4
+
+(* The terminal symbol with id 4, the id that [Parse.init] gives to the
+   reserved tag "_". *)
+let nil_symbol = (Node.of_int 4 : t_symbol)
+
+(* [translate_tag _ t] maps the id 4 to -1 and leaves every other id
+   unchanged; the first argument is ignored. *)
+let translate_tag _ t =
+  match t with
+  | 4 -> ~-1
+  | other -> other
+(* [to_string t tag] is the name stored for [tag] in the [tag_of_id] table
+   of the grammar [t].  Raises [Invalid_argument] when the id is outside
+   that table. *)
+let to_string t tag = t.tag_of_id.(Tag.to_int tag)
+(* [register_tag t tag] is the id bound to the tag name [tag] in the
+   [tag_to_id] table of the grammar [t], or 4 (the id of the reserved tag
+   "_") when the name is unknown.
+   The table is keyed by tag name, so the lookup uses the name itself.
+   NOTE(review): assumes callers pass the tag name as a string, as
+   [tag_operations] appears to do -- confirm against [Tag]. *)
+let register_tag t tag =
+  try Hashtbl.find t.tag_to_id tag with
+    Not_found -> 4
+
+(* [tag_operations t] bundles the tag functions of this module, each
+   specialised to the grammar [t], into a record of [Tag] operations. *)
+let tag_operations t =
+  let lookup s = register_tag t s in
+  let name_of s = to_string t s in
+  let translate s = translate_tag t s in
+  { Tag.tag = lookup; Tag.to_string = name_of; Tag.translate = translate }
+
+
+(* [rhs_tag t idx] is the tag id stored in [t.tags] at the preorder rank
+   that [Bp.preorder_rank] gives for [idx] in [t.start]. *)
+let rhs_tag t idx =
+  let rank = Bp.preorder_rank t.start idx in
+  t.tags.(rank)
+
+(* [rhs_first_child t idx] is [Bp.first_child] applied to the start tree of
+   [t] and [idx]. *)
+let rhs_first_child t idx =
+  let tree = t.start in
+  Bp.first_child tree idx
+
+(* [rhs_next_sibling t idx] is [Bp.next_sibling] applied to the start tree
+   of [t] and [idx]. *)
+let rhs_next_sibling t idx =
+  let tree = t.start in
+  Bp.next_sibling tree idx
+
+(* [is_non_terminal t n] is [true] when the integer value of the symbol [n]
+   is at least [t.rules_offset]. *)
+let is_non_terminal t (n : [< any_type ] Node.t) =
+  t.rules_offset <= Node.to_int n
+
+(* [is_terminal t n] is the negation of [is_non_terminal t n]: it holds
+   when the integer value of [n] is below [t.rules_offset]. *)
+let is_terminal t (n : [< any_type ] Node.t) = not (is_non_terminal t n)
+
+(* [tag n] reinterprets the terminal symbol [n] as a [Tag.t] with
+   [Obj.magic], i.e. without any conversion or check.
+   NOTE(review): relies on both types sharing a runtime representation --
+   confirm against [Node] and [Tag]. *)
+let tag : t_symbol -> Tag.t = fun n -> Obj.magic n
+