From: Kim Nguyễn Date: Tue, 24 Jul 2012 15:33:03 +0000 (+0200) Subject: Remove all traces of Tom's Grammar. X-Git-Url: http://git.nguyen.vg/gitweb/?p=SXSI%2Fxpathcomp.git;a=commitdiff_plain;h=a05cf380a8b16c742dbb1e005e96600e6f727436 Remove all traces of Tom's Grammar. --- diff --git a/src/grammar2.ml b/src/grammar2.ml deleted file mode 100644 index ca21d0a..0000000 --- a/src/grammar2.ml +++ /dev/null @@ -1,506 +0,0 @@ -type t = { - start : Bp.t; - tags : int array; - rules : int array; - rules_offset : int; - tag_to_id : (string, int) Hashtbl.t; - tag_of_id : string array -} - - - -module Parse = -struct - - let buffer = Buffer.create 512 - - let parse_tree cin open_tag close_tag = - let rec loop () = - let c = input_char cin in - match c with - '\n'| '>' -> () - | ' ' | ',' | '-' -> loop () - | 'a'..'z' | 'B'..'Z' | '0'..'9' | '_' -> - Buffer.clear buffer; - Buffer.add_char buffer c; - loop_tag false - - | 'A' -> Buffer.clear buffer; - Buffer.add_char buffer c; - loop_tag true - | ')' -> close_tag (); loop () - | _ -> failwith ("Invalid character: " ^ (String.make 1 c)) - - and loop_tag t = - let c = input_char cin in - match c with - | 'a'..'z' | 'A'..'Z' | '0'..'9' | '_' -> - Buffer.add_char buffer c; - loop_tag t - | '(' -> let s = Buffer.contents buffer in - open_tag s t; - Buffer.clear buffer; - loop () - | ' ' -> loop_tag t - | ',' | '-' -> let s = Buffer.contents buffer in - open_tag s t; - close_tag (); - Buffer.clear buffer; - loop () - | ')' -> let s = Buffer.contents buffer in - open_tag s t; - Buffer.clear buffer; - close_tag (); - close_tag (); - loop () - | _ -> failwith ("Invalid character: " ^ (String.make 1 c)) - in - loop () - - - let tag_info = Hashtbl.create 1023 - let tag_of_id = Hashtbl.create 1023 - let current_id = ref 4 - let init() = - Hashtbl.clear tag_info; - Hashtbl.clear tag_of_id; - current_id := 4; - Hashtbl.add tag_info "_ROOT" (0, ~-1, false); - Hashtbl.add tag_info "_A" (1, ~-1, false); - Hashtbl.add tag_info "_T" (2, ~-1, false); - Hashtbl.add tag_info "_AT" (3, ~-1, false); - Hashtbl.add tag_info "_" (4, ~-1, false); - Hashtbl.add tag_of_id 0 "_ROOT"; - Hashtbl.add tag_of_id 1 "_A"; - Hashtbl.add tag_of_id 2 "_T"; - Hashtbl.add tag_of_id 3 "_AT"; - Hashtbl.add tag_of_id 4 "_" - - - let add_tag s nterm = - let id, count, nterm = - try Hashtbl.find tag_info s with - Not_found -> - incr current_id; - let id = !current_id in - Hashtbl.add tag_of_id id s; - (!current_id, ~-1, nterm || s = "START") - in - let r = id, count+1, nterm in - Hashtbl.replace tag_info s r; - r - - - type tree = Node of string * tree list - - let parse_small_tree cin = - let stack = ref [ Node("", []) ] in - let open_tag s isnterm = - if s <> "y0" && s <> "y1" then ignore(add_tag s isnterm); - stack := Node(s, []) :: !stack - in - let close_tag () = - match !stack with - Node(t1, l1) :: Node(t2, l2) :: r -> - stack := Node(t2, Node(t1, List.rev l1)::l2) :: r - | _ -> assert false - in - parse_tree cin open_tag close_tag; - match !stack with - [ Node(_, [ l ]) ] -> l - | _ -> raise End_of_file - - let parse_big_tree cin = - let bv = Bp.bitmap_create () in - let tags = IntArray.create () in - let open_tag s isnterm = - let id, _, _ = add_tag s isnterm in - Bp.bitmap_push_back bv 1; - IntArray.push_back tags id - in - let close_tag () = - Bp.bitmap_push_back bv 0 - in - parse_tree cin open_tag close_tag; - Bp.create bv, IntArray.pack tags - - let eat_char cin = ignore (input_char cin) - - let h_find ?(msg="") h i = - try - Hashtbl.find h i - with - Not_found -> - let r = Obj.repr i in - if Obj.is_int r then Printf.eprintf "Not_found (%s): %i\n%!" msg (Obj.magic i); - if Obj.tag r = Obj.string_tag then Printf.eprintf "Not_found (%s): %s\n%!" msg (Obj.magic i); - raise Not_found - ;; - - let parse cin = - let rules = Hashtbl.create 1023 in - init (); - (* START *) - ignore (parse_small_tree cin); - (* > *) - (* ignore (input_char cin); *) - let bv, tags = parse_big_tree cin in - let () = - try - while true do - let lhs = parse_small_tree cin in - let rhs = parse_small_tree cin in - Hashtbl.add rules lhs rhs - done; - with End_of_file -> () - in - (* First, re-order the tags *) - let old_new_mapping = - Array.init (Hashtbl.length tag_of_id) - (fun i -> h_find ~msg:"1" tag_of_id i) - in - Array.fast_sort (fun tag1 tag2 -> - let t1, count1, isnterm1 = - h_find ~msg:"2" tag_info tag1 - and t2, count2, isnterm2 = - h_find ~msg:"3" tag_info tag2 - in - if t1 <= 4 && t2 <= 4 then compare t1 t2 - else if t1 <= 4 then -1 - else if t2 <= 4 then 1 - else - if (not isnterm1) && (not isnterm2) then compare t1 t2 - else if isnterm1 && isnterm2 then - match tag1, tag2 with - "START", "START" -> 0 - | "START", _ -> ~-1 - | _, "START" -> 1 - | _ -> compare count2 count1 - else if isnterm2 then -1 - else 1) old_new_mapping; - let tag_to_id = Hashtbl.create 503 in - Array.iteri (fun i s -> - Hashtbl.add tag_to_id s i) old_new_mapping; - let renum_tags = Array.copy tags in - for i = 0 to Array.length tags - 1 do - renum_tags.(i) <- - h_find ~msg:"4" tag_to_id (h_find ~msg:"5" tag_of_id (tags.(i))) - done; - let r_array = Array.create (Hashtbl.length rules) 0 in - let rules_offset = h_find ~msg:"6" tag_to_id "START" + 1 in - let pos_id2 l = - let rec loop i l = - match l with - [] -> assert false - | Node(tag, children) :: ll -> - if tag <> "y0" && tag <> "y1" then - tag, i - else loop (i+1) ll - in - loop 1 l - in - Hashtbl.iter (fun lhs rhs -> - let Node( head, args ) = lhs in - let Node( tag1, params) = rhs in - let tag2, pos2 = pos_id2 params in - let id1 = h_find ~msg:"7" tag_to_id tag1 - and id2 = h_find ~msg:"8" tag_to_id tag2 in - let conf = - if List.length args = 0 then 0 - else - if List.length args = 1 then - if List.length params = 1 then 1 - else if pos2 = 1 then 2 - else 3 - else (* 2 parameters *) - if List.length params = 1 then 4 - else if pos2 = 1 then 5 - else 6 - in - let rule_ = id2 lsl 27 in - let rule_ = (rule_ lor id1) lsl 3 in - let rule_ = rule_ lor conf in - r_array.((h_find ~msg:"9" tag_to_id head) - rules_offset ) <- rule_ - ) rules; - (*let l = Array.length renum_tags in *) - (*let tag32 = Array32.create l 0 in - for i = 0 to l - 1 do - Array32.set tag32 i (renum_tags.(i) land 0x7ffffff); - done; *) - (* Remove the non-terminal names from the hash tables *) - let tag_to_id2 = Hashtbl.create 31 in - Hashtbl.iter (fun s i -> if i < rules_offset then Hashtbl.add tag_to_id2 s i) - tag_to_id; - { start = bv; - tags = renum_tags; - rules = r_array; - rules_offset = rules_offset; - tag_to_id = tag_to_id2; - tag_of_id = Array.sub old_new_mapping 0 rules_offset - } - -end - -let parse file = - let cin = open_in file in - let g = Parse.parse cin in - close_in cin; - g - -let _GRAMMAR_MAGIC = 0xaabbcc -let _GRAMMAR_VERSION = 3 - -let save g f = - let cout = open_out f in - let write a = Marshal.to_channel cout a [ ] - in - write _GRAMMAR_MAGIC; - write _GRAMMAR_VERSION; - write g.tags; - write g.rules; - write g.rules_offset; - write g.tag_to_id; - write g.tag_of_id; - flush cout; - let fd = Unix.descr_of_out_channel cout in - Bp.save g.start fd; - close_out cout - -let load f = - let cin = open_in f in - let read () = Marshal.from_channel cin in - if read () != _GRAMMAR_MAGIC then failwith "Invalid grammar file"; - if read () != _GRAMMAR_VERSION then failwith "Deprecated grammar format"; - let tags : int array = read () in - let rules : int array = read () in - let rules_offset : int = read () in - let tag_to_id : (string, int) Hashtbl.t = read () in - let tag_of_id : string array = read () in - let fd = Unix.descr_of_in_channel cin in - let pos = pos_in cin in - ignore(Unix.lseek fd pos Unix.SEEK_SET); - let bp = Bp.load fd in - close_in cin; - let g = { - start = bp; - tags = tags; - rules = rules; - rules_offset = rules_offset; - tag_to_id = tag_to_id; - tag_of_id = tag_of_id; - } in - Printf.eprintf "Grammar size:%i kb\n%!" - ((Ocaml.size_b g + Bp.alloc_stats ())/1024); - g - - -type node = [ `Start ] Node.t - -type n_type = [ `NonTerminal ] -type t_type = [ `Terminal ] -type r_type = [ `Rule ] -type any_type = [ n_type | t_type ] -type rhs = [ r_type ] Node.t - -type n_symbol = n_type Node.t -type t_symbol = t_type Node.t -type tn_symbol = [ any_type ] Node.t - - -type 'a partial = - | Cache of 'a - | Leaf of int*int * StateSet.t array * node - | Node0 of tn_symbol (* No parameters *) - | Node1 of tn_symbol * 'a partial - | Node2 of tn_symbol * 'a partial * 'a partial - - -let is_nil (t : t_symbol) = - (Node.to_int t) == 4 - -let nil_symbol : t_symbol = - (Node.of_int 4) - -let translate_tag _ t = if t == 4 then ~-1 else t -let to_string t tag = - if tag < Array.length t.tag_of_id then t.tag_of_id.(Tag.to_int tag) - else "" - -let register_tag t s = - try Hashtbl.find t.tag_to_id s with - Not_found -> 4 - -let tag_operations t = { - Tag.tag = (fun s -> register_tag t s); - Tag.to_string = (fun s -> to_string t s); - Tag.translate = (fun s -> translate_tag t s); -} - -let start_root : node = Node.of_int 0 -let start_tag g (idx : node) : [= t.rules_offset - -let is_terminal t (n : [< any_type ] Node.t) = not(is_non_terminal t n) - -external terminal : [< any_type ] Node.t -> t_symbol = "%identity" -external non_terminal : [< any_type ] Node.t -> n_symbol = "%identity" - - -let tag (n : t_symbol) : Tag.t = Obj.magic n - -let get_rule g (r : n_symbol) : rhs = - Node.of_int (g.rules.((Node.to_int r) - g.rules_offset)) - -let get_id1 (r : rhs) : tn_symbol = - Node.of_int(((Node.to_int r) lsr 3) land 0x7ffffff) - -let get_id2 (r : rhs) : tn_symbol = - Node.of_int((Node.to_int r) lsr 30) - -type conf = | C0 (* B(C) *) - | C1 (* B(C(y0)) *) - | C2 (* B(C, y0) *) - | C3 (* B(y0, C) *) - | C4 (* B(C(y0, y1)) *) - | C5 (* B(C(y0), y1) *) - | C6 (* B(y0, C(y1)) *) - -let get_conf (r : rhs) : conf = - (Obj.magic ((Node.to_int r) land 0b111)) - - -let get_rank (r : rhs) : int = - match get_conf r with - | C0 -> 0 - | C1 | C2 | C3 -> 1 - | C4 | C5 | C6 -> 2 - -let get_id1_rank (r : rhs) : int = - match get_conf r with - | C0 | C1 | C4 -> 1 - | _ -> 2 - -let get_id2_pos (r : rhs) : int = - match get_conf r with - | C0 | C1 |C2 | C4 | C5 -> 1 - | _ -> 2 - -let get_id2_rank (r : rhs) : int = - match get_conf r with - | C0 | C2 | C3 -> 0 - | C1 | C5 | C6 -> 1 - | C4 -> 2 - -let is_attribute g tag = - tag > 4 && (to_string g tag).[0] == '2' - -let dummy_param : 'a partial = Leaf (~-1,~-1, [||], Node.nil) - -(* -let rec start_skip g idx count = - if idx < Node.null then count else - let symbol = start_tag g idx in - if is_terminal g symbol then - let symbol = terminal symbol in - if symbol == nil_symbol then count else - let count = count+1 in - let fs = start_first_child g idx in - let countl = start_skip g fs count in - start_skip g fs countl - else - let nt = non_terminal symbol in - let rhs = get_rule g nt in - let nparam = get_rank rhs in - match nparam with - | 0 -> rule_skip g nt dummy_param dummy_param count - | 1 -> rule_skip g nt (Leaf(0,StateSet.empty, Node.nil,start_first_child g idx)) dummy_param count - | 2 -> - let fc = start_first_child g idx in - let ns = start_next_sibling g fc in - rule_skip g nt (Leaf (0,[||],fc)) (Leaf (1,[||],ns)) count - | _ -> assert false - -and rule_skip g t y0 y1 count = - let rhs = get_rule g t in - let id1 = get_id1 rhs in - let id2 = get_id2 rhs in - let conf = get_conf rhs in - if is_non_terminal g id1 then - let id1 = non_terminal id1 in - match conf with - | C0 ->rule_skip g id1 (Node0 id2) dummy_param count - | C1 -> rule_skip g id1 (Node1(id2,y0)) dummy_param count - | C2 -> rule_skip g id1 (Node0 id2) y0 count - | C3 -> rule_skip g id1 y0 (Node0 id2) count - | C4 -> rule_skip g id1 (Node2(id2, y0, y1)) dummy_param count - | C5 -> rule_skip g id1 (Node1(id2, y0)) y1 count - | C6 -> rule_skip g id1 y0 (Node1(id2, y1)) count - else - let id1 = terminal id1 in - match conf with - | C0 | C1 -> assert false - | C2 -> terminal_skip g id1 (Node0 id2) y0 count - | C3 -> terminal_skip g id1 y0 (Node0 id2) count - | C4 -> assert false - | C5 -> terminal_skip g id1 (Node1(id2, y0)) y1 count - | C6 -> terminal_skip g id1 y0 (Node1(id2, y1)) count - -and terminal_skip g (symbol : t_symbol) y0 y1 count = - if symbol == nil_symbol then count else - let count = count + 1 in - let countl = partial_skip g y0 count in - partial_skip g y1 countl - -and partial_skip g l count = - match l with - | Cache _ -> assert false - | Leaf (_,_,_, id) -> start_skip g id count - | Node0 id -> - if (terminal id) == nil_symbol then count - else - rule_skip g (non_terminal id) dummy_param dummy_param count - - | Node1 (id, y0) -> - rule_skip g (non_terminal id) y0 dummy_param count - - | Node2 (id, y0, y1) -> - - if is_terminal g id then - terminal_skip g (terminal id) y0 y1 count - else - rule_skip g (non_terminal id) y0 y1 count - - -let dispatch_param0 conf id2 y0 y1 = - match conf with - | C0 -> Node0 id2 - | C1 -> Node1(id2,y0) - | C2 -> Node0 id2 - | C3 -> Node0 id2 - | C4 -> Node2(id2, y0, y1) - | C5 -> Node1(id2, y0) - | C6 -> y0 - -let dispatch_param1 conf id2 y0 y1 = - match conf with - | C0 -> dummy_param - | C1 -> dummy_param - | C2 -> y0 - | C3 -> Node0 id2 - | C4 -> dummy_param - | C5 -> y1 - | C6 -> Node1(id2, y1) - -*) diff --git a/src/grammar2.mli b/src/grammar2.mli deleted file mode 100644 index c884ff1..0000000 --- a/src/grammar2.mli +++ /dev/null @@ -1,56 +0,0 @@ -type t - -type node = [ `Start ] Node.t - -type n_type = [ `NonTerminal ] -type t_type = [ `Terminal ] -type r_type = [ `Rule ] -type any_type = [ n_type | t_type ] -type rhs = [ r_type ] Node.t - -type n_symbol = n_type Node.t -type t_symbol = t_type Node.t -type tn_symbol = [ any_type ] Node.t - -type 'a partial = - | Cache of 'a - | Leaf of int*int * StateSet.t array * node - | Node0 of tn_symbol (* No parameters *) - | Node1 of tn_symbol * 'a partial - | Node2 of tn_symbol * 'a partial * 'a partial - -type conf = C0 | C1 | C2 | C3 | C4 | C5 | C6 - -val parse : string -> t -val save : t -> string -> unit -val load : string -> t -val tag_operations : t -> Tag.operations - -val start_tag : t -> node -> tn_symbol -val is_terminal : t -> [< any_type ] Node.t -> bool -val is_non_terminal : t -> [< any_type ] Node.t -> bool -external terminal : [< any_type ] Node.t -> t_symbol = "%identity" -external non_terminal : [< any_type ] Node.t -> n_symbol = "%identity" -val nil_symbol : t_symbol -val tag : t_symbol -> Tag.t -val start_first_child : t -> node -> node -val start_next_sibling : t -> node -> node -val get_rule : t -> n_symbol -> rhs -val get_conf : rhs -> conf -val get_rank : rhs -> int -val get_id1_rank : rhs -> int -val get_id2_rank : rhs -> int -val get_id2_pos : rhs -> int -val get_id1 : rhs -> tn_symbol -val get_id2 : rhs -> tn_symbol - -val is_attribute : t -> Tag.t -> bool -(* -val start_skip : t -> node -> int -> int -val rule_skip : t -> n_symbol -> 'a partial -> 'a partial -> int -> int -*) -val dummy_param : 'a partial -(* -val dispatch_param0 : conf -> tn_symbol -> 'a partial -> 'a partial -> 'a partial -val dispatch_param1 : conf -> tn_symbol -> 'a partial -> 'a partial -> 'a partial -*) diff --git a/src/logger.ml b/src/logger.ml index e113010..7796b73 100644 --- a/src/logger.ml +++ b/src/logger.ml @@ -8,7 +8,6 @@ let loggers = [ "top-down-run"; "result-set"; "level2-jit"; "res-jit"; - "grammar"; "twopass"; "transition"; "bottom-up" ] diff --git a/src/main.ml b/src/main.ml index 8e0e130..0fbebad 100644 --- a/src/main.ml +++ b/src/main.ml @@ -86,55 +86,8 @@ let () = Options.parse_cmdline() let _ = try Printexc.record_backtrace true; - let document = - if Filename.check_suffix !Options.input_file ".g.bin" || - Filename.check_suffix !Options.input_file ".g" - then - let is_index = Filename.check_suffix !Options.input_file ".g.bin" in - let g = - if is_index then - time ~msg:"Loading grammar" (Grammar2.load) !Options.input_file - else - let g = time ~msg:"Parsing grammar file" Grammar2.parse !Options.input_file in - if !Options.save_file <> "" then - time ~msg:"Saving index" (Grammar2.save g) !Options.save_file; - g - in - begin - (* TODO Factorise with main *) - Tag.init (Grammar2.tag_operations g); - let query = - time ~msg:"Parsing query" XPath.parse !Options.query - in - if !Options.verbose then begin - Printf.eprintf "Parsed query:\n%!"; - XPath.Ast.print Format.err_formatter query; - Format.fprintf Format.err_formatter "\n%!" - end; - let auto, bu_info = - time ~msg:"Compiling query" (Compile.compile) query - in - if !Options.verbose then Ata.print Format.err_formatter auto; - Gc.full_major(); - Gc.compact(); - Gc.set (tuned_gc); - let runtime = - if !Options.count_only then - let module R = ResJIT.Make(NodeSet.Partial(NodeSet.Count)) in - let module M = Runtime.Make(R) in - (* mk_runtime run auto doc arg count print outfile *) - mk_runtime M.grammar_run auto (Obj.magic g) () R.NS.length (Obj.magic R.NS.serialize) None - else - let module R = ResJIT.Mat in - let module M = Runtime.Make(R) in - (* mk_runtime run auto doc arg count print outfile *) - mk_runtime M.grammar_run auto (Obj.magic g) () R.NS.length (Obj.magic R.NS.serialize) None - in - runtime (); - exit 0 - end - else if Filename.check_suffix !Options.input_file ".srx" + if Filename.check_suffix !Options.input_file ".srx" then time ~msg:"Loading file" @@ -173,5 +126,3 @@ let _ = Logger.print Format.err_formatter "BACKTRACE: %s@\n@?" (Printexc.get_backtrace()); Logger.print Format.err_formatter "FATAL ERROR: %s@\n@?" (Printexc.to_string e); exit 2 - - diff --git a/src/runtime.ml b/src/runtime.ml index 46e1b01..0ba08a2 100644 --- a/src/runtime.ml +++ b/src/runtime.ml @@ -8,7 +8,6 @@ module type S = sig type result_set val top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set val bottom_up_run : Ata.t -> Tree.t -> Compile.text_query * string -> result_set - val grammar_run : Ata.t -> Grammar2.t -> unit -> result_set val naive_top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set val twopass_top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set end @@ -150,7 +149,7 @@ module Make (U : ResJIT.S) : S with type result_set = U.NS.t = let cache_apply cache auto tlist s1 s2 = let f = gen_code auto tlist s1 s2 in - LOG(__ "grammar" 2 "Inserting: %i, %a, %a\n%!" + LOG(__ "top-down-run" 2 "Inserting: %i, %a, %a\n%!" (Uid.to_int tlist.Translist.Node.id) StateSet.print s1 StateSet.print s2); if not !Options.no_cache then add cache tlist s1 s2 f; f @@ -397,245 +396,6 @@ DEFINE BOTTOM_UP_NEXT(node, rest, stop) = slot.(StateSet.min_elt auto.topdown_marking_states) -let get_trans g auto tag states = - StateSet.fold (fun q tr_acc -> - List.fold_left - (fun ((lstates, rstates, tacc) as acc) (ts, trs) -> - if TagSet.mem (Tag.translate tag) ts then - if not (TagSet.mem Tag.attribute ts) && Grammar2.is_attribute g tag - then acc - else - let _, _, _, phi = Transition.node trs in - let l, r = Formula.st phi in - (StateSet.union l lstates, - StateSet.union r rstates, - Translist.cons trs tacc) - else acc) - tr_acc (Hashtbl.find auto.trans q) - ) states (StateSet.empty, StateSet.empty, Translist.nil) - -(* Grammar run *) -let dispatch_param0 conf id2 y0 y1 = - match conf with - | Grammar2.C0 | Grammar2.C2 -> Grammar2.Node0 id2 - | Grammar2.C1 | Grammar2.C5 -> Grammar2.Node1(id2,y0) - | Grammar2.C3 | Grammar2.C6 -> y0 - | Grammar2.C4 -> Grammar2.Node2(id2, y0, y1) - -let dispatch_param1 conf id2 y0 y1 = - match conf with - | Grammar2.C2 -> y0 - | Grammar2.C3 -> Grammar2.Node0 id2 - | Grammar2.C5 -> y1 - | Grammar2.C6 -> Grammar2.Node1(id2, y1) - | _ -> Grammar2.dummy_param - - module K_down = struct - type t = Grammar2.n_symbol * StateSet.t - let hash (x,y) = HASHINT2(Node.to_int x, Uid.to_int y.StateSet.Node.id) - let equal (x1,y1) (x2,y2) = x1 == x2 && y1 == y2 - end - - module K_up = struct - type t = Grammar2.n_symbol * StateSet.t * StateSet.t * StateSet.t - let hash (a,b,c,d) = - HASHINT4 (Node.to_int a, - Uid.to_int b.StateSet.Node.id, - Uid.to_int c.StateSet.Node.id, - Uid.to_int d.StateSet.Node.id) - let equal (a1, b1, c1, d1) (a2, b2, c2, d2) = - a1 == a2 && b1 == b2 && c1 == c2 && d1 == d2 - end - - module DCache = - struct - include Hashtbl.Make(K_down) - let dummy = StateSet.singleton State.dummy - let notfound l = l.(0) == dummy && l.(1) == dummy - let find h k = - try - find h k - with - Not_found -> - let a = [| dummy; dummy |] in - add h k a; - a - end - module UCache = Hashtbl.Make(K_up) - type result = { - in0 : StateSet.t; - in1 : StateSet.t; - out0 : StateSet.t * U.t; - out1 : StateSet.t * U.t; - main : StateSet.t * U.t - } - let mk_empty e = - { in0 = StateSet.empty; - in1 = StateSet.empty; - out0 = e; - out1 = e; - main = e - } - let mk_nil s v = - { - mk_empty (s,v) with - out0 = StateSet.empty,v; - out1 = StateSet.empty,v; - } - - let grammar_run auto g () = - let dummy_leaf = Grammar2.dummy_param in - let dummy_set = StateSet.singleton State.dummy in - let res_len = (StateSet.max_elt auto.states) + 1 in - let empty_slot = Array.create res_len U.NS.empty in - let nil_res = mk_nil auto.bottom_states empty_slot in - let cache3 = L3JIT.create () in - let dummy2 = (StateSet.empty, StateSet.empty, Translist.nil) in - let cache2 = Cache.Lvl2.create 512 dummy2 in - let rule_counter = ref 0 in - let preorder_counter = ref 0 in - let term_array = [| StateSet.empty; StateSet.empty |] in - let get_trans tag states = - let c = Cache.Lvl2.find cache2 tag (Uid.to_int states.StateSet.Node.id) in - if c == dummy2 then - let c = get_trans g auto tag states in - begin - Cache.Lvl2.add cache2 tag (Uid.to_int states.StateSet.Node.id) c; - c - end - else c - in - let lambda = ref 0 in - let rec start_loop idx states = - LOG(__ "grammar" 2 "Node %i\n%!" (Node.to_int idx)); - if states == dummy_set then nil_res else - if idx < Node.null then nil_res - else begin - let symbol = Grammar2.start_tag g idx in - let fc = Grammar2.start_first_child g idx in - let ns = Grammar2.start_next_sibling g fc in - if Grammar2.is_terminal g symbol then - let t = Grammar2.terminal symbol in - terminal_loop t states (Grammar2.Leaf (~-1,0,term_array, fc)) (Grammar2.Leaf (~-1,1,term_array, ns)) - else - let nt = Grammar2.non_terminal symbol in - incr lambda; - let lmbd = !lambda in - let y0 = (Grammar2.Leaf (lmbd,0, term_array, fc)) - and y1 = (Grammar2.Leaf (lmbd,1, term_array, ns)) in - rule_loop nt states y0 y1 - end - and rule_loop (t : Grammar2.n_symbol) states y0 y1 = - if t = Node.nil || states == dummy_set then nil_res else - let () = incr rule_counter in - if !rule_counter land 65535 == 0 then begin Gc.minor() end; -(* let k = (t, states) in*) -(* let pstates = DCache.find dcache k in - let notfound = DCache.notfound pstates in *) - let rhs = Grammar2.get_rule g t in - let id1 = Grammar2.get_id1 rhs in - let id2 = Grammar2.get_id2 rhs in - let conf = Grammar2.get_conf rhs in -(* if notfound then*) - let ny0 = dispatch_param0 conf id2 y0 y1 in - let ny1 = dispatch_param1 conf id2 y0 y1 in - let res = dispatch_loop id1 states ny0 ny1 in -(* pstates.(0) <- res.in0; - pstates.(1) <- res.in1; *) - res (* - UCache.add ucache (t, states, fst res.out0, fst res.out1) - res.main; - let h = Hashtbl.create 7 in - for i = 0 to res_len - 1 do - Hashtbl.add h (0, i) (snd res.out0).(i); - Hashtbl.add h (1, i) (snd res.out1).(i); - done; - { res with - main = ((fst res.main), (U.close h (snd res.main))); - } *) -(* - else - let res0 = partial_loop y0 pstates.(0) in - let res1 = partial_loop y1 pstates.(1) in - let k2 = (t, states, fst res0.main, fst res1.main) in - let s, r = - try - UCache.find ucache k2 - with - Not_found -> - let ores0 = { res0 with main = fst res0.main, U.var 0 (snd res0.main) } - and ores1 = { res1 with main = fst res1.main, U.var 1 (snd res1.main) } - in - let res = dispatch_loop id1 states (Grammar2.Cache (0,ores0)) (Grammar2.Cache (1, ores1)) in - UCache.add ucache k2 res.main; - res.main - in - let h = Hashtbl.create 7 in - for i = 0 to res_len - 1 do - Hashtbl.add h (0, i) (snd res0.main).(i); - Hashtbl.add h (1, i) (snd res1.main).(i); - done; - { in0 = pstates.(0); - in1 = pstates.(1); - out0 = res0.main; - out1 = res1.main; - main = s, U.close h r; - } -*) - and dispatch_loop id1 states ny0 ny1 = - if Grammar2.is_non_terminal g id1 then - rule_loop (Grammar2.non_terminal id1) states ny0 ny1 - else - terminal_loop (Grammar2.terminal id1) states ny0 ny1 - - and terminal_loop (symbol : Grammar2.t_symbol) states y0 y1 = - - if symbol == Grammar2.nil_symbol || symbol = Node.nil || states == dummy_set then nil_res else begin - let tag = Grammar2.tag symbol in - let lst, rst, trans = get_trans tag states in - let res0 = partial_loop y0 lst in - let res1 = partial_loop y1 rst in - let s1, slot1 = res0.main - and s2, slot2 = res1.main in - let opcode = L3JIT.find cache3 trans s1 s2 in - let node = Node.of_int !preorder_counter in - incr preorder_counter; - let res = - if opcode == L3JIT.dummy then - (L3JIT.cache_apply cache3 auto trans s1 s2) empty_slot slot1 slot2 (Obj.magic ()) node - else - opcode empty_slot slot1 slot2 (Obj.magic()) (node) - in - { in0 = lst; - in1 = rst; - out0 = res0.main; - out1 = res1.main; - main = res } - end - - and partial_loop l states = - if l == dummy_leaf then nil_res else - match l with - | Grammar2.Cache (_, r) -> r - | Grammar2.Leaf (_,_, _, id) -> start_loop id states - | Grammar2.Node0 id -> - if (Grammar2.terminal id) == Grammar2.nil_symbol then nil_res - else - rule_loop (Grammar2.non_terminal id) states dummy_leaf dummy_leaf - - | Grammar2.Node1 (id, y0) -> - rule_loop (Grammar2.non_terminal id) states y0 dummy_leaf - | Grammar2.Node2 (id, y0, y1) -> - if Grammar2.is_terminal g id then - terminal_loop (Grammar2.terminal id) states y0 y1 - else - rule_loop (Grammar2.non_terminal id) states y0 y1 - in - - let (_, slot) = (start_loop (Node.null) auto.init).main in - slot.(StateSet.min_elt auto.topdown_marking_states) - ;; - (* Slow reference top-down implementation *) let naive_top_down auto tree root states ctx = diff --git a/src/runtime.mli b/src/runtime.mli index c499017..39278e6 100644 --- a/src/runtime.mli +++ b/src/runtime.mli @@ -2,7 +2,6 @@ module type S = sig type result_set val top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set val bottom_up_run : Ata.t -> Tree.t -> Compile.text_query * string -> result_set - val grammar_run : Ata.t -> Grammar2.t -> unit -> result_set val naive_top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set val twopass_top_down_run : Ata.t -> Tree.t -> Tree.node -> result_set end