(***********************************************************************)
(*                                                                     *)
(*                               TAToo                                 *)
(*                                                                     *)
(*                     Kim Nguyen, LRI UMR8623                         *)
(*                   Université Paris-Sud & CNRS                       *)
(*                                                                     *)
(*  Copyright 2010-2012 Université Paris-Sud and Centre National de la *)
(*  Recherche Scientifique. All rights reserved.  This file is         *)
(*  distributed under the terms of the GNU Lesser General Public       *)
(*  License, with the special exception on linking described in file   *)
(*  ../LICENSE.                                                        *)
(*                                                                     *)
(***********************************************************************)

open Format

(** GC parameters as they are when this module is initialised. *)
let default_gc = Gc.get ()

(** GC parameters installed by [main] once the document has been loaded.
    Only the four fields below differ from [default_gc].
    NOTE(review): according to the [Gc] documentation a [max_overhead]
    of 1000000 or more means compaction is never triggered
    automatically; [main] calls [Gc.compact] explicitly before
    installing these settings. *)
let tuned_gc = { default_gc with
  Gc.minor_heap_size = 32*1024*1024;
  Gc.major_heap_increment = 8*1024*1024;
  Gc.max_overhead = 1000000;
  Gc.space_overhead = 100;
}

(** [time f arg msg] evaluates [f arg], logs the elapsed wall-clock time
    (difference of two [Unix.gettimeofday] readings, in milliseconds)
    through [Logger.msg] with the [`STATS] tag, prefixed by [msg], and
    returns the result of [f arg].  Nothing is logged if [f arg]
    raises. *)
let time f arg msg =
  let t1 = Unix.gettimeofday () in
  let r = f arg in
  let t2 = Unix.gettimeofday () in
  let time = (t2 -. t1) *. 1000. in
  Logger.msg `STATS "%s: %fms" msg time;
  r

(* The four evaluation strategies below share the same shape:
   [strategy run auto_list tree nodes ()] returns a list of results.
   The trailing unit argument delays the evaluation so that the whole
   run can be handed to [time]. *)

(** Runs the single automaton of [auto_list] on [nodes] and returns its
    result as a one-element list.  [auto_list] must contain exactly one
    automaton (assertion failure otherwise). *)
let compose_parallel run auto_list tree nodes () =
  match auto_list with
    [ auto ] -> [ run auto tree nodes ]
  | _ -> assert false

(** Runs the automata of [auto_list] from left to right, feeding the
    result of each run as the input nodes of the next one, starting from
    [nodes].  Returns the final result as a one-element list. *)
let compose_sequential run auto_list tree nodes () =
  [ List.fold_left (fun acc auto -> run auto tree acc) nodes auto_list ]

(** Runs the single automaton of [auto_list] on [nodes]; [run] is
    expected to return a list of pairs, of which only the second
    components are kept.  [auto_list] must contain exactly one automaton
    (assertion failure otherwise). *)
let restart_parallel run auto_list tree nodes () =
  match auto_list with
    [ auto ] -> List.map snd (run auto tree nodes)
  | _ -> assert false

(** Runs every automaton of [auto_list] on the same [nodes] and returns
    one result per automaton, in the order of [auto_list]. *)
let restart_sequential run auto_list tree nodes () =
  List.map (fun auto -> run auto tree nodes) auto_list

(** Program body: parses the command line, loads the XML document,
    parses and compiles the XPath queries, evaluates them in the mode
    selected by [Options.parallel] and [Options.compose], logs run
    statistics and finally serializes the results.

    Exceptions are not handled here; see the top-level handler at the
    end of the file. *)
let main () =
  let () = Options.parse () in
  let tree_model =
    List.assoc !Options.tree_model Options.supported_models
  in
  let module T = (val tree_model) in
  let module Runtime = Run.Make(T) in
  let doc =
    (* [close_fd] is a no-op for standard input so that stdin is never
       closed by us. *)
    let fd, close_fd =
      match !Options.input_file with
        None | Some "-" | Some "/dev/stdin" -> stdin, ignore
      | Some input ->
        let fd = open_in input in
        fd, fun () -> close_in fd
    in
    (* Release the input channel even when loading fails, then let the
       exception reach the top-level handler unchanged. *)
    let d =
      try time Runtime.Tree.load_xml_file fd "parsing xml document"
      with e -> close_fd (); raise e
    in
    close_fd ();
    d
  in
  (* Clean up after document loading before switching to the tuned GC
     parameters. *)
  let () =
    Gc.full_major ();
    Gc.compact ();
    Gc.set tuned_gc
  in
  let queries =
    time
      (fun l ->
         List.map
           (fun q -> Xpath.Parser.parse (Ulexing.from_utf8_string q))
           l)
      !Options.queries
      "parsing XPath queries"
  in
  (* parallel, compose  ->     action
     true, true   -> Ata.concat of all automata and single run
     true, false  -> Ata.merge of all automata and single run
     false, true  -> Eval first, then run on results then ...
     false, false -> Eval first on root, then second on root then ...
  *)
  let auto_list =
    time
      (fun l -> List.map (fun query -> Xpath.Compile.path query) l)
      queries
      "compiling XPath queries"
  in
  (* In parallel mode all automata are combined into a single one; an
     empty query list is not supported there (assertion failure). *)
  let auto_list =
    if !Options.parallel then begin
      match auto_list with
        first :: rest ->
        let f = if !Options.compose then Ata.concat else Ata.merge in
        let big_auto = List.fold_left f first rest in
        [ big_auto ]
      | _ -> assert false
    end
    else auto_list
  in
  let output =
    match !Options.output_file with
    | None | Some "-" | Some "/dev/stdout" -> stdout
    | Some f -> open_out f
  in
  if !Options.stats then begin
    List.iter
      (fun query ->
         Logger.msg `STATS "Query: %a " Xpath.Ast.print_path query)
      queries;
    List.iter
      (fun auto ->
         Logger.msg `STATS "@[Automaton: @\n%a@]" Ata.print auto)
      auto_list;
  end;
  let result_list =
    (* Evaluation starts from a result set holding only the document
       root. *)
    let root = Runtime.ResultSet.create () in
    let () = Runtime.ResultSet.add (Runtime.Tree.root doc) root in
    let f, msg =
      match !Options.parallel, !Options.compose with
        true, true ->
        compose_parallel Runtime.eval auto_list doc root,
        "parallel/compose"
      | true, false ->
        restart_parallel Runtime.full_eval auto_list doc root,
        "parallel/restart"
      | false, true ->
        compose_sequential Runtime.eval auto_list doc root,
        "sequential/compose"
      | false, false ->
        restart_sequential Runtime.eval auto_list doc root,
        "sequential/restart"
    in
    time f () ("evaluating query in " ^ msg ^ " mode")
  in
  let s = Runtime.stats () in
  (* The local open of [Run] brings the statistics record fields into
     scope.  The ratios are plain float divisions. *)
  Run.(
    Logger.msg `STATS
      "@[tree size: %d@\ntraversals: %d@\ntransition fetch cache miss ratio: %f@\ntransition eval cache miss ratio: %f@\nNumber of visited nodes per pass: %a@]"
      s.tree_size s.pass
      (float s.fetch_trans_cache_miss /. float s.fetch_trans_cache_access)
      (float s.eval_trans_cache_miss /. float s.eval_trans_cache_access)
      (* Printer for the per-pass node counts; [i] numbers the passes
         from 0 as the list is printed. *)
      (let i = ref 0 in
       Pretty.print_list ~sep:","
         (fun fmt n -> Format.fprintf fmt "%i: %i" !i n; incr i))
      s.nodes_per_run);
  time
    (fun () ->
       List.iter
         (fun results ->
            output_string output "\n";
            if !Options.count then begin
              output_string output
                (string_of_int (Runtime.ResultSet.length results));
              output_char output '\n';
            end else
              Runtime.ResultSet.iter
                (fun n ->
                   Runtime.Tree.print_xml output doc n;
                   output_char output '\n')
                results;
            output_string output "\n")
         result_list;
       flush output;
       (* Physical comparison on purpose: only a channel we opened
          ourselves is closed, never the process-wide stdout. *)
       if output != stdout then close_out output)
    ()
    "serializing results"

(* Entry point: runs [main] and maps the expected failures to an error
   message on stderr and a distinct exit code (1: bad command line,
   2: system error, 3: XML parse error, 4: XPath lexing error). *)
let () =
  try
    main ()
  with
    Arg.Bad msg ->
    eprintf "Error: %s\n%!" msg;
    Options.usage ();
    exit 1
  | Sys_error msg ->
    eprintf "Error: %s\n%!" msg;
    exit 2
  | Tree.Parse_error msg ->
    (* Name the input the same way [main] interprets it: all the
       spellings of standard input are reported as [stdin]. *)
    eprintf "Error: %s, %s\n%!"
      (match !Options.input_file with
         None | Some "-" | Some "/dev/stdin" -> "[stdin]"
       | Some s -> "file " ^ s)
      msg;
    exit 3
  | Xpath.Ulexer.Error (s, e, msg) ->
    eprintf "Error: character %i-%i: %s\n%!" s e msg;
    exit 4
(*
  | e -> Printexc.print_backtrace stderr;
    flush stderr;
    eprintf "FATAL ERROR: %s\n%!" (Printexc.to_string e);
    exit 128
*)