X-Git-Url: http://git.nguyen.vg/gitweb/?p=tatoo.git;a=blobdiff_plain;f=src%2Ftatoo.ml;h=5e8144da55a55d75ed2f76c3622858c5100a9932;hp=ca79411bc66358b451ec7012088d2e67446c999b;hb=84751fead39221a8e01d20a4692faf0b63a7c996;hpb=969febf12344a3fe3bf793a323b2e88f7b20ebae diff --git a/src/tatoo.ml b/src/tatoo.ml index ca79411..5e8144d 100644 --- a/src/tatoo.ml +++ b/src/tatoo.ml @@ -13,11 +13,14 @@ (* *) (***********************************************************************) -(* - Time-stamp: -*) - open Format +let default_gc = Gc.get() +let tuned_gc = { default_gc with + Gc.minor_heap_size = 32*1024*1024; + Gc.major_heap_increment = 8*1024*1024; + Gc.max_overhead = 1000000; + Gc.space_overhead = 100; +} let time f arg msg = let t1 = Unix.gettimeofday () in @@ -28,21 +31,85 @@ let time f arg msg = r +let compose_parallel run auto_list tree nodes () = + match auto_list with + [ auto ] -> [run auto tree nodes] + | _ -> assert false + +let compose_sequential run auto_list tree nodes () = + [ List.fold_left (fun acc auto -> + run auto tree acc) nodes auto_list ] + + +let restart_parallel run auto_list tree nodes () = + match auto_list with + [ auto ] -> List.map snd (run auto tree nodes) + | _ -> assert false + +let restart_sequential run auto_list tree nodes () = + List.map (fun auto -> run auto tree nodes) auto_list + let main () = let () = Options.parse () in + let tree_model = List.assoc !Options.tree_model + Options.supported_models + in + let module T = (val tree_model) in + let module Runtime = Run.Make(T) + in + let doc = - let fd = open_in !Options.input_file in - let d = time Naive_tree.load_xml_file fd "parsing xml document" in - close_in fd; d + let fd, close_fd = match !Options.input_file with + None | Some "-" | Some "/dev/stdin" -> stdin, ignore + | Some input -> + let fd = open_in input in fd, fun () -> close_in fd + in + let d = time Runtime.Tree.load_xml_file fd "parsing xml document" in + close_fd (); d in - let query = + let () = + Gc.full_major(); + Gc.compact(); + Gc.set (tuned_gc) + in + let queries = time - Xpath.Parser.parse - (Ulexing.from_latin1_string !Options.query) - "parsing XPath query" + (fun l -> + List.map (fun q -> + Xpath.Parser.parse + (Ulexing.from_utf8_string q)) l) + !Options.queries + "parsing XPath queries" in - let auto = - time Xpath.Compile.path query "compiling XPath query" + (* parallel, compose -> action + true, true -> Ata.concat of all automata and single run + true, false -> Ata.merge of all automata and single run + false, true -> Eval first, then run on results then ... + false, false -> Eval first on root, then second on root then ... + *) + let auto_list = + time + (fun l -> + List.map (fun query -> Xpath.Compile.path query) l) + queries + "compiling XPath queries" + in + let auto_list = + if !Options.parallel then + match auto_list with + fst :: rest -> + let f = + if !Options.compose then + Ata.concat + else + Ata.merge + in + let big_auto = List.fold_left f fst rest in + [big_auto] + | _ -> assert false + + else + auto_list in let output = match !Options.output_file with @@ -50,29 +117,59 @@ let main () = | Some f -> open_out f in if !Options.stats then begin - Logger.msg `STATS "Query: %a " Xpath.Ast.print_path query; - Logger.msg `STATS "@[Automaton: @\n%a@]" Ata.print auto; + List.iter (fun query -> + Logger.msg `STATS "Query: %a " Xpath.Ast.print_path query) queries; + List.iter (fun auto -> + Logger.msg `STATS "@[Automaton: @\n%a@]" Ata.print auto) auto_list; end; - let module Naive = Eval.Make(Naive_tree) in - let results = - time (Naive.eval auto doc) (Naive_tree.root doc) "evaluating query" + let result_list = + let root = Runtime.ResultSet.create () in + let () = Runtime.ResultSet.add (Runtime.Tree.root doc) root in + let f, msg = + match !Options.parallel, !Options.compose with + true, true -> + compose_parallel Runtime.eval auto_list doc root, "parallel/compose" + | true, false -> + restart_parallel Runtime.full_eval auto_list doc root, "parallel/restart" + | false, true -> + compose_sequential Runtime.eval auto_list doc root , "sequential/compose" + | false, false -> + restart_sequential Runtime.eval auto_list doc root, "sequential/restart" + in + time f () ("evaluating query in " ^ msg ^ " mode") in + let s = Runtime.stats () in + Run.( + Logger.msg `STATS + "@[tree size: %d@\ntraversals: %d@\ntransition fetch cache miss ratio: %f@\ntransition eval cache miss ratio: %f@\nNumber of visited nodes per pass: %a@]" + s.tree_size s.pass + (float s.fetch_trans_cache_miss /. float s.fetch_trans_cache_access) + (float s.eval_trans_cache_miss /. float s.eval_trans_cache_access) + (let i = ref 0 in + Pretty.print_list ~sep:"," (fun fmt n -> Format.fprintf fmt "%i: %i" !i n;incr i)) + s.nodes_per_run); time (fun () -> - output_string output "\n"; - if !Options.count then begin - output_string output (string_of_int (List.length results)); - output_char output '\n'; - end else - List.iter (fun n -> - Naive_tree.print_xml output doc n; - output_char output '\n' - ) results; - output_string output "\n"; - flush output; - if output != stdout then close_out output - -) () "serializing results" + let count = ref 1 in + List.iter (fun results -> + output_string output "\n"; + if !Options.count then begin + output_string output (string_of_int (Runtime.ResultSet.length results)); + output_char output '\n'; + end else + Runtime.ResultSet.iter (fun n -> + Runtime.Tree.print_xml output doc n; + output_char output '\n' + ) results; + output_string output "\n"; + incr count + ) result_list; + flush output; + if output != stdout then close_out output + + ) () "serializing results" let () = @@ -81,6 +178,12 @@ let () = with Arg.Bad msg -> eprintf "Error: %s\n%!" msg; Options.usage (); exit 1 | Sys_error msg -> eprintf "Error: %s\n%!" msg; exit 2 - | Tree.Parse_error msg -> eprintf "Error: file %s, %s\n%!" !Options.input_file msg; exit 3 + | Tree.Parse_error msg -> + eprintf "Error: %s, %s\n%!" + (match !Options.input_file with + Some s -> ("file " ^ s) + | None -> "[stdin]") msg; exit 3 | Xpath.Ulexer.Error (s, e, msg) -> eprintf "Error: character %i-%i: %s\n%!" s e msg; exit 4 - | e -> eprintf "FATAL ERROR: %s\n%!" (Printexc.to_string e); exit 128 +(* | e -> Printexc.print_backtrace stderr; + flush stderr; + eprintf "FATAL ERROR: %s\n%!" (Printexc.to_string e); exit 128 *)