From 5b5dcd45cf86701ccfe917c1d6ad73b83bb523c3 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Kim=20Nguy=E1=BB=85n?= Date: Mon, 22 Apr 2013 17:27:59 +0200 Subject: [PATCH] Implement command line options, clean-up screen output. --- src/ata.ml | 24 ++++++-------- src/eval.ml | 4 +-- src/naive_tree.ml | 34 +++++++++++++------- src/options.ml | 39 +++++++++++++++++++++++ src/tatoo.ml | 78 ++++++++++++++++++++++++++++++--------------- src/tree.ml | 5 +-- src/xpath/parser.ml | 8 +++-- 7 files changed, 135 insertions(+), 57 deletions(-) create mode 100644 src/options.ml diff --git a/src/ata.ml b/src/ata.ml index d6aeaec..6a3570c 100644 --- a/src/ata.ml +++ b/src/ata.ml @@ -14,7 +14,7 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) INCLUDE "utils.ml" @@ -256,12 +256,12 @@ let create s ss = let n2 = ref 0 in Cache.N2.iteri (fun _ _ _ b -> if b then incr n2) auto.cache2; Cache.N4.iteri (fun _ _ _ _ _ b -> if b then incr n4) auto.cache4; - Format.eprintf "INFO: automaton %i, cache2: %i entries, cache6: %i entries\n%!" + Format.eprintf "STATS: automaton %i, cache2: %i entries, cache6: %i entries\n%!" (auto.id :> int) !n2 !n4; let c2l, c2u = Cache.N2.stats auto.cache2 in let c4l, c4u = Cache.N4.stats auto.cache4 in - Format.eprintf "INFO: cache2: length: %i, used: %i, occupation: %f\n%!" c2l c2u (float c2u /. float c2l); - Format.eprintf "INFO: cache4: length: %i, used: %i, occupation: %f\n%!" c4l c4u (float c4u /. float c4l) + Format.eprintf "STATS: cache2: length: %i, used: %i, occupation: %f\n%!" c2l c2u (float c2u /. float c2l); + Format.eprintf "STATS: cache4: length: %i, used: %i, occupation: %f\n%!" c4l c4u (float c4u /. float c4l) ); auto @@ -485,7 +485,7 @@ let _flush_str_fmt () = pp_print_flush _str_fmt (); let print fmt a = fprintf fmt - "\nInternal UID: %i@\n\ + "Internal UID: %i@\n\ States: %a@\n\ Selection states: %a@\n\ Alternating transitions:@\n" @@ -514,14 +514,15 @@ let print fmt a = in let line = Pretty.line (max_all + max_pre + 6) in let prev_q = ref State.dummy in + fprintf fmt "%s@\n" line; List.iter (fun (q, s1, s2, s3) -> - if !prev_q != q && !prev_q != State.dummy then fprintf fmt " %s\n%!" line; + if !prev_q != q && !prev_q != State.dummy then fprintf fmt "%s@\n" line; prev_q := q; - fprintf fmt " %s, %s" s1 s2; + fprintf fmt "%s, %s" s1 s2; fprintf fmt "%s" (Pretty.padding (max_pre - Pretty.length s1 - Pretty.length s2)); - fprintf fmt " %s %s@\n%!" Pretty.right_arrow s3; + fprintf fmt " %s %s@\n" Pretty.right_arrow s3; ) strs_strings; - fprintf fmt " %s\n%!" line + fprintf fmt "%s@\n" line (* [complete transitions a] ensures that for each state q @@ -556,7 +557,6 @@ let cleanup_states a = in StateSet.iter loop a.selection_states; let unused = StateSet.diff a.states !memo in - eprintf "Unused states %a\n%!" StateSet.print unused; StateSet.iter (fun q -> Hashtbl.remove a.transitions q) unused; a.states <- !memo @@ -566,10 +566,6 @@ let cleanup_states a = *) let normalize_negations auto = - eprintf "Automaton before normalize_trans:\n"; - print err_formatter auto; - eprintf "--------------------\n%!"; - let memo_state = Hashtbl.create 17 in let todo = Queue.create () in let rec flip b f = diff --git a/src/eval.ml b/src/eval.ml index e8e1d5a..1b58617 100644 --- a/src/eval.ml +++ b/src/eval.ml @@ -14,7 +14,7 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) INCLUDE "utils.ml" @@ -138,7 +138,7 @@ END redo := top_down_run auto tree node cache !iter; incr iter; done; - at_exit (fun () -> eprintf "INFO: %i iterations\n" !iter); + at_exit (fun () -> eprintf "@[STATS: %i iterations@]@." !iter); let r = get_results auto tree node cache in TRACE(Html.gen_trace (module T : Tree.S with type t = T.t) (tree)); r diff --git a/src/naive_tree.ml b/src/naive_tree.ml index b7c0be6..65ef334 100644 --- a/src/naive_tree.ml +++ b/src/naive_tree.ml @@ -14,7 +14,7 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) type node = { @@ -208,20 +208,29 @@ struct let node = top ctx in node.next_sibling <- nil; consume_closing ctx node; - match ctx.stack with - [ root ] -> - root.next_sibling <- nil; - { root = root; - size = ctx.current_preorder - } - | _ -> raise (Expat.Expat_error Expat.UNCLOSED_TOKEN) + Expat.final psr; + let root = List.hd ctx.stack in + root.next_sibling <- nil; + { root = root; + size = ctx.current_preorder + } ) + let error e parser_ = + let msg = Printf.sprintf "%i.%i %s" + (Expat.get_current_line_number parser_) + (Expat.get_current_column_number parser_) + (Expat.xml_error_to_string e) + in + raise (Tree.Parse_error msg) let parse_string s = let parser_, finalize = create_parser () in - Expat.parse parser_ s; - finalize () + try + Expat.parse parser_ s; + finalize () + with + Expat.Expat_error e -> error e parser_ let parse_file fd = let buffer = String.create 4096 in @@ -231,7 +240,10 @@ struct if read != 0 then let () = Expat.parse_sub parser_ buffer 0 read in loop () - in loop (); finalize () + in try + loop (); finalize () + with + Expat.Expat_error e -> error e parser_ end diff --git a/src/options.ml b/src/options.ml new file mode 100644 index 0000000..142b709 --- /dev/null +++ b/src/options.ml @@ -0,0 +1,39 @@ +open Arg + +let count = ref false +let input_file = ref "" +let output_file : string option ref = ref None +let query = ref "" +let stats = ref false + +let specs = align [ + "-c", Set count, ""; + "--count", Set count, " write the number of results only"; + "-s", Set stats, ""; + "--stats", Set stats, " display timing and various statistics"; +] + +let usage_msg = Printf.sprintf "usage: %s [options] input.xml query [output.xml]" Sys.argv.(0) + +let get_anon, anon_arg = + let args = ref [] in + (fun () -> !args), + (fun s -> args := s::!args) + +let usage () = usage specs usage_msg + +let parse () = + parse specs anon_arg usage_msg; + match List.rev (get_anon ()) with + input :: q :: maybe_output -> + input_file := input; + query := q; + begin + match maybe_output with + [] -> () + | [ output ] -> output_file := Some output + | _ -> raise (Arg.Bad "too many arguments") + end + | [] | [ _ ] -> raise (Arg.Bad "not enough arguments") + + diff --git a/src/tatoo.ml b/src/tatoo.ml index 1b8c17d..6dc66ef 100644 --- a/src/tatoo.ml +++ b/src/tatoo.ml @@ -14,39 +14,65 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) -let doc = - let fd = open_in Sys.argv.(1) in - let d = Naive_tree.load_xml_file fd in - close_in fd; d - - -let query = - let arg2 = Sys.argv.(2) in - Xpath.Parser.parse (Ulexing.from_latin1_string arg2) - -let auto = - Xpath.Compile.path query - open Format -let () = - fprintf err_formatter "Query: %a\n%!" Xpath.Ast.print_path query; - fprintf err_formatter "Automata: %a\n%!" Ata.print auto; - fprintf err_formatter "Evaluating automaton:\n%!"; +let main () = + let () = Options.parse () in + let doc = + let fd = open_in !Options.input_file in + let d = Naive_tree.load_xml_file fd in + close_in fd; d + in + let query = + Xpath.Parser.parse (Ulexing.from_latin1_string !Options.query) + in + let auto = + Xpath.Compile.path query + in + let output = + match !Options.output_file with + | None | Some "-" | Some "/dev/stdout" -> stdout + | Some f -> open_out f + in + if !Options.stats then begin + fprintf err_formatter "@[STATS: Query: %a @]@." Xpath.Ast.print_path query; + fprintf err_formatter "@[STATS: @[Automaton: @\n"; + Ata.print err_formatter auto; + fprintf err_formatter "@]@]@."; + end; let module Naive = Eval.Make(Naive_tree) in let t1 = Unix.gettimeofday() in let results = Naive.eval auto doc (Naive_tree.root doc) in let teval = (Unix.gettimeofday () -. t1) *. 1000. in let t1 = Unix.gettimeofday () in - output_string stdout "\n"; - List.iter (fun n -> - Naive_tree.print_xml stdout doc n; - output_char stdout '\n' - ) results; - output_string stdout "\n"; + output_string output "\n"; + if !Options.count then begin + output_string output (string_of_int (List.length results)); + output_char output '\n'; + end else + List.iter (fun n -> + Naive_tree.print_xml output doc n; + output_char output '\n' + ) results; + output_string output "\n"; let tprint = (Unix.gettimeofday () -. t1) *. 1000. in - flush stdout; - fprintf err_formatter "evaluation time: %fms\nserialization time: %fms\n%!" teval tprint + flush output; + if output != stdout then close_out output; + if !Options.stats then begin + fprintf err_formatter "@[STATS: evaluation time: %fms@]@." teval; + fprintf err_formatter "@[STATS: serialization time: %fms@]@." tprint + end + + +let () = + try + main () + with + Arg.Bad msg -> eprintf "Error: %s\n%!" msg; Options.usage (); exit 1 + | Sys_error msg -> eprintf "Error: %s\n%!" msg; exit 2 + | Tree.Parse_error msg -> eprintf "Error: file %s, %s\n%!" !Options.input_file msg; exit 3 + | Xpath.Ulexer.Error (s, e, msg) -> eprintf "Error: character %i-%i: %s\n%!" s e msg; exit 4 + | e -> eprintf "FATAL ERROR: %s\n%!" (Printexc.to_string e); exit 128 diff --git a/src/tree.ml b/src/tree.ml index 9ef78ce..e06a11e 100644 --- a/src/tree.ml +++ b/src/tree.ml @@ -14,7 +14,7 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) (** The different kind of XML nodes and utility functions *) @@ -41,9 +41,10 @@ module NodeKind = k1 == Node || k2 == Node || k1 == k2 end - (** Signatures for trees *) +exception Parse_error of string + module type S = sig type node diff --git a/src/xpath/parser.ml b/src/xpath/parser.ml index ed5283e..7386577 100644 --- a/src/xpath/parser.ml +++ b/src/xpath/parser.ml @@ -14,11 +14,15 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) include Xpath_internal_parser let parse (l : Ulexing.lexbuf) = - xpath_query (fun _ -> Ulexer.token l) (Lexing.from_string "!!dummy!!") + try + xpath_query (fun _ -> Ulexer.token l) (Lexing.from_string "!!dummy!!") + with + Parsing.Parse_error -> + Ulexer.error (Ulexing.lexeme_start l) (Ulexing.lexeme_end l) "syntax error" -- 2.17.1