-let index_empty_texts = ref false
+open Utils
+open Format
+
+let index_empty_texts = ref true (* on by default; the "-ne" option clears it *)
let sample_factor = ref 64
let disable_text_collection = ref false
let tc_threshold = ref 60000
let time = ref false
let bottom_up = ref false
let no_jump = ref false
+let no_cache = ref false (* set by the "-nc" option (disable caching) *)
let verbose = ref false
let text_index_type = ref 0
+let do_perf = ref false (* set by the "-p" option (dump perf counters, Linux only) *)
+let twopass = ref false (* set by the "-two" option (use twopass algorithm) *)
+let repeat = ref 1 (* set by the "-r" option; per its help text, the number of query executions *)
+let docstats = ref false (* set by the "-doc-stats" option (compute document statistics) *)
let set_index_type = function (* callback of the "-index-type" option: stores a numeric code in text_index_type *)
| "default" -> text_index_type := 0
| "rlcsa" -> text_index_type := 2
| s -> raise (Arg.Bad(s)) (* NOTE(review): the "-index-type" symbol list in spec also offers "swcsa", which has no case here and so ends up in this branch -- confirm whether it needs its own code *)
-let usage_msg = Printf.sprintf "%s <input.{xml|srx}> 'query' [output]" Sys.argv.(0)
-
+let usage_msg = Printf.sprintf "%s [options] <input.{xml|srx}> 'query' [output]" Sys.argv.(0) (* usage banner passed to Arg.parse and Arg.usage in parse_cmdline *)
let pos = ref 0 (* incremented by anon_fun; parse_cmdline only accepts a final value of 2 or 3 *)
let anon_fun =
| 2 -> output_file := Some s; incr pos
| _ -> raise (Arg.Bad(s))
-let spec = [ "-c", Arg.Set(count_only), "counting only (don't materialize the result set)";
- "-t", Arg.Set(time), "print timing statistics";
- "-max-tc", Arg.Set_int(tc_threshold), "set maximum count for which the TextCollection is used";
- "-f", Arg.Set_int(sample_factor), "sample factor [default=64]";
- "-i", Arg.Set(index_empty_texts), "index empty texts [default=false]";
- "-d", Arg.Set(disable_text_collection), "disable text collection[default=false]";
- "-s", Arg.Set_string(save_file), "save the intermediate representation into file.srx";
- "-b", Arg.Set(bottom_up), "real bottom up run";
- "-nj", Arg.Set(no_jump), "disable jumping";
- "-index-type", Arg.Symbol ([ "default"; "swcsa"; "rlcsa" ], set_index_type),
- "choose text index type";
- "-v", Arg.Set(verbose), "verbose mode";
- ]
+(* [set_logger s] is the callback of the "-log" option.
+   [s] is cut into items with [String.explode s ','] and each non-empty item
+   is cut again with [String.explode t ':'] ([String.explode] is not defined
+   here; presumably it splits on the given separator -- confirm).  An item
+   must yield exactly two fields [tr; lvl]: [lvl] has to parse as an integer
+   and [tr] has to satisfy [Logger.is_logger], in which case
+   [Logger.activate tr l] is called.
+   @raise Arg.Bad with the level text when [lvl] is not an integer, or with
+   the whole item when it is malformed or names an unknown logger. *)
+let set_logger s =
+  List.iter (fun t ->
+    if t = "" then ()
+    else
+      match String.explode t ':' with
+        [ tr; lvl ] ->
+          (* int_of_string reports bad input with Failure; catching only that
+             keeps unrelated exceptions from being rewritten as Arg.Bad. *)
+          let l = try int_of_string lvl with Failure _ -> raise (Arg.Bad (lvl)) in
+          if Logger.is_logger tr then Logger.activate tr l
+          else raise (Arg.Bad (t))
+      | _ -> raise (Arg.Bad (t))
+  ) (String.explode s ',')
+
+(* [pretty_loggers ()] returns the names given by [Logger.available ()]
+   rendered through [Pretty.print_list] with ", " as separator.  The shared
+   [str_formatter] buffer is flushed first so that text left in it by
+   earlier users does not end up in the result. *)
+let pretty_loggers () =
+  let (_ : string) = flush_str_formatter () in
+  let print_name ppf name = fprintf ppf "%s" name in
+  Pretty.print_list ~sep:", " print_name str_formatter (Logger.available ());
+  flush_str_formatter ()
+
+(* Option table handed to [Arg.parse] / [Arg.usage].
+   Doc strings follow the [Arg.align] convention: a string that starts with
+   a space is pure description, otherwise its first word is displayed as the
+   option's parameter placeholder (e.g. "<n>").
+   Because function application binds tighter than [@], only the first list
+   goes through [Arg.align]; the "-log" entry, present unless NTRACE is
+   defined, is appended after alignment.
+   NOTE(review): no entry here sets [time] or [tc_threshold]; confirm they
+   are meant to keep their defaults. *)
+let spec = Arg.align
+  [ "-c", Arg.Set(count_only),
+    " counting only (don't materialize the result set)";
+
+    "-two", Arg.Set(twopass),
+    " Use twopass algorithm";
+
+    "-f", Arg.Set_int(sample_factor),
+    "<n> sample factor [default=64]";
+
+    "-ne", Arg.Clear(index_empty_texts),
+    " don't index empty texts [default=index]";
+
+    "-d", Arg.Set(disable_text_collection),
+    " disable text collection[default=false]";
+
+    "-s", Arg.Set_string(save_file),
+    "<save_file> save the intermediate representation into file.srx";
+
+    "-b", Arg.Set(bottom_up), " real bottom up run";
+
+    "-nj", Arg.Set(no_jump), " disable jumping";
+
+    "-nc", Arg.Set(no_cache), " disable caching";
+
+
+    "-p", Arg.Set(do_perf), " dump perf counters (Linux only)";
+
+    (* NOTE(review): "swcsa" is offered here but set_index_type only has
+       cases for "default" and "rlcsa" -- confirm it is meant to be listed. *)
+    "-index-type", Arg.Symbol ([ "default"; "swcsa"; "rlcsa" ],
+                               set_index_type),
+    " choose text index type";
+
+    (* "-r" takes an integer, so it carries a "<n>" placeholder like "-f". *)
+    "-r", Arg.Set_int(repeat),
+    "<n> repeat query execution n times (benchmarking only, default 1)";
+
+    "-doc-stats", Arg.Set(docstats),
+    " Compute document statistics (performs full traversal)";
+
+
+    "-v", Arg.Set(verbose), " verbose mode"; ] @
+IFNDEF NTRACE
+THEN [
+  "-log", Arg.String (set_logger),
+  "<logger1:l1,...,loggern:ln> enable logging with the specified level. Valid loggers are: "
+  ^ (pretty_loggers ())
+  ]
+ELSE []
+END
+
let parse_cmdline() = (* runs Arg.parse with spec, anon_fun and usage_msg, then validates pos *)
let _ = Arg.parse spec anon_fun usage_msg
in
- if (!pos > 3 || !pos < 2)
- then begin Arg.usage spec usage_msg; exit 1 end
+ if (!pos > 3 || !pos < 2) (* pos is incremented by anon_fun; only the values 2 and 3 pass this check *)
+ then begin Arg.usage spec usage_msg; exit 1 end (* otherwise print the usage text and exit with status 1 *)