X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=benchmark%2Fbenchmark.ml;fp=benchmark%2Fbenchmark.ml;h=0000000000000000000000000000000000000000;hb=4b52da1a20a4fe031930bb96d2ca46bec06dc529;hp=c726166e49f04d91b06fdd5e4cfb7d5a12a35bd2;hpb=a223af3254fb51c279cfbccdc18c59484fdca74e;p=SXSI%2Fxpathcomp.git diff --git a/benchmark/benchmark.ml b/benchmark/benchmark.ml deleted file mode 100644 index c726166..0000000 --- a/benchmark/benchmark.ml +++ /dev/null @@ -1,284 +0,0 @@ -module Utils = -struct - type queryset = { documents : (string*int) array; - queries : string array;} - - let make_queryset docs queries = - { documents = Array.of_list (List.map (function f -> (f,(Unix.stat f).Unix.st_size)) docs); - queries = Array.of_list queries - } - - - type stats = { - mutable query : int; (* index in the query set *) - mutable input_document : int; (* index in the query set *) - mutable input_size : int; - mutable print_output : bool; - - mutable input_parsing_time : float; - mutable query_compile_time : float; - mutable query_execution_time : float; - mutable serialization_time : float; - - mutable memory_usage : int; - } - - type stats_token = - | Query of int - | Input_document of int - | Print_output of int - | Input_size of int - | Input_parsing_time of int - | Query_compile_time of int - | Query_execution_time of int - | Serialization_time of int - | Memory_usage of int - - - let print_stats fmt s = - Format.fprintf fmt "\ -Query number : %i -Document number : %i -Document size : %i bytes -Output was serialized : %s -Parsing time : %f ms -Compile time : %f ms -Execution time : %f ms -Serialization time : %f ms -Memory usage : %i kB\n" s.query - s.input_document s.input_size (if s.print_output then "yes" else "no") - s.input_parsing_time s.query_compile_time s.query_execution_time - s.serialization_time (s.memory_usage/1024) - - let empty_stats () = { - query = -1; - input_document = -1; - input_size = -1; - print_output = false; - input_parsing_time = infinity; - query_compile_time = infinity; - query_execution_time = infinity; - serialization_time = infinity; - memory_usage = 0 - } - - type result = (string*string)*(stats array list) - - - let re_vmrss = Str.regexp ".*VmRSS:[^0-9]*\\([0-9]+\\) kB.*" - let re_status = Str.regexp ".*Status:[^A-Z]*\\([A-Z]\\).*" - - let monitor_mem pid = - let zombie = ref false in - let rss = ref 0 in - let in_c = open_in (Printf.sprintf "/proc/%i/status" pid) in - let _ = try - while true do - let s = input_line in_c in - if Str.string_match re_vmrss s 0 - then rss:= int_of_string (Str.matched_group 1 s); - if Str.string_match re_status s 0 - then zombie:= (Str.matched_group 1 s) = "Z" - done; - - with - End_of_file -> close_in in_c - in - (!zombie,!rss*1024) - - - let input_str = ref "" - - let get_res i = Str.matched_group i !input_str - - let rec match_out stats = function [] -> [] - | (s,l)::r -> - if Str.string_match s !input_str 0 - then let () = - List.iter - (function Query i -> stats.query <- int_of_string (get_res i) - | Input_document i -> stats.input_document <- int_of_string (get_res i) - | Input_size i -> stats.input_size <- int_of_string (get_res i) - | Print_output _ -> stats.print_output <- false; - | Input_parsing_time i -> stats.input_parsing_time <- float_of_string (get_res i) - | Query_compile_time i -> stats.query_compile_time <- float_of_string (get_res i) - | Query_execution_time i -> stats.query_execution_time <- float_of_string (get_res i) - | Serialization_time i -> stats.serialization_time <- float_of_string (get_res i) - | Memory_usage i -> stats.memory_usage <- int_of_string (get_res i)) l - in r - else (s,l)::(match_out stats r) - - let parse_result pid in_c stats l = - let descr = Unix.descr_of_in_channel in_c in - let l = ref (List.map (fun (s,f) -> (Str.regexp s,f)) l) in - try - while !l <> [] && (0 == (fst (Unix.waitpid [ Unix.WNOHANG ] pid))) do - let _ = - match Unix.select [descr] [] [] 0.75 with - | [d ],_,_ -> - input_str := input_line in_c; - l := match_out stats !l; - | _ -> () - in - if pid != 0 then - let z,rss = monitor_mem pid in - if rss > stats.memory_usage - then stats.memory_usage <- rss; - if z then raise End_of_file - done - with - | End_of_file | Unix.Unix_error(Unix.ECHILD,_,_) -> () - | e -> raise e - - let build_xquery document query file print_output = - let oc = open_out file in - output_string oc "let $doc := doc(\""; - output_string oc document; - output_string oc "\") for $i in $doc"; - output_string oc query; - output_string oc "\n let $a := "; - if print_output - then output_string oc "$i" - else output_string oc "()"; - output_string oc "\n let $b := 1+1"; - output_string oc "\n return $a\n"; - close_out oc - - (* usually xslt processor take the filename - as argument on the command line *) - let build_xslt _ query file print_output = - let oc = open_out file in - output_string oc " - - - - - - - - -\n"; - if print_output - then output_string oc "\n"; - output_string oc " -\n"; - close_out oc -;; - - -end - -(* Signatures to build the test suite *) -module type CONFIGURATION = sig - val path : string - val result_basename : string - val num_runs : int - val run_without_output : bool - val run_with_output : bool -end - - -module type ENGINE = sig - val name : string - val description : string - val command : string - val mk_queryfile : bool -> (string -> string -> string -> unit) - val mk_cmdline : bool -> string -> string -> string -> string -> string list - val time_factor : float - val reference : bool - val parse_rules : (string * Utils.stats_token list) list -end - -module type ENGINE_TESTER = -sig - module Conf : CONFIGURATION - val test_engine : Utils.result list -> Utils.queryset -> Utils.result list -end -module INIT_TESTER (C : CONFIGURATION) : ENGINE_TESTER = -struct - module Conf = C - let test_engine l _ = l -end - -module MK (E: ENGINE) (C : ENGINE_TESTER) : ENGINE_TESTER = -struct - module Conf = C.Conf - open Utils - - let run_query mem print_output qnum docnum qset = - let docname = fst qset.documents.(docnum) - in - let query = qset.queries.(qnum) in - let stats = empty_stats () in - let qfile = - Printf.sprintf "%s_%s_Q%.2d_D%.2d.query" - Conf.result_basename E.name qnum docnum - in - let _ = (E.mk_queryfile print_output) - docname query qfile - in - let qoutput = Printf.sprintf "%s_%s_Q%.2d_D%.2d.output" - Conf.result_basename E.name qnum docnum - in - let cmdline = Array.of_list - (E.command :: (E.mk_cmdline print_output qoutput qfile docname query)) - in - let in_fd,out_fd = Unix.pipe () in - let in_c = Unix.in_channel_of_descr in_fd in - let pid = Unix.create_process E.command cmdline - Unix.stdin out_fd out_fd - in - parse_result (if mem then pid else 0) in_c stats E.parse_rules; - (try - ignore(Unix.waitpid [] pid) - with - _ -> ()); - Unix.close in_fd; - Unix.close out_fd; - Some stats - - let extract = function Some a -> a | _ -> failwith "extract" - let vb = function false -> 0 | _ -> 1 - let test_engine res qset = - let header = (E.name, E.description) in - let _ = Printf.printf "Testing engine %s\n%!" E.name in - let lres = ref [] in - let _ = Array.iteri - (fun qnum _ -> - Array.iteri ( fun docnum (_,docsize) -> - let sres = [| (empty_stats()); (empty_stats()) |] in - Printf.printf "Running query %.2d on document %.2d :\n%!" qnum docnum; - - for k = 0 to 1 do - for j = (vb (not Conf.run_without_output)) to (vb Conf.run_with_output) do - if j = 0 then Printf.printf "No output : \t%!" - else Printf.printf "With output : \t%!"; - for i = 1 to Conf.num_runs do - let s = extract(run_query (k==1) (j==1) qnum docnum qset) in - sres.(j).query_execution_time <- s.query_execution_time; - sres.(j).query_compile_time <- s.query_compile_time; - if j == 1 then sres.(j).serialization_time <- s.serialization_time; - if k == 1 then sres.(j).memory_usage <- s.memory_usage; - Printf.printf "pass %i ... %!" i; - done; - Printf.printf "Ok\n%!"; - done; - if (k == 0) - then Printf.printf "Monitoring memory use: ... \n%!"; - done; - let _ = Array.iteri (fun i s -> s.print_output <- (i==1); - s.query <- qnum; - s.input_document <- docnum; - s.input_size <- docsize) sres - in lres := sres::!lres - ) qset.documents - )qset.queries - in - C.test_engine ((header,!lres)::res) qset - - - -end