module Utils = struct type queryset = { documents : (string*int) array; queries : string array;} let make_queryset docs queries = { documents = Array.of_list (List.map (function f -> (f,(Unix.stat f).Unix.st_size)) docs); queries = Array.of_list queries } type stats = { mutable query : int; (* index in the query set *) mutable input_document : int; (* index in the query set *) mutable input_size : int; mutable print_output : bool; mutable input_parsing_time : float; mutable query_compile_time : float; mutable query_execution_time : float; mutable serialization_time : float; mutable memory_usage : int; } type stats_token = | Query of int | Input_document of int | Print_output of int | Input_size of int | Input_parsing_time of int | Query_compile_time of int | Query_execution_time of int | Serialization_time of int | Memory_usage of int let print_stats fmt s = Format.fprintf fmt "\ Query number : %i Document number : %i Document size : %i bytes Output was serialized : %s Parsing time : %f ms Compile time : %f ms Execution time : %f ms Serialization time : %f ms Memory usage : %i kB\n" s.query s.input_document s.input_size (if s.print_output then "yes" else "no") s.input_parsing_time s.query_compile_time s.query_execution_time s.serialization_time (s.memory_usage/1024) let empty_stats () = { query = -1; input_document = -1; input_size = -1; print_output = false; input_parsing_time = infinity; query_compile_time = infinity; query_execution_time = infinity; serialization_time = infinity; memory_usage = 0 } type result = (string*string)*(stats array list) let re_vmrss = Str.regexp ".*VmRSS:[^0-9]*\\([0-9]+\\) kB.*" let re_status = Str.regexp ".*Status:[^A-Z]*\\([A-Z]\\).*" let monitor_mem pid = let zombie = ref false in let rss = ref 0 in let in_c = open_in (Printf.sprintf "/proc/%i/status" pid) in let _ = try while true do let s = input_line in_c in if Str.string_match re_vmrss s 0 then rss:= int_of_string (Str.matched_group 1 s); if Str.string_match re_status s 0 then zombie:= (Str.matched_group 1 s) = "Z" done; with End_of_file -> close_in in_c in (!zombie,!rss*1024) let input_str = ref "" let get_res i = Str.matched_group i !input_str let rec match_out stats = function [] -> [] | (s,l)::r -> if Str.string_match s !input_str 0 then let () = List.iter (function Query i -> stats.query <- int_of_string (get_res i) | Input_document i -> stats.input_document <- int_of_string (get_res i) | Input_size i -> stats.input_size <- int_of_string (get_res i) | Print_output _ -> stats.print_output <- false; | Input_parsing_time i -> stats.input_parsing_time <- float_of_string (get_res i) | Query_compile_time i -> stats.query_compile_time <- float_of_string (get_res i) | Query_execution_time i -> stats.query_execution_time <- float_of_string (get_res i) | Serialization_time i -> stats.serialization_time <- float_of_string (get_res i) | Memory_usage i -> stats.memory_usage <- int_of_string (get_res i)) l in r else (s,l)::(match_out stats r) let parse_result pid in_c stats l = let descr = Unix.descr_of_in_channel in_c in let l = ref (List.map (fun (s,f) -> (Str.regexp s,f)) l) in try while !l <> [] && (0 == (fst (Unix.waitpid [ Unix.WNOHANG ] pid))) do let _ = match Unix.select [descr] [] [] 0.75 with | [d ],_,_ -> input_str := input_line in_c; l := match_out stats !l; | _ -> () in if pid != 0 then let z,rss = monitor_mem pid in if rss > stats.memory_usage then stats.memory_usage <- rss; if z then raise End_of_file done with | End_of_file | Unix.Unix_error(Unix.ECHILD,_,_) -> () | e -> raise e let build_xquery document query file print_output = let oc = open_out file in output_string oc "let $doc := doc(\""; output_string oc document; output_string oc "\") for $i in $doc"; output_string oc query; output_string oc "\n let $a := "; if print_output then output_string oc "$i" else output_string oc "()"; output_string oc "\n let $b := 1+1"; output_string oc "\n return $a\n"; close_out oc (* usually xslt processor take the filename as argument on the command line *) let build_xslt _ query file print_output = let oc = open_out file in output_string oc " \n"; if print_output then output_string oc "\n"; output_string oc " \n"; close_out oc ;; end (* Signatures to build the test suite *) module type CONFIGURATION = sig val path : string val result_basename : string val num_runs : int val run_without_output : bool val run_with_output : bool end module type ENGINE = sig val name : string val description : string val command : string val mk_queryfile : bool -> (string -> string -> string -> unit) val mk_cmdline : bool -> string -> string -> string -> string -> string list val time_factor : float val reference : bool val parse_rules : (string * Utils.stats_token list) list end module type ENGINE_TESTER = sig module Conf : CONFIGURATION val test_engine : Utils.result list -> Utils.queryset -> Utils.result list end module INIT_TESTER (C : CONFIGURATION) : ENGINE_TESTER = struct module Conf = C let test_engine l _ = l end module MK (E: ENGINE) (C : ENGINE_TESTER) : ENGINE_TESTER = struct module Conf = C.Conf open Utils let run_query mem print_output qnum docnum qset = let docname = fst qset.documents.(docnum) in let query = qset.queries.(qnum) in let stats = empty_stats () in let qfile = Printf.sprintf "%s_%s_Q%.2d_D%.2d.query" Conf.result_basename E.name qnum docnum in let _ = (E.mk_queryfile print_output) docname query qfile in let qoutput = Printf.sprintf "%s_%s_Q%.2d_D%.2d.output" Conf.result_basename E.name qnum docnum in let cmdline = Array.of_list (E.command :: (E.mk_cmdline print_output qoutput qfile docname query)) in let in_fd,out_fd = Unix.pipe () in let in_c = Unix.in_channel_of_descr in_fd in let pid = Unix.create_process E.command cmdline Unix.stdin out_fd out_fd in parse_result (if mem then pid else 0) in_c stats E.parse_rules; (try ignore(Unix.waitpid [] pid) with _ -> ()); Unix.close in_fd; Unix.close out_fd; Some stats let extract = function Some a -> a | _ -> failwith "extract" let vb = function false -> 0 | _ -> 1 let test_engine res qset = let header = (E.name, E.description) in let _ = Printf.printf "Testing engine %s\n%!" E.name in let lres = ref [] in let _ = Array.iteri (fun qnum _ -> Array.iteri ( fun docnum (_,docsize) -> let sres = [| (empty_stats()); (empty_stats()) |] in Printf.printf "Running query %.2d on document %.2d :\n%!" qnum docnum; for k = 0 to 1 do for j = (vb (not Conf.run_without_output)) to (vb Conf.run_with_output) do if j = 0 then Printf.printf "No output : \t%!" else Printf.printf "With output : \t%!"; for i = 1 to Conf.num_runs do let s = extract(run_query (k==1) (j==1) qnum docnum qset) in sres.(j).query_execution_time <- s.query_execution_time; sres.(j).query_compile_time <- s.query_compile_time; if j == 1 then sres.(j).serialization_time <- s.serialization_time; if k == 1 then sres.(j).memory_usage <- s.memory_usage; Printf.printf "pass %i ... %!" i; done; Printf.printf "Ok\n%!"; done; if (k == 0) then Printf.printf "Monitoring memory use: ... \n%!"; done; let _ = Array.iteri (fun i s -> s.print_output <- (i==1); s.query <- qnum; s.input_document <- docnum; s.input_size <- docsize) sres in lres := sres::!lres ) qset.documents )qset.queries in C.test_engine ((header,!lres)::res) qset end