X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=main.ml;h=c8ed40e1cfed841f0c95f99ba0da467a8ab10a13;hb=05d12de7ce75866e7e7e19fba37720fda99c38ad;hp=c446ef39f2eef515f619a41ed90e13490fdbe743;hpb=dc91851aaeac91a71eba2c266d0227adea0c5815;p=SXSI%2Fxpathcomp.git diff --git a/main.ml b/main.ml index c446ef3..c8ed40e 100644 --- a/main.ml +++ b/main.ml @@ -13,16 +13,19 @@ let time f x = let t1 = Unix.gettimeofday () in let r = f x in let t2 = Unix.gettimeofday () in - let t = (1000. *.(t2 -. t1)) in + let t = (1000. *. (t2 -. t1)) in l:= t::!l; Printf.eprintf " %fms\n%!" t ; r ;; let total_time () = List.fold_left (+.) 0. !l;; - +let enabled_gc = Gc.get() +let disabled_gc = { Gc.get() with + Gc.max_overhead = 1000000; + Gc.space_overhead = 100 } let main v query output = - let _ = Tag.init (Tree.Binary.tag_pool v) in + let _ = Tag.init (Tree.tag_pool v) in Printf.eprintf "Parsing query : "; let query = try time @@ -32,27 +35,56 @@ let main v query output = in XPath.Ast.print Format.err_formatter query; Format.fprintf Format.err_formatter "\n%!"; -(* Printf.eprintf "Dummy iteration : "; - time (fill_hashtag) v; - Printf.eprintf "Dummy iteration (tag access cached) : "; - time (fill_hashtag) v; -*) Printf.eprintf "Compiling query : "; let auto,ltags,contains = time XPath.Compile.compile query in let _ = Ata.dump Format.err_formatter auto in let _ = Printf.eprintf "%!" in - let _ = match contains with - None -> () - | Some s -> Tree.Binary.init_contains v s + + let do_contains = match contains with + None -> false + | Some s -> + let r = Tree.count v s + in + Printf.eprintf "%i documents in the TextCollection\n" (Tree.text_size v); + Printf.eprintf "Global count is %i, using " r; + if r < !Options.tc_threshold then begin + Printf.eprintf "TextCollection contains\nTiming call to raw global contains (1st time): "; + time (Tree.unsorted_contains v) s; + Printf.eprintf "Calling global contains : "; + time (Tree.init_contains v) s; + Printf.eprintf "Timing call to global count contains : "; + let r = time (Tree.count_contains v) s + in + Printf.eprintf " number of matching nodes %i \n%!" r; + Printf.eprintf "Timing call to raw global contains (2nd time): "; + time (Tree.unsorted_contains v) s; + end + else begin + Printf.eprintf "Naive contains\nCalling global contains : "; + time (Tree.init_naive_contains v) s + end;true in - Printf.eprintf "Execution time %s : " (if !Options.count_only then "(counting only)" else ""); + Printf.eprintf "Execution time %s : " + (if !Options.count_only then "(counting only)" else if !Options.backward then "(bottomup)" else ""); begin - if !Options.count_only then - failwith "Count only not implemented in this version" + let _ = Gc.full_major();Gc.compact() in + let _ = Gc.set (disabled_gc) in + if !Options.backward then + let tag,set = List.hd ltags in + let r = if do_contains + then time (bottom_up_count_contains auto) v + else time (bottom_up_count auto v) tag in + let _ = Printf.eprintf "Number of nodes in the result set : %i\n%!" r + in () + else + if !Options.count_only then + let r = time ( top_down_count auto ) v in (* not clean *) + let _ = Printf.eprintf "Number of nodes in the result set : %i\n%!" r + in () else - let _ = Gc.set ({ Gc.get() with Gc.max_overhead = 1000000; Gc.space_overhead = 100 }) in - let result = time (if !Options.time then run_time auto else run auto) v in - Printf.eprintf "Number of nodes in the result set : %i\n" (TS.length result); + let result = time (top_down auto) v in + let rcount = IdSet.length result in + Printf.eprintf "Number of nodes in the result set : %i\n" rcount; Printf.eprintf "\n%!"; begin match output with @@ -62,11 +94,15 @@ let main v query output = time( fun () -> let oc = open_out f in output_string oc "\n"; - TS.rev_iter (fun t -> output_string oc "----------\n"; - Tree.Binary.print_xml_fast oc t; - output_char oc '\n') result) (); + IdSet.iter (fun t -> + Tree.print_xml_fast oc t; + output_char oc '\n'; + output_string oc "----------\n"; + ) result) (); end; end; + let _ = Gc.set enabled_gc in +(* let _ = Ata.dump Format.err_formatter auto in *) Printf.eprintf "Total running time : %fms\n%!" (total_time()) ;; @@ -77,19 +113,19 @@ let v = then begin Printf.eprintf "Loading from file : "; - time (Tree.Binary.load ~sample:!Options.sample_factor ) - (Filename.chop_suffix !Options.input_file ".srx"); - end + time (Tree.load ~sample:!Options.sample_factor ) + (Filename.chop_suffix !Options.input_file ".srx"); + end else let v = - time (fun () -> let v = Tree.Binary.parse_xml_uri !Options.input_file; + time (fun () -> let v = Tree.parse_xml_uri !Options.input_file; in Printf.eprintf "Parsing document : %!";v ) () in if !Options.save_file <> "" then begin Printf.eprintf "Writing file to disk : "; - time (Tree.Binary.save v) !Options.save_file; + time (Tree.save v) !Options.save_file; end; v in