(******************************************************************************) (* SXSI : XPath evaluator *) (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *) (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) INCLUDE "debug.ml" open Ata let l = ref [] ;; let time f x = let t1 = Unix.gettimeofday () in let r = f x in let t2 = Unix.gettimeofday () in let t = (1000. *.(t2 -. t1)) in l:= t::!l; Printf.eprintf " %fms\n%!" t ; r ;; let total_time () = List.fold_left (+.) 0. !l;; let test_slashslash tree k = let test = match k with "*" -> TagSet.remove (Tag.tag "") TagSet.star | s -> TagSet.singleton (Tag.tag k) in let attorstring = TagSet.cup TagSet.pcdata TagSet.attribute in let rec aux t acc = if Tree.Binary.is_node t then let tag = Tree.Binary.tag t in let l = Tree.Binary.first_child t and r = Tree.Binary.next_sibling t in let acc = if TagSet.mem tag test then TS.append t acc else acc in let rl = if TagSet.mem tag attorstring then acc else aux l acc in aux r rl else acc in let _ = Printf.eprintf "Testing optimal //%s ... " k in let r = time (aux tree ) TS.empty in Printf.eprintf "Result set is %i nodes\n%!" (TS.length r) let test_jump tree k = let ttag = Tag.tag k in let rec loop acc tree = if Tree.Binary.is_node tree then let acc = TS.cons tree acc in loop acc (Tree.Binary.tagged_next tree ttag) else acc in let _ = Printf.eprintf "Testing jumping for tag %s ... " k in let r = time (loop TS.empty ) (Tree.Binary.tagged_next tree ttag) in Printf.eprintf "Result set is %i nodes\n%!" (TS.length r) let test_traversal tree k = let ttag = Tag.tag k in let iid t = if Tree.Binary.is_node t then Tree.Binary.id t else -1 in let rec aux t = if Tree.Binary.is_node t then let tag = Tree.Binary.tag t in let l = Tree.Binary.first_child t and r = Tree.Binary.next_sibling t in let _ = Printf.eprintf "Tree with id %i and tag=%s, tagged_desc %s is %i tagged_foll is %i, tagged_next is %i\n%!" (Tree.Binary.id t) (Tag.to_string tag) (k) (iid (Tree.Binary.tagged_desc t ttag)) (iid (Tree.Binary.tagged_foll t ttag)) (iid (Tree.Binary.tagged_next t ttag)) in aux l; aux r; else () in aux tree let test_count_subtree tree k = let ttag = Tag.tag k in let _ = Printf.eprintf "Counting subtrees with tag %s ... %!" k in let r = time(Tree.Binary.subtree_tags tree) ttag in Printf.eprintf "%i nodes \n%!" r let main v query output = let _ = Tag.init (Tree.Binary.tag_pool v) in Printf.eprintf "Parsing query : "; let query = try time XPath.Parser.parse_string query with Ulexer.Loc.Exc_located ((x,y),e) -> Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);exit 1 in XPath.Ast.print Format.err_formatter query; Format.fprintf Format.err_formatter "\n%!"; Printf.eprintf "Compiling query : "; let auto = time XPath.Compile.compile query in let _ = Ata.dump Format.err_formatter auto ; Format.fprintf Format.err_formatter "\n%!" in let _ = test_count_subtree v "keyword" in let _ = test_jump v "keyword" in Printf.eprintf "Execution time : "; let result = time (BottomUpNew.run auto) v in Printf.eprintf "Number of nodes in the result set : %i\n" (TS.length result); begin match output with | None -> () | Some f -> Printf.eprintf "Serializing results : "; time( fun () -> let oc = open_out f in output_string oc "\n"; TS.iter (fun t -> Tree.Binary.print_xml_fast oc t; output_char oc '\n') result) (); end; (* let _ = Ata.dump Format.err_formatter auto in Format.fprintf Format.err_formatter "\n%!"; *) Printf.eprintf "Total time : %fms\n%!" (total_time()) ;; Options.parse_cmdline();; let v = if (Filename.check_suffix !Options.input_file ".srx") then begin Printf.eprintf "Loading from file : "; time (Tree.Binary.load ~sample:!Options.sample_factor ) (Filename.chop_suffix !Options.input_file ".srx"); end else let v = time (fun () -> let v = Tree.Binary.parse_xml_uri !Options.input_file; in Printf.eprintf "Parsing document : %!";v ) () in if !Options.save_file <> "" then begin Printf.eprintf "Writing file to disk : "; time (Tree.Binary.save v) !Options.save_file; end; v in main v !Options.query !Options.output_file;; IFDEF DEBUG THEN Printf.eprintf "\n=================================================\nDEBUGGING\n%!"; Tree.DEBUGTREE.print_stats Format.err_formatter;; Gc.full_major() ENDIF