(******************************************************************************) (* SXSI : XPath evaluator *) (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *) (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) INCLUDE "debug.ml" open Ata let l = ref [] ;; let time f x = let t1 = Unix.gettimeofday () in let r = f x in let t2 = Unix.gettimeofday () in let t = (1000. *.(t2 -. t1)) in l:= t::!l; Printf.eprintf " %fms\n%!" t ; r ;; let total_time () = List.fold_left (+.) 0. !l;; let poa = TagSet.add Tag.pcdata (TagSet.singleton Tag.attribute) let rec fill_hashtag t = if Tree.Binary.is_node t then begin let tag = Tree.Binary.tag t in let a = if TagSet.mem tag poa then 0 else fill_hashtag (Tree.Binary.first_child t) in let b = fill_hashtag (Tree.Binary.next_sibling t) in a+b+1 end else 0 let test_slashslash tree k = let test = match k with "*" -> TagSet.remove (Tag.tag "") TagSet.star | s -> TagSet.singleton (Tag.tag k) in let attorstring = TagSet.cup TagSet.pcdata TagSet.attribute in let rec aux t acc = if Tree.Binary.is_node t then let tag = Tree.Binary.tag t in let l = Tree.Binary.first_child t and r = Tree.Binary.next_sibling t in let acc = if TagSet.mem tag test then TS.append t acc else acc in let rl = if TagSet.mem tag attorstring then acc else aux l acc in aux r rl else acc in let _ = Printf.eprintf "Testing optimal //%s ... " k in let r = time (aux tree ) TS.empty in Printf.eprintf "Result set is %i nodes\n%!" (TS.length r) let test_jump tree k = let ttag = Tag.tag k in let rec loop acc tree = if Tree.Binary.is_node tree then let acc = TS.cons tree acc in loop acc (Tree.Binary.tagged_foll tree ttag) else acc in let _ = Printf.eprintf "Testing jumping for tag %s ... " k in let r = time (loop TS.empty ) (Tree.Binary.tagged_next tree ttag) in Printf.eprintf "Result set is %i nodes\n%!" (TS.length r) let test_traversal tree k = let ttag = Tag.tag k in let iid t = if Tree.Binary.is_node t then Tree.Binary.id t else -1 in let rec aux t = if Tree.Binary.is_node t then let tag = Tree.Binary.tag t in let l = Tree.Binary.first_child t and r = Tree.Binary.next_sibling t in let _ = Printf.eprintf "Tree with id %i and tag=%s, tagged_desc %s is %i tagged_foll is %i\n%!" (Tree.Binary.id t) (Tag.to_string tag) (k) (iid (Tree.Binary.tagged_desc t ttag)) (iid (Tree.Binary.tagged_foll t ttag)) in aux l; aux r; else () in aux tree let test_count_subtree tree k = let ttag = Tag.tag k in let _ = Printf.eprintf "Counting subtrees with tag %s ... %!" k in let r = time(Tree.Binary.subtree_tags tree) ttag in Printf.eprintf "%i nodes \n%!" r let test_contains tree s = let _ = Printf.eprintf "Fetching DocIds containing %s ... %!" s in time (fun s -> let r = Tree.Binary.contains tree s in Tree.Binary.DocIdSet.iter (fun t -> output_string stderr (Tree.Binary.get_string tree t); output_char stderr '\n') r ) s let test_count_contains tree s = let _ = Printf.eprintf "Counting DocIds containing %s ... %!" s in let r = time (Tree.Binary.count_contains tree) s in Printf.eprintf "%i documents ids\n%!" (r) let test_contains_old tree s = let _ = Printf.eprintf "Fetching (old) DocIds containing %s ... %!" s in let r = time (Tree.Binary.contains_old tree) s in Printf.eprintf "%i documents ids\n%!" (Tree.Binary.DocIdSet.cardinal r) let test_contains_iter tree s = let _ = Printf.eprintf "Fetching (old) DocIds containing %s ... %!" s in let r = time (Tree.Binary.contains_iter tree) s in Printf.eprintf "%i documents ids\n%!" (Tree.Binary.DocIdSet.cardinal r) module Stack = struct type t = { mutable table: Tree.Binary.t array; mutable top : int } let empty = { table = Array.make 0 (Obj.magic 0); top = 0 } let cons e s = let ls = Array.length s.table in if ls > s.top then begin s.table.(s.top) <- e; s.top <- s.top + 1; s end else let a = Array.make (ls * 2 + 1) (Tree.Binary.root e) in Array.blit s.table 0 a 0 ls; s.table <- a; s.table.(s.top) <- e; s.top <- s.top + 1; s end let test_fast tree = let rec aux t acc = if Tree.Binary.is_node t then aux (Tree.Binary.right t)( aux (Tree.Binary.left t) (Stack.cons t acc)) else acc in let _ = Printf.eprintf "Fast traversal ...%!" in time (aux tree) Stack.empty let test_cps tree = let rec aux t acc cont = if Tree.Binary.is_node t then aux (Tree.Binary.left t) (Stack.cons t acc) ((Tree.Binary.right t)::cont) else match cont with | [] -> acc | p::r -> aux p acc r in let _ = Printf.eprintf "CPS traversal ...%!" in time (aux tree Stack.empty) [] let main v query output = let _ = Tag.init (Tree.Binary.tag_pool v) in Printf.eprintf "Parsing query : "; let query = try time XPath.Parser.parse_string query with Ulexer.Loc.Exc_located ((x,y),e) -> Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);exit 1 in XPath.Ast.print Format.err_formatter query; Format.fprintf Format.err_formatter "\n%!"; (* Printf.eprintf "Dummy iteration : "; time (fill_hashtag) v; Printf.eprintf "Dummy iteration (tag access cached) : "; time (fill_hashtag) v; *) Printf.eprintf "Compiling query : "; let auto,_ = time XPath.Compile.compile query in Printf.eprintf "Execution time %s : " (if !Options.count_only then "(counting only)" else ""); begin if !Options.count_only then let result = time (BottomUpNew.run_count auto) v in Printf.eprintf "Number of nodes in the result set : %i\n" result else let result = time (BottomUpNew.run auto) v in Printf.eprintf "Number of nodes in the result set : %i\n" (TS.length result); begin match output with | None -> () | Some f -> Printf.eprintf "Serializing results : "; time( fun () -> let oc = open_out f in output_string oc "\n"; TS.iter (fun t -> output_string oc "----------\n"; Tree.Binary.print_xml_fast oc t; output_char oc '\n') result) (); end; end; Printf.eprintf "Total running time : %fms\n%!" (total_time()) ;; Options.parse_cmdline();; let v = if (Filename.check_suffix !Options.input_file ".srx") then begin Printf.eprintf "Loading from file : "; time (Tree.Binary.load ~sample:!Options.sample_factor ) (Filename.chop_suffix !Options.input_file ".srx"); end else let v = time (fun () -> let v = Tree.Binary.parse_xml_uri !Options.input_file; in Printf.eprintf "Parsing document : %!";v ) () in if !Options.save_file <> "" then begin Printf.eprintf "Writing file to disk : "; time (Tree.Binary.save v) !Options.save_file; end; v in main v !Options.query !Options.output_file;; IFDEF DEBUG THEN Printf.eprintf "\n=================================================\nDEBUGGING\n%!"; Tree.DEBUGTREE.print_stats Format.err_formatter;; Gc.full_major() ENDIF