1 (******************************************************************************)
2 (* SXSI : XPath evaluator *)
3 (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *)
4 (* Copyright NICTA 2008 *)
5 (* Distributed under the terms of the LGPL (see LICENCE) *)
6 (******************************************************************************)
10 let () = init_timer();;
12 let default_gc = Gc.get()
13 let tuned_gc = { Gc.get() with
14 Gc.minor_heap_size = 4*1024*1024;
15 Gc.major_heap_increment = 1024*1024;
16 Gc.max_overhead = 1000000;
18 let hash x = 131*x/(x-1+1)
20 let test_loop tree tag =
21 let t' = Tree.tagged_descendant tree tag Tree.root in
22 let f = Hashtbl.create 4096
24 let jump t _ = Tree.tagged_following_below tree tag t Tree.root in
26 if t == Tree.nil then 0
27 else 1+ ((Hashtbl.find f (hash 101)) (jump t ctx) ctx)
29 Hashtbl.add f (hash 101) g;
30 (Hashtbl.find f (hash 101)) t' Tree.root
33 let root = Tree.root in
34 let fin = Tree.closing tree root in
35 let rec loop t = if t <= fin then
36 let tag = Tree.tag tree t in
37 (* let _ = Tag.to_string tag in *)
38 if tag == Tag.pcdata then (ignore (Tree.get_text tree t));
39 let t = (Obj.magic ((Obj.magic t) + 1)) in
45 let test_loop2 tree tag =
46 let t' = Tree.tagged_descendant tree tag Tree.root in
47 let f = Hashtbl.create 4096
49 let jump t _ = Tree.tagged_following_below tree tag t Tree.root in
51 if t == Tree.nil then 0
52 else 1+ (match (Hashtbl.find f (hash 101)) with
53 `Foo ->g (jump t ctx) ctx
56 Hashtbl.add f (hash 101) `Foo;
60 let _ = Printf.eprintf "Contains(bree)" in
61 let _ = time (Tree.test_contains doc) "bree" in
62 let _ = Printf.eprintf "Contains(brain)" in
63 let _ = time (Tree.test_contains doc) "brain" in
64 let _ = Printf.eprintf "Contains(brain)" in
65 let i = time (Tree.test_contains doc) "brain" in
66 let _ = Printf.eprintf "%i\nContains(Australia)" i in
67 let i = time (Tree.test_contains doc) "AUSTRALIA" in
68 let _ = Printf.eprintf "%i\n Contains(1930)" i in
69 let i = time (Tree.test_contains doc) "1930" in
70 let _ = Printf.eprintf "%i\n startswith(bar)" i in
71 let i = time (Tree.test_prefix doc) "bar" in
72 let _ = Printf.eprintf "%i\n endswith(LAND)" i in
73 let i = time (Tree.test_suffix doc) "LAND" in
74 let _ = Printf.eprintf "%i\n =(2001)" i in
75 let i = time (Tree.test_equals doc) "2001" in
76 let _ = Printf.eprintf "%i\n =(Nguyen)" i in
77 let i = time (Tree.test_equals doc) "Nguyen" in
78 Printf.eprintf "%i\n" i ;
82 external build_pointers : Tree.t -> pointers = "caml_build_pointers"
83 external iter_pointers : pointers -> int = "caml_iter_pointers"
84 external free_pointers : pointers -> unit = "caml_free_pointers"
87 let main v query_string output =
89 let _ = Tag.init (Tree.tag_pool v) in
90 Printf.eprintf "Parsing query : ";
93 XPath.Parser.parse_string query_string
95 Ulexer.Loc.Exc_located ((x,y),e) -> Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);exit 1
97 let _ = Printf.eprintf "Number of nodes %i\n%!" (Tree.size v) in
98 (* let _ = test_text v in *)
99 (* let _ = Tree.stats v in *)
100 let _ = Printf.eprintf "Timing first_child/next_sibling %!" in
101 let _ = time ~count:3 (Tree.benchmark_fcns) v in
102 (* let _ = Printf.eprintf "Timing last_child/prev_sibling %!" in
103 let _ = time (Tree.benchmark_lcps) v in
104 let _ = Printf.eprintf "Timing jump to a %!" in
105 let _ = time (Tree.benchmark_jump v) (Tag.tag "a") in
106 let _ = Printf.eprintf "Timing pointer allocation %!" in
107 let pointers = time (build_pointers) v in
108 let _ = Printf.eprintf "Timing pointer iteration %!" in
109 let i = time (iter_pointers) pointers in
110 let _ = Printf.eprintf "Traversed %i pointers\n\nTiming pointer deallocation %!" i in
111 let _ = time (free_pointers) pointers in *)
112 (* let _ = Printf.eprintf "Timing //keyword :" in
113 let r = time (test_loop v) (Tag.tag "keyword") in
114 let _ = Printf.eprintf "Count is %i\n%!" r in
115 let _ = Printf.eprintf "Timing //keyword 2:" in
116 let r = time (test_loop2 v) (Tag.tag "keyword") in
117 let _ = Printf.eprintf "Count is %i\n%!" r in
118 let _ = Printf.eprintf "Timing //node() :" in
119 let _ = time (test_full) v in *)
120 XPath.Ast.print Format.err_formatter query;
121 Format.fprintf Format.err_formatter "\n%!";
122 Printf.eprintf "Compiling query : ";
123 let auto,ltags,contains = time (XPath.Compile.compile ~querystring:query_string) query in
124 let _ = Ata.dump Format.err_formatter auto in
125 let _ = Printf.eprintf "%!" in
128 None -> (max_int,`NOTHING)
130 let r = Tree.count v s
132 Printf.eprintf "%i documents in the TextCollection\n" (Tree.text_size v);
133 Printf.eprintf "Global count is %i, using " r;
134 if r < !Options.tc_threshold then begin
135 Printf.eprintf "TextCollection contains\nCalling global contains : ";
136 time (Tree.init_textfun op v) s;
139 Printf.eprintf "Naive contains\nCalling global contains : ";
140 time (Tree.init_naive_contains v) s
143 let test_list = jump_to in
146 if (!Options.backward) then begin
147 Printf.eprintf "Finding min occurences : ";
149 ( List.fold_left (fun ((min_occ,kind)as acc) (tag,_) ->
150 let numtags = Tree.subtree_tags v tag Tree.root in
151 if ((numtags < min_occ) && numtags >= 2)
152 then (numtags,`TAG(tag))
153 else acc) jump_to) ltags
155 else (max_int,`NOTHING)
157 let _ = if (snd test_list) != `NOTHING then
158 let occ,s1,s2 = match test_list with
159 | (x,`TAG (tag)) -> (x, "tag", (Tag.to_string tag))
160 | (x,`CONTAINS(s)) -> (x, "contains()", s)
163 Printf.eprintf "Will jump to %s %s occuring %i time\n%!" s1 s2 occ
165 Printf.eprintf "Execution time %s : "
166 (if !Options.count_only then "(counting only)" else if !Options.backward then "(bottomup)" else "");
168 let _ = Gc.full_major();Gc.compact() in
169 let _ = Printf.eprintf "%!" in
170 let _ = Gc.set (tuned_gc) in
171 if !Options.backward && ((snd test_list) != `NOTHING )then
172 if !Options.count_only then
173 let r = time_mem (bottom_up_count auto v )(snd test_list) in
174 let _ = Printf.eprintf "Number of nodes in the result set : %i\n%!" r
177 let r = time_mem (bottom_up auto v )(snd test_list) in
178 let _ = Printf.eprintf "Number of nodes in the result set : %i\n%!" (IdSet.length r)
184 Printf.eprintf "Serializing results : ";
186 (*let oc = open_out f in *)
187 let oc = Unix.openfile f [ Unix.O_WRONLY;Unix.O_TRUNC;Unix.O_CREAT] 0o644 in
188 (*output_string oc "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";*)
190 Tree.print_xml_fast3 v t oc;
191 (*output_char oc '\n'; *)
197 if !Options.backward then Printf.eprintf "WARNING: couldn't find a jumping point, running top-down\n"
199 if !Options.count_only then
200 let r = time ~count:5 ( top_down_count1 auto ) v in
201 let _ = Printf.eprintf "Number of nodes in the result set : %i\n%!" r
204 let module GR = Ata(*.Test(struct let doc = v end) *) in
205 let result = time ~count:5 (GR.top_down1 auto) v in
206 let _ = Printf.eprintf "Counting results " in
207 let rcount = time (IdSet.length) result in
208 Printf.eprintf "Number of nodes in the result set : %i\n" rcount;
209 Printf.eprintf "\n%!";
214 Printf.eprintf "Serializing results : ";
216 Unix.openfile f [ Unix.O_WRONLY;Unix.O_TRUNC;Unix.O_CREAT] 0o644
222 Tree.print_xml_fast3 v t oc;
227 Printf.eprintf "Total running time : %fms\n%!" (total_time())
230 Options.parse_cmdline();;
233 if (Filename.check_suffix !Options.input_file ".srx")
236 Printf.eprintf "Loading from file : ";
237 time (Tree.load ~sample:!Options.sample_factor ~load_text:(not !Options.count_only))
242 time (fun () -> let v = Tree.parse_xml_uri !Options.input_file;
243 in Printf.eprintf "Parsing document : %!";v
246 if !Options.save_file <> ""
248 Printf.eprintf "Writing file to disk : ";
249 time (Tree.save v) !Options.save_file;
253 main v !Options.query !Options.output_file;;