1 (******************************************************************************)
2 (* SXSI : XPath evaluator *)
3 (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *)
4 (* Copyright NICTA 2008 *)
5 (* Distributed under the terms of the LGPL (see LICENCE) *)
6 (******************************************************************************)
10 external init_lib : unit -> unit = "caml_init_lib"
12 exception CPlusPlusError of string
14 let () = Callback.register_exception "CPlusPlusError" (CPlusPlusError "")
20 type 'a node = private int
21 type node_kind = [`Text | `Tree ]
25 ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t;
28 external inode : 'a node -> int = "%identity"
29 external nodei : int -> 'a node = "%identity"
30 let compare_node x y = (inode x) - (inode y)
31 let equal_node : 'a node -> 'a node -> bool = (==)
34 external parse_xml_uri : string -> int -> bool -> bool -> tree = "caml_call_shredder_uri"
35 external parse_xml_string : string -> int -> bool -> bool -> tree = "caml_call_shredder_string"
36 external tree_print_xml_fast3 : tree -> [`Tree ] node -> Unix.file_descr ->unit = "caml_xml_tree_print"
37 external tree_save : tree -> Unix.file_descr -> unit = "caml_xml_tree_save"
38 external tree_load : Unix.file_descr -> bool -> int -> tree = "caml_xml_tree_load"
40 external nullt : unit -> 'a node = "caml_xml_tree_nullt"
42 let nil : [`Tree ] node = nodei ~-1
43 let nulldoc : [`Text ] node = nodei ~-1
44 let root : [`Tree ] node = nodei 0
46 external text_get_text : tree -> [`Text] node -> string = "caml_text_collection_get_text"
47 external text_is_empty : tree -> [`Text ] node -> bool = "caml_text_collection_empty_text"
49 let text_is_empty t n = (equal_node nulldoc n) || text_is_empty t n
51 external text_is_prefix : tree -> string -> bool = "caml_text_collection_is_prefix"
52 external text_is_suffix : tree -> string -> bool = "caml_text_collection_is_suffix"
53 external text_is_equal : tree -> string -> bool = "caml_text_collection_is_equal"
54 external text_is_contains : tree -> string -> bool = "caml_text_collection_is_contains"
55 external text_is_lessthan : tree -> string -> bool = "caml_text_collection_is_lessthan"
57 external text_count : tree -> string -> int = "caml_text_collection_count"
58 external text_count_prefix : tree -> string -> int = "caml_text_collection_count_prefix"
59 external text_count_suffix : tree -> string -> int = "caml_text_collection_count_suffix"
60 external text_count_equal : tree -> string -> int = "caml_text_collection_count_equal"
61 external text_count_contains : tree -> string -> int = "caml_text_collection_count_contains"
62 external text_count_lessthan : tree -> string -> int = "caml_text_collection_count_lessthan"
64 external text_prefix : tree -> string -> [`Text ] node array = "caml_text_collection_prefix"
65 external text_suffix : tree -> string -> [`Text ] node array = "caml_text_collection_suffix"
66 external text_equals : tree -> string -> [`Text ] node array = "caml_text_collection_equals"
67 external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains"
68 external text_lessthan : tree -> string -> [`Text ] node array = "caml_text_collection_lessthan"
71 external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" "noalloc"
72 external tree_size : tree -> int = "caml_xml_tree_size" "noalloc"
73 external tree_subtree_size : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_size" "noalloc"
74 external tree_subtree_elements : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_elements" "noalloc"
75 external tree_subtree_tags : tree -> [`Tree] node -> Tag.t -> int = "caml_xml_tree_subtree_elements" "noalloc"
77 let tree_is_nil x = equal_node x nil
78 external tree_is_leaf : tree -> [`Tree ] node -> bool = "caml_xml_tree_is_leaf" "noalloc"
79 external tree_is_ancestor : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" "noalloc"
80 external tree_is_child : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_child" "noalloc"
81 external tree_is_first_child : tree -> [`Tree ] node -> bool = "caml_xml_tree_is_first_child" "noalloc"
82 external tree_num_children : tree -> [`Tree ] node -> int = "caml_xml_tree_num_children" "noalloc"
83 external tree_child_number : tree -> [`Tree ] node -> int = "caml_xml_tree_child_number" "noalloc"
84 external tree_depth : tree -> [`Tree ] node -> int = "caml_xml_tree_depth" "noalloc"
85 external tree_preorder : tree -> [`Tree ] node -> int = "caml_xml_tree_preorder" "noalloc"
86 external tree_postorder : tree -> [`Tree ] node -> int = "caml_xml_tree_postorder" "noalloc"
87 external tree_tag : tree -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag" "noalloc"
88 external tree_doc_ids : tree -> [`Tree ] node -> [`Text] node*[`Text] node = "caml_xml_tree_doc_ids"
90 external tree_parent : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_parent" "noalloc"
91 external tree_child : tree -> [`Tree] node -> int -> [`Tree] node = "caml_xml_tree_child" "noalloc"
92 external tree_first_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" "noalloc"
93 external tree_first_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_element" "noalloc"
94 external tree_last_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_last_child" "noalloc"
95 external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" "noalloc"
96 external tree_next_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_element" "noalloc"
97 external tree_prev_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling" "noalloc"
98 external tree_tagged_child : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_child" "noalloc"
101 external unordered_set_alloc : int -> unordered_set = "caml_unordered_set_alloc"
102 external unordered_set_length : unordered_set -> int = "caml_unordered_set_length"
103 external unordered_set_insert : unordered_set -> int -> unit = "caml_unordered_set_set" "noalloc"
105 external tree_select_child : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_child" "noalloc"
106 external tree_tagged_following_sibling : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_following_sibling" "noalloc"
107 external tree_select_following_sibling : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_following_sibling" "noalloc"
108 external tree_tagged_descendant : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_descendant" "noalloc"
109 external tree_select_descendant : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_descendant" "noalloc"
110 external tree_tagged_following : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_following" "noalloc"
111 external tree_tagged_following_below : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_following_below" "noalloc"
112 external tree_select_following_below : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_following_below" "noalloc"
115 external tree_tagged_following_before : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_following_before" "noalloc"
116 external tree_select_following_below : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_following_before" "noalloc"
118 external tree_my_text : tree -> [`Tree ] node -> [`Text] node = "caml_xml_tree_my_text" "noalloc"
119 external tree_my_text_unsafe : tree -> [`Tree ] node -> [`Text] node = "caml_xml_tree_my_text_unsafe" "noalloc"
120 external tree_text_xml_id : tree -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" "noalloc"
121 external tree_node_xml_id : tree -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id" "noalloc"
123 external tree_parent_node : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_node" "noalloc"
125 (*external tree_prev_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_prev_doc" "noalloc" *)
127 external tree_closing : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_closing" "noalloc"
128 external tree_is_open : tree -> [`Tree] node -> bool = "caml_xml_tree_is_open" "noalloc"
131 external benchmark_jump : tree -> Tag.t -> unit = "caml_benchmark_jump" "noalloc"
133 let benchmark_jump t s = benchmark_jump t.doc s
135 external benchmark_fcns : tree -> unit = "caml_benchmark_fcns" "noalloc"
137 let benchmark_fcns t = benchmark_fcns t.doc
139 external benchmark_lcps : tree -> unit = "caml_benchmark_lcps" "noalloc"
141 let benchmark_lcps t = benchmark_lcps t.doc
149 let text_size tree = inode (snd ( tree_doc_ids tree root ))
151 let text_get_text t (x:[`Text] node) =
152 if x == nulldoc then ""
153 else text_get_text t x
158 module HPtset = Hashtbl.Make(Ptset.Int)
160 let vector_htbl = HPtset.create MED_H_SIZE
162 let ptset_to_vector s =
164 HPtset.find vector_htbl s
167 let v = unordered_set_alloc (Ptset.Int.cardinal s) in
168 let _ = Ptset.Int.iter (fun e -> unordered_set_insert v e) s in
169 HPtset.add vector_htbl s v; v
173 let subtree_size t i = tree_subtree_size t.doc i
174 let subtree_elements t i = tree_subtree_elements t.doc i
175 let text_size t = text_size t.doc
177 module MemUnion = Hashtbl.Make (struct
178 type t = Ptset.Int.t*Ptset.Int.t
179 let equal (x,y) (z,t) = x == z || y == t
180 let equal a b = equal a b || equal b a
181 let hash (x,y) = (* commutative hash *)
182 let x = Ptset.Int.uid x
183 and y = Ptset.Int.uid y
185 if x <= y then HASHINT2(x,y) else HASHINT2(y,x)
188 module MemAdd = Hashtbl.Make (
190 type t = Tag.t*Ptset.Int.t
191 let equal (x,y) (z,t) = (x == z)&&(y == t)
192 let hash (x,y) = HASHINT2(x,Ptset.Int.uid y)
195 module MemUpdate = struct
196 include Hashtbl.Make (
198 type t = Tag.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t
199 let equal (a1,b1,c1,d1,e1) (a2,b2,c2,d2,e2) = a1==a2 &&
200 b1 == b2 && c1 == c2 && d1 == d2 && e1 == e2
201 let hash (a,b,c,d,e) = HASHINT4(HASHINT2(a,Ptset.Int.uid b),Ptset.Int.uid c,Ptset.Int.uid d,Ptset.Int.uid e)
206 let collect_tags tree =
207 let _ = Printf.eprintf "Collecting Tags\n%!" in
208 let h_union = MemUnion.create BIG_H_SIZE in
211 MemUnion.find h_union (s1,s2)
213 | Not_found -> let s = Ptset.Int.union s1 s2
215 MemUnion.add h_union (s1,s2) s;s
217 let h_add = MemAdd.create BIG_H_SIZE in
219 try MemAdd.find h_add (t,s)
221 | Not_found -> let r = Ptset.Int.add t s in
222 MemAdd.add h_add (t,s) r;r
224 let h = Hashtbl.create BIG_H_SIZE in
225 let update t sc sb ss sa =
226 let schild,sbelow,ssibling,safter =
231 (Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty)
234 (pt_cup sc schild,pt_cup sbelow sb, pt_cup ssibling ss, pt_cup safter sa)
236 let rec loop right id acc_after =
238 then Ptset.Int.empty,Ptset.Int.empty,acc_after else
239 let sibling2,desc2,after2 = loop true (tree_next_sibling tree id) acc_after in
240 let child1,desc1,after1 = loop false (tree_first_child tree id) after2 in
241 let tag = tree_tag tree id in
242 update tag child1 desc1 sibling2 after2;
243 ( pt_add tag sibling2,
244 pt_add tag (pt_cup desc1 desc2),
245 if right then pt_cup after1 (pt_cup desc1 desc2) else acc_after )
247 let _ = loop false (tree_root tree) Ptset.Int.empty in
248 let _ = Printf.eprintf "Finished\n%!" in
254 let contains_array = ref [| |]
255 let contains_index = Hashtbl.create 4096
258 Hashtbl.find contains_index i
262 let init_textfun f t s =
264 | `CONTAINS -> text_contains t.doc s
265 | `STARTSWITH -> text_prefix t.doc s
266 | `ENDSWITH -> text_suffix t.doc s
267 | `EQUALS -> text_equals t.doc s
269 (*Array.fast_sort (compare) a; *)
271 Array.iter (fun x -> Hashtbl.add contains_index x true) !contains_array
273 let count_contains t s = text_count_contains t.doc s
275 let init_naive_contains t s =
276 let i,j = tree_doc_ids t.doc (tree_root t.doc)
278 let regexp = Str.regexp_string s in
281 let _ = Str.search_forward regexp arg 0;
285 let rec loop n acc l =
288 let s = text_get_text t.doc n
291 then loop (nodei ((inode n)+1)) (n::acc) (l+1)
292 else loop (nodei ((inode n)+1)) acc l
294 let acc,l = loop i [] 0 in
295 let a = Array.create l nulldoc in
296 let _ = List.fold_left (fun cpt e -> a.(cpt) <- e; (cpt-1)) (l-1) acc
302 let array_find a i j =
303 let l = Array.length a in
304 let rec loop idx x y =
305 if x > y || idx >= l then nulldoc
307 if a.(idx) >= x then if a.(idx) > y then nulldoc else (last_idx := idx;a.(idx))
308 else loop (idx+1) x y
310 if a.(0) > j || a.(l-1) < i then nulldoc
311 else loop !last_idx i j
313 let text_below tree t =
314 let l = Array.length !contains_array in
315 let i,j = tree_doc_ids tree.doc t in
316 let id = if l == 0 then i else (array_find !contains_array i j) in
317 tree_parent_node tree.doc id
319 let text_next tree t root =
320 let l = Array.length !contains_array in
321 let inf = nodei((inode(snd(tree_doc_ids tree.doc t)))+1) in
322 let _,j = tree_doc_ids tree.doc root in
323 let id = if l == 0 then if inf > j then nulldoc else inf
324 else array_find !contains_array inf j
326 tree_parent_node tree.doc id
330 module DocIdSet = struct
331 include Set.Make (struct type t = [`Text] node
332 let compare = compare_node end)
335 let is_nil t = t == nil
337 let is_node t = t != nil
338 let is_root t = t == root
341 let _ = Tag.init (Obj.magic t) in
342 let table = collect_tags t
344 let _ = Hashtbl.iter (fun t (c,d,ns,f) ->
345 Printf.eprintf "Tag %s has:\n" (Tag.to_string t);
346 Printf.eprintf "Child tags: ";
347 Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) c;
348 Printf.eprintf "\nDescendant tags: ";
349 Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) d;
350 Printf.eprintf "\nNextSibling tags: ";
351 Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) ns;
352 Printf.eprintf "\nFollowing tags: ";
353 Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) f;
354 Printf.eprintf "\n\n%!";) table
362 let finalize _ = Printf.eprintf "Release the string list !\n%!"
368 !Options.sample_factor
369 !Options.index_empty_texts
370 !Options.disable_text_collection)
372 let parse_xml_uri str = parse parse_xml_uri str
373 let parse_xml_string str = parse parse_xml_string str
375 let size t = tree_size t.doc;;
377 external pool : tree -> Tag.pool = "%identity"
379 let magic_string = "SXSI_INDEX"
380 let version_string = "2"
383 Unix.lseek fd 0 Unix.SEEK_CUR
385 let pr_pos fd = Printf.eprintf "At position %i\n%!" (pos fd)
388 let sl = String.length s in
389 let ssl = Printf.sprintf "%020i" sl in
390 ignore (Unix.write fd ssl 0 20);
391 ignore (Unix.write fd s 0 (String.length s))
393 let rec really_read fd buffer start length =
394 if length <= 0 then () else
395 match Unix.read fd buffer start length with
396 0 -> raise End_of_file
397 | r -> really_read fd buffer (start + r) (length - r);;
400 let buffer = String.create 20 in
401 let _ = really_read fd buffer 0 20 in
402 let size = int_of_string buffer in
403 let buffer = String.create size in
404 let _ = really_read fd buffer 0 size in
409 let fd = Unix.openfile str [ Unix.O_WRONLY;Unix.O_TRUNC;Unix.O_CREAT] 0o644 in
410 let out_c = Unix.out_channel_of_descr fd in
411 let _ = set_binary_mode_out out_c true in
412 output_string out_c magic_string;
413 output_char out_c '\n';
414 output_string out_c version_string;
415 output_char out_c '\n';
416 Marshal.to_channel out_c t.ttable [ ];
417 (* we need to move the fd to the correct position *)
419 ignore (Unix.lseek fd (pos_out out_c) Unix.SEEK_SET);
424 let load ?(sample=64) ?(load_text=true) str =
425 let fd = Unix.openfile str [ Unix.O_RDONLY ] 0o644 in
426 let in_c = Unix.in_channel_of_descr fd in
427 let _ = set_binary_mode_in in_c true in
429 (let ms = input_line in_c in if ms <> magic_string then failwith "Invalid index file");
430 (let vs = input_line in_c in if vs <> version_string then failwith "Invalid version file");
431 let table : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t =
432 Marshal.from_channel in_c
434 let ntable = Hashtbl.create (Hashtbl.length table) in
435 Hashtbl.iter (fun k (s1,s2,s3,s4) ->
436 let ss1 = Ptset.Int.fold (Ptset.Int.add) s1 Ptset.Int.empty
437 and ss2 = Ptset.Int.fold (Ptset.Int.add) s2 Ptset.Int.empty
438 and ss3 = Ptset.Int.fold (Ptset.Int.add) s3 Ptset.Int.empty
439 and ss4 = Ptset.Int.fold (Ptset.Int.add) s4 Ptset.Int.empty
440 in Hashtbl.add ntable k (ss1,ss2,ss3,ss4)
443 (* The in_channel read a chunk of fd, so we might be after
444 the start of the XMLTree save file. Reset to the correct
448 let _ = Printf.eprintf "\nLoading tag table : " in
449 let ntable = time (load_table) () in
450 ignore(Unix.lseek fd (pos_in in_c) Unix.SEEK_SET);
451 let tree = { doc = tree_load fd load_text sample;
459 let tag_pool t = pool t.doc
461 let compare = compare_node
463 let equal a b = a == b
467 | i -> Printf.sprintf "Node (%i)" i
469 let dump_node t = nts (inode t)
471 let is_left t n = tree_is_first_child t.doc n
475 let is_below_right t n1 n2 =
476 tree_is_ancestor t.doc (tree_parent t.doc n1) n2
477 && not (tree_is_ancestor t.doc n1 n2)
479 let is_binary_ancestor t n1 n2 =
480 let p = tree_parent t.doc n1 in
481 let fin = tree_closing t.doc p in
483 (* (is_below_right t n1 n2) ||
484 (tree_is_ancestor t.doc n1 n2) *)
486 let parent t n = tree_parent t.doc n
488 let first_child t = let doc = t.doc in ();fun n -> tree_first_child doc n
489 let first_element t = let doc = t.doc in (); fun n -> tree_first_element doc n
491 (* these function will be called in two times: first partial application
492 on the tag, then application of the tag and the tree, then application of
493 the other arguments. We use the trick to let the compiler optimize application
496 let tagged_child t tag = () ; fun n -> tree_tagged_child t.doc n tag
498 let select_child t = fun ts ->
499 let v = ptset_to_vector ts in ();
500 fun n -> tree_select_child t.doc n v
502 let next_sibling t = let doc = t.doc in (); fun n -> tree_next_sibling doc n
503 let next_element t = let doc = t.doc in (); fun n -> tree_next_element doc n
505 let tagged_following_sibling t tag = (); fun n -> tree_tagged_following_sibling t.doc n tag
507 let select_following_sibling t = fun ts ->
508 let v = (ptset_to_vector ts) in ();
509 fun n -> tree_select_following_sibling t.doc n v
511 let next_sibling_below t = (); fun n _ -> tree_next_sibling t.doc n
512 let next_element_below t = (); fun n _ -> tree_next_element t.doc n
513 let tagged_following_sibling_below t tag = (); fun n _ -> tree_tagged_following_sibling t.doc n tag
515 let select_following_sibling_below t = fun ts ->
516 let v = (ptset_to_vector ts) in ();
517 fun n _ -> tree_select_following_sibling t.doc n v
519 let id t n = tree_node_xml_id t.doc n
521 let tag t n = if n == nil then Tag.nullt else tree_tag t.doc n
523 let tagged_descendant t tag =
524 let doc = t.doc in (); fun n -> tree_tagged_descendant doc n tag
526 let select_descendant t = fun ts ->
527 let v = (ptset_to_vector ts) in ();
528 fun n -> tree_select_descendant t.doc n v
530 let tagged_following_below t tag =
532 (); fun n ctx -> tree_tagged_following_below doc n tag ctx
534 let select_following_below t = fun ts ->
535 let v = (ptset_to_vector ts) in ();
536 fun n ctx -> tree_select_following_below t.doc n v ctx
538 let closing t n = tree_closing t.doc n
539 let is_open t n = tree_is_open t.doc n
540 let get_text_id t n = tree_my_text t.doc n
543 let array_find a i j =
544 let l = Array.length a in
545 let rec loop idx x y =
546 if x > y || idx >= l then nil
548 if a.(idx) >= x then if a.(idx) > y then nil else (last_idx := idx;a.(idx))
549 else loop (idx+1) x y
551 if a.(0) > j || a.(l-1) < i then nil
552 else loop !last_idx i j
556 let count t s = text_count t.doc s
558 let init_stack () = stack := []
559 let push x = stack:= x::!stack
560 let peek () = match !stack with
562 | _ -> failwith "peek"
563 let pop () = match !stack with
565 | _ -> failwith "pop"
567 let next t = nodei ( (inode t) + 1 )
568 let next2 t = nodei ( (inode t) + 2 )
569 let next3 t = nodei ( (inode t) + 3 )
571 let print_xml_fast2 =
572 let _ = init_stack () in
573 let h = Hashtbl.create MED_H_SIZE in
574 let tag_str t = try Hashtbl.find h t with
575 Not_found -> let s = Tag.to_string t in
578 let h_att = Hashtbl.create MED_H_SIZE in
579 let att_str t = try Hashtbl.find h_att t with
580 Not_found -> let s = Tag.to_string t in
581 let attname = String.sub s 3 ((String.length s) -3) in
582 Hashtbl.add h_att t attname;attname
583 in fun outc tree t ->
584 let tree = tree.doc in
585 let fin = tree_closing tree t in
586 let rec loop_tag t tag =
588 if tree_is_open tree t then
590 if tag == Tag.pcdata then
592 output_string outc (text_get_text tree (tree_my_text_unsafe tree t));
593 loop (next2 t) (* skip closing $ *)
596 let tagstr = tag_str tag in
597 let _ = output_char outc '<';
598 output_string outc tagstr in
600 if tree_is_open tree t' then
601 let _ = push tagstr in
602 let tag' = tree_tag tree t' in
603 if tag' == Tag.attribute then let t'' = loop_attr (next t') 0 in
604 output_string outc ">"; loop t'' else (output_string outc ">";loop_tag t' tag')
605 else (* closing with no content *)
606 let _ = output_string outc "/>" in
611 output_string outc "</";
612 output_string outc (pop());
613 output_char outc '>';
616 and loop t = loop_tag t (tree_tag tree t)
618 if tree_is_open tree t then
619 let attname = att_str (tree_tag tree t) in
620 output_char outc ' ';
621 output_string outc attname;
622 output_string outc "=\"";
623 let t = next t in (* open $@ *)
624 output_string outc (text_get_text tree (tree_my_text_unsafe tree t));
625 output_char outc '"';
626 loop_attr (next3 t) (n+1)
632 let h = Hashtbl.create MED_H_SIZE in
633 let tag_str t = try Hashtbl.find h t with
634 Not_found -> let s = Tag.to_string t in
637 let h_att = Hashtbl.create MED_H_SIZE in
638 let att_str t = try Hashtbl.find h_att t with
639 Not_found -> let s = Tag.to_string t in
640 let attname = String.sub s 3 ((String.length s) -3) in
641 Hashtbl.add h_att t attname;attname
642 in fun outc tree t ->
643 let rec loop ?(print_right=true) t =
646 let tagid = tree_tag tree.doc t in
650 let tid = tree_my_text_unsafe tree.doc t in
651 output_string outc (text_get_text tree.doc tid);
653 then loop (next_sibling tree t);
656 let tagstr = tag_str tagid in
657 let l = first_child tree t
658 and r = next_sibling tree t
660 output_char outc '<';
661 output_string outc tagstr;
662 if l == nil then output_string outc "/>"
664 if (tag tree l) == Tag.attribute then
666 loop_attributes (first_child tree l);
667 if (next_sibling tree l) == nil then output_string outc "/>"
670 output_char outc '>';
671 loop (next_sibling tree l);
672 output_string outc "</";
673 output_string outc tagstr;
674 output_char outc '>';
679 output_char outc '>';
681 output_string outc "</";
682 output_string outc tagstr;
683 output_char outc '>';
685 if print_right then loop r
686 and loop_attributes a =
689 let attname = att_str (tag tree a) in
690 let fsa = first_child tree a in
691 let tid = tree_my_text_unsafe tree.doc fsa in
692 output_char outc ' ';
693 output_string outc attname;
694 output_string outc "=\"";
695 output_string outc (text_get_text tree.doc tid);
696 output_char outc '"';
697 loop_attributes (next_sibling tree a)
699 loop ~print_right:false t
702 let print_xml_fast outc tree t =
703 if (tag tree t) = Tag.document_node then
704 print_xml_fast outc tree (first_child tree t)
705 else print_xml_fast outc tree t
707 let tags_children t tag =
708 let a,_,_,_ = Hashtbl.find t.ttable tag in a
709 let tags_below t tag =
710 let _,a,_,_ = Hashtbl.find t.ttable tag in a
711 let tags_siblings t tag =
712 let _,_,a,_ = Hashtbl.find t.ttable tag in a
713 let tags_after t tag =
714 let _,_,_,a = Hashtbl.find t.ttable tag in a
717 let tags t tag = Hashtbl.find t.ttable tag
720 let rec binary_parent t n =
722 if tree_is_first_child t.doc n
723 then tree_parent t.doc n
724 else tree_prev_sibling t.doc n
725 in if tree_tag t.doc r = Tag.pcdata then
729 let doc_ids t n = tree_doc_ids t.doc n
731 let subtree_tags t tag = ();
732 fun n -> if n == nil then 0 else
733 tree_subtree_tags t.doc n tag
736 let tid = tree_my_text t.doc n in
737 if tid == nulldoc then "" else
738 text_get_text t.doc tid
741 let dump_tree fmt tree =
744 let tag = (tree_tag tree.doc t ) in
745 let tagstr = Tag.to_string tag in
746 let tab = String.make n ' ' in
748 if tag == Tag.pcdata || tag == Tag.attribute_data
750 Format.fprintf fmt "%s<%s>%s</%s>\n"
751 tab tagstr (text_get_text tree.doc (tree_my_text tree.doc t)) tagstr
753 Format.fprintf fmt "%s<%s>\n" tab tagstr;
754 loop (tree_first_child tree.doc t) (n+2);
755 Format.fprintf fmt "%s</%s>\n%!" tab tagstr;
757 loop (tree_next_sibling tree.doc t) n
763 let print_xml_fast3 t = tree_print_xml_fast3 t.doc
770 let rec loop left node acc_d total_d num_leaves =
772 (acc_d+total_d,if left then num_leaves+1 else num_leaves)
774 let d,td = loop true (tree_first_child tree node) (acc_d+1) total_d num_leaves in
775 loop false (tree_next_sibling tree node) (acc_d) d td
777 let a,b = loop true root 0 0 0
779 Printf.eprintf "Average depth: %f, number of leaves %i\n%!" ((float_of_int a)/. (float_of_int b)) b