1 (******************************************************************************)
2 (* SXSI : XPath evaluator *)
3 (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *)
4 (* Copyright NICTA 2008 *)
5 (* Distributed under the terms of the LGPL (see LICENCE) *)
6 (******************************************************************************)
10 external init_lib : unit -> unit = "caml_init_lib"
12 exception CPlusPlusError of string
14 let () = Callback.register_exception "CPlusPlusError" (CPlusPlusError "")
20 type 'a node = private int
21 type node_kind = [`Text | `Tree ]
25 ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t;
28 external inode : 'a node -> int = "%identity"
29 external nodei : int -> 'a node = "%identity"
30 let compare_node x y = (inode x) - (inode y)
31 let equal_node : 'a node -> 'a node -> bool = (==)
34 external parse_xml_uri : string -> int -> bool -> bool -> tree = "caml_call_shredder_uri"
35 external parse_xml_string : string -> int -> bool -> bool -> tree = "caml_call_shredder_string"
36 external tree_print_xml_fast3 : tree -> [`Tree ] node -> Unix.file_descr ->unit = "caml_xml_tree_print"
37 external tree_save : tree -> Unix.file_descr -> string -> unit = "caml_xml_tree_save"
38 external tree_load : Unix.file_descr -> string -> bool -> int -> tree = "caml_xml_tree_load"
40 external nullt : unit -> 'a node = "caml_xml_tree_nullt"
42 let nil : [`Tree ] node = nodei ~-1
43 let nulldoc : [`Text ] node = nodei ~-1
44 let root : [`Tree ] node = nodei 0
46 external text_get_text : tree -> [`Text] node -> string = "caml_text_collection_get_text"
47 external text_is_empty : tree -> [`Text ] node -> bool = "caml_text_collection_empty_text"
49 let text_is_empty t n = (equal_node nulldoc n) || text_is_empty t n
51 external text_is_prefix : tree -> string -> bool = "caml_text_collection_is_prefix"
52 external text_is_suffix : tree -> string -> bool = "caml_text_collection_is_suffix"
53 external text_is_equal : tree -> string -> bool = "caml_text_collection_is_equal"
54 external text_is_contains : tree -> string -> bool = "caml_text_collection_is_contains"
55 external text_is_lessthan : tree -> string -> bool = "caml_text_collection_is_lessthan"
57 external text_count : tree -> string -> int = "caml_text_collection_count"
58 external text_count_prefix : tree -> string -> int = "caml_text_collection_count_prefix"
59 external text_count_suffix : tree -> string -> int = "caml_text_collection_count_suffix"
60 external text_count_equal : tree -> string -> int = "caml_text_collection_count_equal"
61 external text_count_contains : tree -> string -> int = "caml_text_collection_count_contains"
62 external text_count_lessthan : tree -> string -> int = "caml_text_collection_count_lessthan"
64 external text_prefix : tree -> string -> [`Text ] node array = "caml_text_collection_prefix"
65 external text_suffix : tree -> string -> [`Text ] node array = "caml_text_collection_suffix"
66 external text_equals : tree -> string -> [`Text ] node array = "caml_text_collection_equals"
67 external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains"
68 external text_lessthan : tree -> string -> [`Text ] node array = "caml_text_collection_lessthan"
71 external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" "noalloc"
72 external tree_size : tree -> int = "caml_xml_tree_size" "noalloc"
73 external tree_subtree_size : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_size" "noalloc"
74 external tree_subtree_elements : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_elements" "noalloc"
75 external tree_subtree_tags : tree -> [`Tree] node -> Tag.t -> int = "caml_xml_tree_subtree_elements" "noalloc"
77 let tree_is_nil x = equal_node x nil
78 external tree_is_leaf : tree -> [`Tree ] node -> bool = "caml_xml_tree_is_leaf" "noalloc"
79 external tree_is_ancestor : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" "noalloc"
80 external tree_is_child : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_child" "noalloc"
81 external tree_is_first_child : tree -> [`Tree ] node -> bool = "caml_xml_tree_is_first_child" "noalloc"
82 external tree_num_children : tree -> [`Tree ] node -> int = "caml_xml_tree_num_children" "noalloc"
83 external tree_child_number : tree -> [`Tree ] node -> int = "caml_xml_tree_child_number" "noalloc"
84 external tree_depth : tree -> [`Tree ] node -> int = "caml_xml_tree_depth" "noalloc"
85 external tree_preorder : tree -> [`Tree ] node -> int = "caml_xml_tree_preorder" "noalloc"
86 external tree_postorder : tree -> [`Tree ] node -> int = "caml_xml_tree_postorder" "noalloc"
87 external tree_tag : tree -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag" "noalloc"
88 external tree_doc_ids : tree -> [`Tree ] node -> [`Text] node*[`Text] node = "caml_xml_tree_doc_ids"
90 external tree_parent : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_parent" "noalloc"
91 external tree_child : tree -> [`Tree] node -> int -> [`Tree] node = "caml_xml_tree_child" "noalloc"
92 external tree_first_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" "noalloc"
93 external tree_first_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_element" "noalloc"
94 external tree_last_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_last_child" "noalloc"
95 external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" "noalloc"
96 external tree_next_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_element" "noalloc"
97 external tree_prev_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling" "noalloc"
98 external tree_tagged_child : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_child" "noalloc"
101 external unordered_set_alloc : int -> unordered_set = "caml_unordered_set_alloc"
102 external unordered_set_length : unordered_set -> int = "caml_unordered_set_length"
103 external unordered_set_insert : unordered_set -> int -> unit = "caml_unordered_set_set" "noalloc"
105 external tree_select_child : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_child" "noalloc"
106 external tree_tagged_following_sibling : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_following_sibling" "noalloc"
107 external tree_select_following_sibling : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_following_sibling" "noalloc"
108 external tree_tagged_descendant : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_descendant" "noalloc"
109 external tree_select_descendant : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_descendant" "noalloc"
110 external tree_tagged_following : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_following" "noalloc"
111 external tree_tagged_following_below : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_following_below" "noalloc"
112 external tree_select_following_below : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_following_below" "noalloc"
115 external tree_tagged_following_before : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_following_before" "noalloc"
116 external tree_select_following_below : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_following_before" "noalloc"
118 external tree_my_text : tree -> [`Tree ] node -> [`Text] node = "caml_xml_tree_my_text" "noalloc"
119 external tree_my_text_unsafe : tree -> [`Tree ] node -> [`Text] node = "caml_xml_tree_my_text_unsafe" "noalloc"
120 external tree_text_xml_id : tree -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" "noalloc"
121 external tree_node_xml_id : tree -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id" "noalloc"
123 external tree_parent_node : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_node" "noalloc"
125 (*external tree_prev_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_prev_doc" "noalloc" *)
127 external tree_closing : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_closing" "noalloc"
128 external tree_is_open : tree -> [`Tree] node -> bool = "caml_xml_tree_is_open" "noalloc"
131 external benchmark_jump : tree -> Tag.t -> int = "caml_benchmark_jump" "noalloc"
133 let benchmark_jump t s = benchmark_jump t.doc s
135 external benchmark_fcns : tree -> int = "caml_benchmark_fcns" "noalloc"
136 external benchmark_fene : tree -> int = "caml_benchmark_fene" "noalloc"
137 external benchmark_iter : tree -> int = "caml_benchmark_iter" "noalloc"
139 let benchmark_fcns t = benchmark_fcns t.doc
141 let benchmark_fene t = benchmark_fene t.doc
143 let benchmark_iter t = benchmark_iter t.doc
145 external benchmark_lcps : tree -> unit = "caml_benchmark_lcps" "noalloc"
147 let benchmark_lcps t = benchmark_lcps t.doc
155 let text_size tree = inode (snd ( tree_doc_ids tree root ))
157 let text_get_text t (x:[`Text] node) =
158 if x == nulldoc then ""
159 else text_get_text t x
164 module HPtset = Hashtbl.Make(Ptset.Int)
166 let vector_htbl = HPtset.create MED_H_SIZE
168 let ptset_to_vector s =
170 HPtset.find vector_htbl s
173 let v = unordered_set_alloc (Ptset.Int.cardinal s) in
174 let _ = Ptset.Int.iter (fun e -> unordered_set_insert v e) s in
175 HPtset.add vector_htbl s v; v
179 let subtree_size t i = tree_subtree_size t.doc i
180 let subtree_elements t i = tree_subtree_elements t.doc i
181 let text_size t = text_size t.doc
183 module MemUnion = Hashtbl.Make (struct
184 type t = Ptset.Int.t*Ptset.Int.t
185 let equal (x,y) (z,t) = x == z && y == t
186 let equal a b = equal a b || equal b a
187 let hash (x,y) = (* commutative hash *)
188 let x = Uid.to_int (Ptset.Int.uid x)
189 and y = Uid.to_int (Ptset.Int.uid y)
191 if x <= y then HASHINT2(x,y) else HASHINT2(y,x)
194 module MemAdd = Hashtbl.Make (
196 type t = Tag.t*Ptset.Int.t
197 let equal (x,y) (z,t) = (x == z)&&(y == t)
198 let hash (x,y) = HASHINT2(x,Uid.to_int (Ptset.Int.uid y))
201 module MemUpdate = struct
202 include Hashtbl.Make (
204 type t = Tag.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t
205 let equal (a1,b1,c1,d1,e1) (a2,b2,c2,d2,e2) = a1==a2 &&
206 b1 == b2 && c1 == c2 && d1 == d2 && e1 == e2
207 let hash (a,b,c,d,e) =
208 HASHINT4(HASHINT2(a,Uid.to_int (Ptset.Int.uid b)),
209 Uid.to_int (Ptset.Int.uid c),
210 Uid.to_int (Ptset.Int.uid d),
211 Uid.to_int (Ptset.Int.uid e))
216 let collect_tags tree =
217 let _ = Printf.eprintf "Collecting Tags\n%!" in
218 let h_union = MemUnion.create BIG_H_SIZE in
221 MemUnion.find h_union (s1,s2)
223 | Not_found -> let s = Ptset.Int.union s1 s2
225 MemUnion.add h_union (s1,s2) s;s
227 let h_add = MemAdd.create BIG_H_SIZE in
229 try MemAdd.find h_add (t,s)
231 | Not_found -> let r = Ptset.Int.add t s in
232 MemAdd.add h_add (t,s) r;r
234 let h = Hashtbl.create BIG_H_SIZE in
235 let update t sc sb ss sa =
236 let schild,sbelow,ssibling,safter =
241 (Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty)
244 (pt_cup sc schild,pt_cup sbelow sb, pt_cup ssibling ss, pt_cup safter sa)
246 let rec loop right id acc_after =
248 then Ptset.Int.empty,Ptset.Int.empty,acc_after else
249 let sibling2,desc2,after2 = loop true (tree_next_sibling tree id) acc_after in
250 let child1,desc1,after1 = loop false (tree_first_child tree id) after2 in
251 let tag = tree_tag tree id in
252 update tag child1 desc1 sibling2 after2;
253 ( pt_add tag sibling2,
254 pt_add tag (pt_cup desc1 desc2),
255 if right then pt_cup after1 (pt_cup desc1 desc2) else acc_after )
257 let _ = loop false (tree_root tree) Ptset.Int.empty in
258 let _ = Printf.eprintf "Finished\n%!" in
264 let contains_array = ref [| |]
265 let contains_index = Hashtbl.create 4096
268 Hashtbl.find contains_index i
272 let init_textfun f t s =
274 | `CONTAINS -> text_contains t.doc s
275 | `STARTSWITH -> text_prefix t.doc s
276 | `ENDSWITH -> text_suffix t.doc s
277 | `EQUALS -> text_equals t.doc s
279 (*Array.fast_sort (compare) a; *)
281 Array.iter (fun x -> Hashtbl.add contains_index x true) !contains_array
283 let count_contains t s = text_count_contains t.doc s
285 let init_naive_contains t s =
286 let i,j = tree_doc_ids t.doc (tree_root t.doc)
288 let regexp = Str.regexp_string s in
291 let _ = Str.search_forward regexp arg 0;
295 let rec loop n acc l =
298 let s = text_get_text t.doc n
301 then loop (nodei ((inode n)+1)) (n::acc) (l+1)
302 else loop (nodei ((inode n)+1)) acc l
304 let acc,l = loop i [] 0 in
305 let a = Array.create l nulldoc in
306 let _ = List.fold_left (fun cpt e -> a.(cpt) <- e; (cpt-1)) (l-1) acc
312 let array_find a i j =
313 let l = Array.length a in
314 let rec loop idx x y =
315 if x > y || idx >= l then nulldoc
317 if a.(idx) >= x then if a.(idx) > y then nulldoc else (last_idx := idx;a.(idx))
318 else loop (idx+1) x y
320 if a.(0) > j || a.(l-1) < i then nulldoc
321 else loop !last_idx i j
323 let text_below tree t =
324 let l = Array.length !contains_array in
325 let i,j = tree_doc_ids tree.doc t in
326 let id = if l == 0 then i else (array_find !contains_array i j) in
327 tree_parent_node tree.doc id
329 let text_next tree t root =
330 let l = Array.length !contains_array in
331 let inf = nodei((inode(snd(tree_doc_ids tree.doc t)))+1) in
332 let _,j = tree_doc_ids tree.doc root in
333 let id = if l == 0 then if inf > j then nulldoc else inf
334 else array_find !contains_array inf j
336 tree_parent_node tree.doc id
340 module DocIdSet = struct
341 include Set.Make (struct type t = [`Text] node
342 let compare = compare_node end)
345 let is_nil t = t == nil
347 let is_node t = t != nil
348 let is_root t = t == root
351 let _ = Tag.init (Obj.magic t) in
352 let table = collect_tags t
354 let _ = Hashtbl.iter (fun t (c,d,ns,f) ->
355 Printf.eprintf "Tag %s has:\n" (Tag.to_string t);
356 Printf.eprintf "Child tags: ";
357 Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) c;
358 Printf.eprintf "\nDescendant tags: ";
359 Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) d;
360 Printf.eprintf "\nNextSibling tags: ";
361 Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) ns;
362 Printf.eprintf "\nFollowing tags: ";
363 Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) f;
364 Printf.eprintf "\n\n%!";) table
372 let finalize _ = Printf.eprintf "Release the string list !\n%!"
378 !Options.sample_factor
379 !Options.index_empty_texts
380 !Options.disable_text_collection)
382 let parse_xml_uri str = parse parse_xml_uri str
383 let parse_xml_string str = parse parse_xml_string str
385 let size t = tree_size t.doc;;
387 external pool : tree -> Tag.pool = "%identity"
389 let magic_string = "SXSI_INDEX"
390 let version_string = "2"
393 Unix.lseek fd 0 Unix.SEEK_CUR
395 let pr_pos fd = Printf.eprintf "At position %i\n%!" (pos fd)
398 let sl = String.length s in
399 let ssl = Printf.sprintf "%020i" sl in
400 ignore (Unix.write fd ssl 0 20);
401 ignore (Unix.write fd s 0 (String.length s))
403 let rec really_read fd buffer start length =
404 if length <= 0 then () else
405 match Unix.read fd buffer start length with
406 0 -> raise End_of_file
407 | r -> really_read fd buffer (start + r) (length - r);;
410 let buffer = String.create 20 in
411 let _ = really_read fd buffer 0 20 in
412 let size = int_of_string buffer in
413 let buffer = String.create size in
414 let _ = really_read fd buffer 0 size in
419 let fd = Unix.openfile str [ Unix.O_WRONLY;Unix.O_TRUNC;Unix.O_CREAT] 0o644 in
420 let out_c = Unix.out_channel_of_descr fd in
421 let _ = set_binary_mode_out out_c true in
422 output_string out_c magic_string;
423 output_char out_c '\n';
424 output_string out_c version_string;
425 output_char out_c '\n';
426 Marshal.to_channel out_c t.ttable [ ];
427 (* we need to move the fd to the correct position *)
429 ignore (Unix.lseek fd (pos_out out_c) Unix.SEEK_SET);
430 tree_save t.doc fd str;
434 let load ?(sample=64) ?(load_text=true) str =
435 let fd = Unix.openfile str [ Unix.O_RDONLY ] 0o644 in
436 let in_c = Unix.in_channel_of_descr fd in
437 let _ = set_binary_mode_in in_c true in
439 (let ms = input_line in_c in if ms <> magic_string then failwith "Invalid index file");
440 (let vs = input_line in_c in if vs <> version_string then failwith "Invalid version file");
441 let table : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t =
442 Marshal.from_channel in_c
444 let ntable = Hashtbl.create (Hashtbl.length table) in
445 Hashtbl.iter (fun k (s1,s2,s3,s4) ->
446 let ss1 = Ptset.Int.fold (Ptset.Int.add) s1 Ptset.Int.empty
447 and ss2 = Ptset.Int.fold (Ptset.Int.add) s2 Ptset.Int.empty
448 and ss3 = Ptset.Int.fold (Ptset.Int.add) s3 Ptset.Int.empty
449 and ss4 = Ptset.Int.fold (Ptset.Int.add) s4 Ptset.Int.empty
450 in Hashtbl.add ntable k (ss1,ss2,ss3,ss4)
453 (* The in_channel read a chunk of fd, so we might be after
454 the start of the XMLTree save file. Reset to the correct
458 let _ = Printf.eprintf "\nLoading tag table : " in
459 let ntable = time (load_table) () in
460 ignore(Unix.lseek fd (pos_in in_c) Unix.SEEK_SET);
461 let tree = { doc = tree_load fd str load_text sample;
469 let tag_pool t = pool t.doc
471 let compare = compare_node
473 let equal a b = a == b
477 | i -> Printf.sprintf "Node (%i)" i
479 let dump_node t = nts (inode t)
481 let is_left t n = tree_is_first_child t.doc n
485 let is_below_right t n1 n2 =
486 tree_is_ancestor t.doc (tree_parent t.doc n1) n2
487 && not (tree_is_ancestor t.doc n1 n2)
489 let is_binary_ancestor t n1 n2 =
490 let p = tree_parent t.doc n1 in
491 let fin = tree_closing t.doc p in
493 (* (is_below_right t n1 n2) ||
494 (tree_is_ancestor t.doc n1 n2) *)
496 let parent t n = tree_parent t.doc n
498 let first_child t = let doc = t.doc in ();fun n -> tree_first_child doc n
499 let first_element t = let doc = t.doc in (); fun n -> tree_first_element doc n
500 let first_element t n = tree_first_element t.doc n
501 (* these function will be called in two times: first partial application
502 on the tag, then application of the tag and the tree, then application of
503 the other arguments. We use the trick to let the compiler optimize application
506 let tagged_child t tag = () ; fun n -> tree_tagged_child t.doc n tag
508 let select_child t = fun ts ->
509 let v = ptset_to_vector ts in ();
510 fun n -> tree_select_child t.doc n v
512 let next_sibling t = let doc = t.doc in (); fun n -> tree_next_sibling doc n
513 let next_element t = let doc = t.doc in (); fun n -> tree_next_element doc n
514 let next_element t n = tree_next_element t.doc n
516 let tagged_following_sibling t tag = (); fun n -> tree_tagged_following_sibling t.doc n tag
518 let select_following_sibling t = fun ts ->
519 let v = (ptset_to_vector ts) in ();
520 fun n -> tree_select_following_sibling t.doc n v
522 let next_sibling_below t = (); fun n _ -> tree_next_sibling t.doc n
523 let next_element_below t = (); fun n _ -> tree_next_element t.doc n
525 let tagged_following_sibling_below t tag = (); fun n _ -> tree_tagged_following_sibling t.doc n tag
527 let select_following_sibling_below t = fun ts ->
528 let v = (ptset_to_vector ts) in ();
529 fun n _ -> tree_select_following_sibling t.doc n v
531 let id t n = tree_node_xml_id t.doc n
533 let tag t n = if n == nil then Tag.nullt else tree_tag t.doc n
535 let tagged_descendant t tag =
536 let doc = t.doc in (); fun n -> tree_tagged_descendant doc n tag
538 let select_descendant t = fun ts ->
539 let v = (ptset_to_vector ts) in ();
540 fun n -> tree_select_descendant t.doc n v
542 let tagged_following_below t tag =
544 (); fun n ctx -> tree_tagged_following_below doc n tag ctx
546 let select_following_below t = fun ts ->
547 let v = (ptset_to_vector ts) in ();
548 fun n ctx -> tree_select_following_below t.doc n v ctx
550 let closing t n = tree_closing t.doc n
551 let is_open t n = tree_is_open t.doc n
552 let get_text_id t n = tree_my_text t.doc n
555 let array_find a i j =
556 let l = Array.length a in
557 let rec loop idx x y =
558 if x > y || idx >= l then nil
560 if a.(idx) >= x then if a.(idx) > y then nil else (last_idx := idx;a.(idx))
561 else loop (idx+1) x y
563 if a.(0) > j || a.(l-1) < i then nil
564 else loop !last_idx i j
568 let count t s = text_count t.doc s
570 let init_stack () = stack := []
571 let push x = stack:= x::!stack
572 let peek () = match !stack with
574 | _ -> failwith "peek"
575 let pop () = match !stack with
577 | _ -> failwith "pop"
579 let next t = nodei ( (inode t) + 1 )
580 let next2 t = nodei ( (inode t) + 2 )
581 let next3 t = nodei ( (inode t) + 3 )
583 let print_xml_fast2 =
584 let _ = init_stack () in
585 let h = Hashtbl.create MED_H_SIZE in
586 let tag_str t = try Hashtbl.find h t with
587 Not_found -> let s = Tag.to_string t in
590 let h_att = Hashtbl.create MED_H_SIZE in
591 let att_str t = try Hashtbl.find h_att t with
592 Not_found -> let s = Tag.to_string t in
593 let attname = String.sub s 3 ((String.length s) -3) in
594 Hashtbl.add h_att t attname;attname
595 in fun outc tree t ->
596 let tree = tree.doc in
597 let fin = tree_closing tree t in
598 let rec loop_tag t tag =
600 if tree_is_open tree t then
602 if tag == Tag.pcdata then
604 output_string outc (text_get_text tree (tree_my_text_unsafe tree t));
605 loop (next2 t) (* skip closing $ *)
608 let tagstr = tag_str tag in
609 let _ = output_char outc '<';
610 output_string outc tagstr in
612 if tree_is_open tree t' then
613 let _ = push tagstr in
614 let tag' = tree_tag tree t' in
615 if tag' == Tag.attribute then let t'' = loop_attr (next t') 0 in
616 output_string outc ">"; loop t'' else (output_string outc ">";loop_tag t' tag')
617 else (* closing with no content *)
618 let _ = output_string outc "/>" in
623 output_string outc "</";
624 output_string outc (pop());
625 output_char outc '>';
628 and loop t = loop_tag t (tree_tag tree t)
630 if tree_is_open tree t then
631 let attname = att_str (tree_tag tree t) in
632 output_char outc ' ';
633 output_string outc attname;
634 output_string outc "=\"";
635 let t = next t in (* open $@ *)
636 output_string outc (text_get_text tree (tree_my_text_unsafe tree t));
637 output_char outc '"';
638 loop_attr (next3 t) (n+1)
644 let h = Hashtbl.create MED_H_SIZE in
645 let tag_str t = try Hashtbl.find h t with
646 Not_found -> let s = Tag.to_string t in
649 let h_att = Hashtbl.create MED_H_SIZE in
650 let att_str t = try Hashtbl.find h_att t with
651 Not_found -> let s = Tag.to_string t in
652 let attname = String.sub s 3 ((String.length s) -3) in
653 Hashtbl.add h_att t attname;attname
654 in fun outc tree t ->
655 let rec loop ?(print_right=true) t =
658 let tagid = tree_tag tree.doc t in
662 let tid = tree_my_text_unsafe tree.doc t in
663 output_string outc (text_get_text tree.doc tid);
665 then loop (next_sibling tree t);
668 let tagstr = tag_str tagid in
669 let l = first_child tree t
670 and r = next_sibling tree t
672 output_char outc '<';
673 output_string outc tagstr;
674 if l == nil then output_string outc "/>"
676 if (tag tree l) == Tag.attribute then
678 loop_attributes (first_child tree l);
679 if (next_sibling tree l) == nil then output_string outc "/>"
682 output_char outc '>';
683 loop (next_sibling tree l);
684 output_string outc "</";
685 output_string outc tagstr;
686 output_char outc '>';
691 output_char outc '>';
693 output_string outc "</";
694 output_string outc tagstr;
695 output_char outc '>';
697 if print_right then loop r
698 and loop_attributes a =
701 let attname = att_str (tag tree a) in
702 let fsa = first_child tree a in
703 let tid = tree_my_text_unsafe tree.doc fsa in
704 output_char outc ' ';
705 output_string outc attname;
706 output_string outc "=\"";
707 output_string outc (text_get_text tree.doc tid);
708 output_char outc '"';
709 loop_attributes (next_sibling tree a)
711 loop ~print_right:false t
714 let print_xml_fast outc tree t =
715 if (tag tree t) = Tag.document_node then
716 print_xml_fast outc tree (first_child tree t)
717 else print_xml_fast outc tree t
719 let tags_children t tag =
720 let a,_,_,_ = Hashtbl.find t.ttable tag in a
721 let tags_below t tag =
722 let _,a,_,_ = Hashtbl.find t.ttable tag in a
723 let tags_siblings t tag =
724 let _,_,a,_ = Hashtbl.find t.ttable tag in a
725 let tags_after t tag =
726 let _,_,_,a = Hashtbl.find t.ttable tag in a
729 let tags t tag = Hashtbl.find t.ttable tag
732 let rec binary_parent t n =
734 if tree_is_first_child t.doc n
735 then tree_parent t.doc n
736 else tree_prev_sibling t.doc n
737 in if tree_tag t.doc r = Tag.pcdata then
741 let doc_ids t n = tree_doc_ids t.doc n
743 let subtree_tags t tag = ();
744 fun n -> if n == nil then 0 else
745 tree_subtree_tags t.doc n tag
748 let tid = tree_my_text t.doc n in
749 if tid == nulldoc then "" else
750 text_get_text t.doc tid
753 let dump_tree fmt tree =
756 let tag = (tree_tag tree.doc t ) in
757 let tagstr = Tag.to_string tag in
758 let tab = String.make n ' ' in
760 if tag == Tag.pcdata || tag == Tag.attribute_data
762 Format.fprintf fmt "%s<%s>%s</%s>\n"
763 tab tagstr (text_get_text tree.doc (tree_my_text tree.doc t)) tagstr
765 Format.fprintf fmt "%s<%s>\n" tab tagstr;
766 loop (tree_first_child tree.doc t) (n+2);
767 Format.fprintf fmt "%s</%s>\n%!" tab tagstr;
769 loop (tree_next_sibling tree.doc t) n
775 let print_xml_fast3 t = tree_print_xml_fast3 t.doc
782 let rec loop left node acc_d total_d num_leaves =
784 (acc_d+total_d,if left then num_leaves+1 else num_leaves)
786 let d,td = loop true (tree_first_child tree node) (acc_d+1) total_d num_leaves in
787 loop false (tree_next_sibling tree node) (acc_d) d td
789 let a,b = loop true root 0 0 0
791 Printf.eprintf "Average depth: %f, number of leaves %i\n%!" ((float_of_int a)/. (float_of_int b)) b
799 let test_prefix t s = Array.length (text_prefix t.doc s)
800 let test_suffix t s = Array.length (text_suffix t.doc s)
801 let test_contains t s = Array.length (text_contains t.doc s)
802 let test_equals t s = Array.length (text_equals t.doc s)