1 (******************************************************************************)
2 (* SXSI : XPath evaluator *)
3 (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *)
4 (* Copyright NICTA 2008 *)
5 (* Distributed under the terms of the LGPL (see LICENCE) *)
6 (******************************************************************************)
10 external init_lib : unit -> unit = "caml_init_lib"
12 exception CPlusPlusError of string
14 let () = Callback.register_exception "CPlusPlusError" (CPlusPlusError "")
20 type 'a node = private int
21 type node_kind = [`Text | `Tree ]
25 ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t;
28 external inode : 'a node -> int = "%identity"
29 external nodei : int -> 'a node = "%identity"
30 let compare_node x y = (inode x) - (inode y)
31 let equal_node : 'a node -> 'a node -> bool = (==)
34 external parse_xml_uri : string -> int -> bool -> bool -> tree = "caml_call_shredder_uri"
35 external parse_xml_string : string -> int -> bool -> bool -> tree = "caml_call_shredder_string"
36 external tree_print_xml_fast3 : tree -> [`Tree ] node -> Unix.file_descr ->unit = "caml_xml_tree_print"
37 external tree_save : tree -> Unix.file_descr -> unit = "caml_xml_tree_save"
38 external tree_load : Unix.file_descr -> bool -> int -> tree = "caml_xml_tree_load"
40 external nullt : unit -> 'a node = "caml_xml_tree_nullt"
42 let nil : [`Tree ] node = nodei ~-1
43 let nulldoc : [`Text ] node = nodei ~-1
44 let root : [`Tree ] node = nodei 0
46 external text_get_text : tree -> [`Text] node -> string = "caml_text_collection_get_text"
47 external text_is_empty : tree -> [`Text ] node -> bool = "caml_text_collection_empty_text"
49 let text_is_empty t n = (equal_node nulldoc n) || text_is_empty t n
51 external text_is_prefix : tree -> string -> bool = "caml_text_collection_is_prefix"
52 external text_is_suffix : tree -> string -> bool = "caml_text_collection_is_suffix"
53 external text_is_equal : tree -> string -> bool = "caml_text_collection_is_equal"
54 external text_is_contains : tree -> string -> bool = "caml_text_collection_is_contains"
55 external text_is_lessthan : tree -> string -> bool = "caml_text_collection_is_lessthan"
57 external text_count : tree -> string -> int = "caml_text_collection_count"
58 external text_count_prefix : tree -> string -> int = "caml_text_collection_count_prefix"
59 external text_count_suffix : tree -> string -> int = "caml_text_collection_count_suffix"
60 external text_count_equal : tree -> string -> int = "caml_text_collection_count_equal"
61 external text_count_contains : tree -> string -> int = "caml_text_collection_count_contains"
62 external text_count_lessthan : tree -> string -> int = "caml_text_collection_count_lessthan"
64 external text_prefix : tree -> string -> [`Text ] node array = "caml_text_collection_prefix"
65 external text_suffix : tree -> string -> [`Text ] node array = "caml_text_collection_suffix"
66 external text_equals : tree -> string -> [`Text ] node array = "caml_text_collection_equals"
67 external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains"
68 external text_lessthan : tree -> string -> [`Text ] node array = "caml_text_collection_lessthan"
71 external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" "noalloc"
72 external tree_size : tree -> int = "caml_xml_tree_size" "noalloc"
73 external tree_subtree_size : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_size" "noalloc"
74 external tree_subtree_elements : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_elements" "noalloc"
75 external tree_subtree_tags : tree -> [`Tree] node -> Tag.t -> int = "caml_xml_tree_subtree_elements" "noalloc"
77 let tree_is_nil x = equal_node x nil
78 external tree_is_leaf : tree -> [`Tree ] node -> bool = "caml_xml_tree_is_leaf" "noalloc"
79 external tree_is_ancestor : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" "noalloc"
80 external tree_is_child : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_child" "noalloc"
81 external tree_is_first_child : tree -> [`Tree ] node -> bool = "caml_xml_tree_is_first_child" "noalloc"
82 external tree_num_children : tree -> [`Tree ] node -> int = "caml_xml_tree_num_children" "noalloc"
83 external tree_child_number : tree -> [`Tree ] node -> int = "caml_xml_tree_child_number" "noalloc"
84 external tree_depth : tree -> [`Tree ] node -> int = "caml_xml_tree_depth" "noalloc"
85 external tree_preorder : tree -> [`Tree ] node -> int = "caml_xml_tree_preorder" "noalloc"
86 external tree_postorder : tree -> [`Tree ] node -> int = "caml_xml_tree_postorder" "noalloc"
87 external tree_tag : tree -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag" "noalloc"
88 external tree_doc_ids : tree -> [`Tree ] node -> [`Text] node*[`Text] node = "caml_xml_tree_doc_ids"
90 external tree_parent : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_parent" "noalloc"
91 external tree_child : tree -> [`Tree] node -> int -> [`Tree] node = "caml_xml_tree_child" "noalloc"
92 external tree_first_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" "noalloc"
93 external tree_first_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_element" "noalloc"
94 external tree_last_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_last_child" "noalloc"
95 external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" "noalloc"
96 external tree_next_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_element" "noalloc"
97 external tree_prev_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling" "noalloc"
98 external tree_tagged_child : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_child" "noalloc"
101 external unordered_set_alloc : int -> unordered_set = "caml_unordered_set_alloc"
102 external unordered_set_length : unordered_set -> int = "caml_unordered_set_length"
103 external unordered_set_insert : unordered_set -> int -> unit = "caml_unordered_set_set" "noalloc"
105 external tree_select_child : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_child" "noalloc"
106 external tree_tagged_following_sibling : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_following_sibling" "noalloc"
107 external tree_select_following_sibling : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_following_sibling" "noalloc"
108 external tree_tagged_descendant : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_descendant" "noalloc"
109 external tree_select_descendant : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_descendant" "noalloc"
110 external tree_tagged_following : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_following" "noalloc"
111 external tree_tagged_following_below : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_following_below" "noalloc"
112 external tree_select_following_below : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_following_below" "noalloc"
115 external tree_tagged_following_before : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_following_before" "noalloc"
116 external tree_select_following_below : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_following_before" "noalloc"
118 external tree_my_text : tree -> [`Tree ] node -> [`Text] node = "caml_xml_tree_my_text" "noalloc"
119 external tree_my_text_unsafe : tree -> [`Tree ] node -> [`Text] node = "caml_xml_tree_my_text_unsafe" "noalloc"
120 external tree_text_xml_id : tree -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" "noalloc"
121 external tree_node_xml_id : tree -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id" "noalloc"
123 external tree_parent_node : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_node" "noalloc"
125 (*external tree_prev_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_prev_doc" "noalloc" *)
127 external tree_closing : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_closing" "noalloc"
128 external tree_is_open : tree -> [`Tree] node -> bool = "caml_xml_tree_is_open" "noalloc"
131 external benchmark_jump : tree -> Tag.t -> unit = "caml_benchmark_jump" "noalloc"
133 let benchmark_jump t s = benchmark_jump t.doc s
135 external benchmark_fsns : tree -> unit = "caml_benchmark_fsns" "noalloc"
137 let benchmark_fsns t = benchmark_fsns t.doc
145 let text_size tree = inode (snd ( tree_doc_ids tree root ))
147 let text_get_text t (x:[`Text] node) =
148 if x == nulldoc then ""
149 else text_get_text t x
154 module HPtset = Hashtbl.Make(Ptset.Int)
156 let vector_htbl = HPtset.create MED_H_SIZE
158 let ptset_to_vector s =
160 HPtset.find vector_htbl s
163 let v = unordered_set_alloc (Ptset.Int.cardinal s) in
164 let _ = Ptset.Int.iter (fun e -> unordered_set_insert v e) s in
165 HPtset.add vector_htbl s v; v
169 let subtree_size t i = tree_subtree_size t.doc i
170 let subtree_elements t i = tree_subtree_elements t.doc i
171 let text_size t = text_size t.doc
173 module MemUnion = Hashtbl.Make (struct
174 type t = Ptset.Int.t*Ptset.Int.t
175 let equal (x,y) (z,t) = x == z || y == t
176 let equal a b = equal a b || equal b a
177 let hash (x,y) = (* commutative hash *)
178 let x = Ptset.Int.uid x
179 and y = Ptset.Int.uid y
181 if x <= y then HASHINT2(x,y) else HASHINT2(y,x)
184 module MemAdd = Hashtbl.Make (
186 type t = Tag.t*Ptset.Int.t
187 let equal (x,y) (z,t) = (x == z)&&(y == t)
188 let hash (x,y) = HASHINT2(x,Ptset.Int.uid y)
191 let collect_tags tree =
192 let _ = Printf.eprintf "Collecting Tags\n%!" in
193 (* let h_union = MemUnion.create BIG_H_SIZE in
196 MemUnion.find h_union (s1,s2)
198 | Not_found -> let s = Ptset.Int.union s1 s2
200 MemUnion.add h_union (s1,s2) s;s
202 let h_add = MemAdd.create BIG_H_SIZE in
204 try MemAdd.find h_add (t,s)
206 | Not_found -> let r = Ptset.Int.add t s in
207 MemAdd.add h_add (t,s) r;r
209 let pt_cup = Ptset.Int.union in
210 let pt_add = Ptset.Int.add in
211 let h = Hashtbl.create BIG_H_SIZE in
212 let update t sc sb ss sa =
213 let schild,sbelow,ssibling,safter =
218 (Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty)
221 (pt_cup sc schild,pt_cup sbelow sb, pt_cup ssibling ss, pt_cup safter sa)
223 let rec loop right id acc_after =
225 then Ptset.Int.empty,Ptset.Int.empty,acc_after else
226 let sibling2,desc2,after2 = loop true (tree_next_sibling tree id) acc_after in
227 let child1,desc1,after1 = loop false (tree_first_child tree id) after2 in
228 let tag = tree_tag tree id in
229 update tag child1 desc1 sibling2 after2;
230 ( pt_add tag sibling2,
231 pt_add tag (pt_cup desc1 desc2),
232 if right then pt_cup after1 (pt_cup desc1 desc2) else acc_after )
234 let _ = loop false (tree_root tree) Ptset.Int.empty in
235 let _ = Printf.eprintf "Finished\n%!" in
241 let contains_array = ref [| |]
242 let contains_index = Hashtbl.create 4096
245 Hashtbl.find contains_index i
249 let init_textfun f t s =
251 | `CONTAINS -> text_contains t.doc s
252 | `STARTSWITH -> text_prefix t.doc s
253 | `ENDSWITH -> text_suffix t.doc s
254 | `EQUALS -> text_equals t.doc s
256 (*Array.fast_sort (compare) a; *)
258 Array.iter (fun x -> Hashtbl.add contains_index x true) !contains_array
260 let count_contains t s = text_count_contains t.doc s
262 let init_naive_contains t s =
263 let i,j = tree_doc_ids t.doc (tree_root t.doc)
265 let regexp = Str.regexp_string s in
268 let _ = Str.search_forward regexp arg 0;
272 let rec loop n acc l =
275 let s = text_get_text t.doc n
278 then loop (nodei ((inode n)+1)) (n::acc) (l+1)
279 else loop (nodei ((inode n)+1)) acc l
281 let acc,l = loop i [] 0 in
282 let a = Array.create l nulldoc in
283 let _ = List.fold_left (fun cpt e -> a.(cpt) <- e; (cpt-1)) (l-1) acc
289 let array_find a i j =
290 let l = Array.length a in
291 let rec loop idx x y =
292 if x > y || idx >= l then nulldoc
294 if a.(idx) >= x then if a.(idx) > y then nulldoc else (last_idx := idx;a.(idx))
295 else loop (idx+1) x y
297 if a.(0) > j || a.(l-1) < i then nulldoc
298 else loop !last_idx i j
300 let text_below tree t =
301 let l = Array.length !contains_array in
302 let i,j = tree_doc_ids tree.doc t in
303 let id = if l == 0 then i else (array_find !contains_array i j) in
304 tree_parent_node tree.doc id
306 let text_next tree t root =
307 let l = Array.length !contains_array in
308 let inf = nodei((inode(snd(tree_doc_ids tree.doc t)))+1) in
309 let _,j = tree_doc_ids tree.doc root in
310 let id = if l == 0 then if inf > j then nulldoc else inf
311 else array_find !contains_array inf j
313 tree_parent_node tree.doc id
317 module DocIdSet = struct
318 include Set.Make (struct type t = [`Text] node
319 let compare = compare_node end)
322 let is_nil t = t == nil
324 let is_node t = t != nil
325 let is_root t = t == root
328 let _ = Tag.init (Obj.magic t) in
329 let table = collect_tags t
331 let _ = Hashtbl.iter (fun t (c,d,ns,f) ->
332 Printf.eprintf "Tag %s has:\n" (Tag.to_string t);
333 Printf.eprintf "Child tags: ";
334 Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) c;
335 Printf.eprintf "\nDescendant tags: ";
336 Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) d;
337 Printf.eprintf "\nNextSibling tags: ";
338 Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) ns;
339 Printf.eprintf "\nFollowing tags: ";
340 Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) f;
341 Printf.eprintf "\n\n%!";) table
349 let finalize _ = Printf.eprintf "Release the string list !\n%!"
355 !Options.sample_factor
356 !Options.index_empty_texts
357 !Options.disable_text_collection)
359 let parse_xml_uri str = parse parse_xml_uri str
360 let parse_xml_string str = parse parse_xml_string str
362 let size t = tree_size t.doc;;
364 external pool : tree -> Tag.pool = "%identity"
366 let magic_string = "SXSI_INDEX"
367 let version_string = "2"
370 Unix.lseek fd 0 Unix.SEEK_CUR
372 let pr_pos fd = Printf.eprintf "At position %i\n%!" (pos fd)
375 let sl = String.length s in
376 let ssl = Printf.sprintf "%020i" sl in
377 ignore (Unix.write fd ssl 0 20);
378 ignore (Unix.write fd s 0 (String.length s))
380 let rec really_read fd buffer start length =
381 if length <= 0 then () else
382 match Unix.read fd buffer start length with
383 0 -> raise End_of_file
384 | r -> really_read fd buffer (start + r) (length - r);;
387 let buffer = String.create 20 in
388 let _ = really_read fd buffer 0 20 in
389 let size = int_of_string buffer in
390 let buffer = String.create size in
391 let _ = really_read fd buffer 0 size in
396 let fd = Unix.openfile str [ Unix.O_WRONLY;Unix.O_TRUNC;Unix.O_CREAT] 0o644 in
397 let out_c = Unix.out_channel_of_descr fd in
398 let _ = set_binary_mode_out out_c true in
399 output_string out_c magic_string;
400 output_char out_c '\n';
401 output_string out_c version_string;
402 output_char out_c '\n';
403 Marshal.to_channel out_c t.ttable [ ];
404 (* we need to move the fd to the correct position *)
406 ignore (Unix.lseek fd (pos_out out_c) Unix.SEEK_SET);
411 let load ?(sample=64) ?(load_text=true) str =
412 let fd = Unix.openfile str [ Unix.O_RDONLY ] 0o644 in
413 let in_c = Unix.in_channel_of_descr fd in
414 let _ = set_binary_mode_in in_c true in
416 (let ms = input_line in_c in if ms <> magic_string then failwith "Invalid index file");
417 (let vs = input_line in_c in if vs <> version_string then failwith "Invalid version file");
418 let table : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t =
419 Marshal.from_channel in_c
421 let ntable = Hashtbl.create (Hashtbl.length table) in
422 Hashtbl.iter (fun k (s1,s2,s3,s4) ->
423 let ss1 = Ptset.Int.fold (Ptset.Int.add) s1 Ptset.Int.empty
424 and ss2 = Ptset.Int.fold (Ptset.Int.add) s2 Ptset.Int.empty
425 and ss3 = Ptset.Int.fold (Ptset.Int.add) s3 Ptset.Int.empty
426 and ss4 = Ptset.Int.fold (Ptset.Int.add) s4 Ptset.Int.empty
427 in Hashtbl.add ntable k (ss1,ss2,ss3,ss4)
430 (* The in_channel read a chunk of fd, so we might be after
431 the start of the XMLTree save file. Reset to the correct
435 let _ = Printf.eprintf "\nLoading tag table : " in
436 let ntable = time (load_table) () in
437 ignore(Unix.lseek fd (pos_in in_c) Unix.SEEK_SET);
438 let tree = { doc = tree_load fd load_text sample;
446 let tag_pool t = pool t.doc
448 let compare = compare_node
450 let equal a b = a == b
454 | i -> Printf.sprintf "Node (%i)" i
456 let dump_node t = nts (inode t)
458 let is_left t n = tree_is_first_child t.doc n
462 let is_below_right t n1 n2 =
463 tree_is_ancestor t.doc (tree_parent t.doc n1) n2
464 && not (tree_is_ancestor t.doc n1 n2)
466 let is_binary_ancestor t n1 n2 =
467 let p = tree_parent t.doc n1 in
468 let fin = tree_closing t.doc p in
470 (* (is_below_right t n1 n2) ||
471 (tree_is_ancestor t.doc n1 n2) *)
473 let parent t n = tree_parent t.doc n
475 let first_child t = (); fun n -> tree_first_child t.doc n
476 let first_element t = (); fun n -> tree_first_element t.doc n
478 (* these function will be called in two times: first partial application
479 on the tag, then application of the tag and the tree, then application of
480 the other arguments. We use the trick to let the compiler optimize application
483 let tagged_child t tag = () ; fun n -> tree_tagged_child t.doc n tag
485 let select_child t = fun ts ->
486 let v = ptset_to_vector ts in ();
487 fun n -> tree_select_child t.doc n v
489 let next_sibling t = (); fun n -> tree_next_sibling t.doc n
490 let next_element t = (); fun n -> tree_next_element t.doc n
492 let tagged_following_sibling t tag = (); fun n -> tree_tagged_following_sibling t.doc n tag
494 let select_following_sibling t = fun ts ->
495 let v = (ptset_to_vector ts) in ();
496 fun n -> tree_select_following_sibling t.doc n v
498 let next_sibling_below t = (); fun n _ -> tree_next_sibling t.doc n
499 let next_element_below t = (); fun n _ -> tree_next_element t.doc n
500 let tagged_following_sibling_below t tag = (); fun n _ -> tree_tagged_following_sibling t.doc n tag
502 let select_following_sibling_below t = fun ts ->
503 let v = (ptset_to_vector ts) in ();
504 fun n _ -> tree_select_following_sibling t.doc n v
506 let id t n = tree_node_xml_id t.doc n
508 let tag t n = if n == nil then Tag.nullt else tree_tag t.doc n
510 let tagged_descendant t tag = (); fun n -> tree_tagged_descendant t.doc n tag
512 let select_descendant t = fun ts ->
513 let v = (ptset_to_vector ts) in ();
514 fun n -> tree_select_descendant t.doc n v
516 let tagged_following_below t tag = (); fun n ctx -> tree_tagged_following_below t.doc n tag ctx
518 let select_following_below t = fun ts ->
519 let v = (ptset_to_vector ts) in ();
520 fun n ctx -> tree_select_following_below t.doc n v ctx
522 let closing t n = tree_closing t.doc n
523 let is_open t n = tree_is_open t.doc n
524 let get_text_id t n = tree_my_text t.doc n
527 let array_find a i j =
528 let l = Array.length a in
529 let rec loop idx x y =
530 if x > y || idx >= l then nil
532 if a.(idx) >= x then if a.(idx) > y then nil else (last_idx := idx;a.(idx))
533 else loop (idx+1) x y
535 if a.(0) > j || a.(l-1) < i then nil
536 else loop !last_idx i j
540 let count t s = text_count t.doc s
542 let init_stack () = stack := []
543 let push x = stack:= x::!stack
544 let peek () = match !stack with
546 | _ -> failwith "peek"
547 let pop () = match !stack with
549 | _ -> failwith "pop"
551 let next t = nodei ( (inode t) + 1 )
552 let next2 t = nodei ( (inode t) + 2 )
553 let next3 t = nodei ( (inode t) + 3 )
555 let print_xml_fast2 =
556 let _ = init_stack () in
557 let h = Hashtbl.create MED_H_SIZE in
558 let tag_str t = try Hashtbl.find h t with
559 Not_found -> let s = Tag.to_string t in
562 let h_att = Hashtbl.create MED_H_SIZE in
563 let att_str t = try Hashtbl.find h_att t with
564 Not_found -> let s = Tag.to_string t in
565 let attname = String.sub s 3 ((String.length s) -3) in
566 Hashtbl.add h_att t attname;attname
567 in fun outc tree t ->
568 let tree = tree.doc in
569 let fin = tree_closing tree t in
570 let rec loop_tag t tag =
572 if tree_is_open tree t then
574 if tag == Tag.pcdata then
576 output_string outc (text_get_text tree (tree_my_text_unsafe tree t));
577 loop (next2 t) (* skip closing $ *)
580 let tagstr = tag_str tag in
581 let _ = output_char outc '<';
582 output_string outc tagstr in
584 if tree_is_open tree t' then
585 let _ = push tagstr in
586 let tag' = tree_tag tree t' in
587 if tag' == Tag.attribute then let t'' = loop_attr (next t') 0 in
588 output_string outc ">"; loop t'' else (output_string outc ">";loop_tag t' tag')
589 else (* closing with no content *)
590 let _ = output_string outc "/>" in
595 output_string outc "</";
596 output_string outc (pop());
597 output_char outc '>';
600 and loop t = loop_tag t (tree_tag tree t)
602 if tree_is_open tree t then
603 let attname = att_str (tree_tag tree t) in
604 output_char outc ' ';
605 output_string outc attname;
606 output_string outc "=\"";
607 let t = next t in (* open $@ *)
608 output_string outc (text_get_text tree (tree_my_text_unsafe tree t));
609 output_char outc '"';
610 loop_attr (next3 t) (n+1)
616 let h = Hashtbl.create MED_H_SIZE in
617 let tag_str t = try Hashtbl.find h t with
618 Not_found -> let s = Tag.to_string t in
621 let h_att = Hashtbl.create MED_H_SIZE in
622 let att_str t = try Hashtbl.find h_att t with
623 Not_found -> let s = Tag.to_string t in
624 let attname = String.sub s 3 ((String.length s) -3) in
625 Hashtbl.add h_att t attname;attname
626 in fun outc tree t ->
627 let rec loop ?(print_right=true) t =
630 let tagid = tree_tag tree.doc t in
634 let tid = tree_my_text_unsafe tree.doc t in
635 output_string outc (text_get_text tree.doc tid);
637 then loop (next_sibling tree t);
640 let tagstr = tag_str tagid in
641 let l = first_child tree t
642 and r = next_sibling tree t
644 output_char outc '<';
645 output_string outc tagstr;
646 if l == nil then output_string outc "/>"
648 if (tag tree l) == Tag.attribute then
650 loop_attributes (first_child tree l);
651 if (next_sibling tree l) == nil then output_string outc "/>"
654 output_char outc '>';
655 loop (next_sibling tree l);
656 output_string outc "</";
657 output_string outc tagstr;
658 output_char outc '>';
663 output_char outc '>';
665 output_string outc "</";
666 output_string outc tagstr;
667 output_char outc '>';
669 if print_right then loop r
670 and loop_attributes a =
673 let attname = att_str (tag tree a) in
674 let fsa = first_child tree a in
675 let tid = tree_my_text_unsafe tree.doc fsa in
676 output_char outc ' ';
677 output_string outc attname;
678 output_string outc "=\"";
679 output_string outc (text_get_text tree.doc tid);
680 output_char outc '"';
681 loop_attributes (next_sibling tree a)
683 loop ~print_right:false t
686 let print_xml_fast outc tree t =
687 if (tag tree t) = Tag.document_node then
688 print_xml_fast outc tree (first_child tree t)
689 else print_xml_fast outc tree t
691 let tags_children t tag =
692 let a,_,_,_ = Hashtbl.find t.ttable tag in a
693 let tags_below t tag =
694 let _,a,_,_ = Hashtbl.find t.ttable tag in a
695 let tags_siblings t tag =
696 let _,_,a,_ = Hashtbl.find t.ttable tag in a
697 let tags_after t tag =
698 let _,_,_,a = Hashtbl.find t.ttable tag in a
701 let tags t tag = Hashtbl.find t.ttable tag
704 let rec binary_parent t n =
706 if tree_is_first_child t.doc n
707 then tree_parent t.doc n
708 else tree_prev_sibling t.doc n
709 in if tree_tag t.doc r = Tag.pcdata then
713 let doc_ids t n = tree_doc_ids t.doc n
715 let subtree_tags t tag = ();
716 fun n -> if n == nil then 0 else
717 tree_subtree_tags t.doc n tag
720 let tid = tree_my_text t.doc n in
721 if tid == nulldoc then "" else
722 text_get_text t.doc tid
725 let dump_tree fmt tree =
728 let tag = (tree_tag tree.doc t ) in
729 let tagstr = Tag.to_string tag in
730 let tab = String.make n ' ' in
732 if tag == Tag.pcdata || tag == Tag.attribute_data
734 Format.fprintf fmt "%s<%s>%s</%s>\n"
735 tab tagstr (text_get_text tree.doc (tree_my_text tree.doc t)) tagstr
737 Format.fprintf fmt "%s<%s>\n" tab tagstr;
738 loop (tree_first_child tree.doc t) (n+2);
739 Format.fprintf fmt "%s</%s>\n%!" tab tagstr;
741 loop (tree_next_sibling tree.doc t) n
747 let print_xml_fast3 t = tree_print_xml_fast3 t.doc