1 (******************************************************************************)
2 (* SXSI : XPath evaluator *)
3 (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *)
4 (* Copyright NICTA 2008 *)
5 (* Distributed under the terms of the LGPL (see LICENCE) *)
6 (******************************************************************************)
10 external init_lib : unit -> unit = "caml_init_lib"
12 exception CPlusPlusError of string
14 let () = Callback.register_exception "CPlusPlusError" (CPlusPlusError "")
20 type 'a node = private int
21 type node_kind = [`Text | `Tree ]
25 ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t;
28 external inode : 'a node -> int = "%identity"
29 external nodei : int -> 'a node = "%identity"
30 let compare_node x y = (inode x) - (inode y)
31 let equal_node : 'a node -> 'a node -> bool = (==)
34 external parse_xml_uri : string -> int -> bool -> bool -> tree = "caml_call_shredder_uri"
35 external parse_xml_string : string -> int -> bool -> bool -> tree = "caml_call_shredder_string"
36 external tree_print_xml_fast3 : tree -> [`Tree ] node -> Unix.file_descr ->unit = "caml_xml_tree_print"
37 external tree_save : tree -> Unix.file_descr -> unit = "caml_xml_tree_save"
38 external tree_load : Unix.file_descr -> bool -> int -> tree = "caml_xml_tree_load"
40 external nullt : unit -> 'a node = "caml_xml_tree_nullt"
42 let nil : [`Tree ] node = nodei ~-1
43 let nulldoc : [`Text ] node = nodei ~-1
44 let root : [`Tree ] node = nodei 0
46 external text_get_text : tree -> [`Text] node -> string = "caml_text_collection_get_text"
47 external text_is_empty : tree -> [`Text ] node -> bool = "caml_text_collection_empty_text"
49 let text_is_empty t n = (equal_node nulldoc n) || text_is_empty t n
51 external text_is_prefix : tree -> string -> bool = "caml_text_collection_is_prefix"
52 external text_is_suffix : tree -> string -> bool = "caml_text_collection_is_suffix"
53 external text_is_equal : tree -> string -> bool = "caml_text_collection_is_equal"
54 external text_is_contains : tree -> string -> bool = "caml_text_collection_is_contains"
55 external text_is_lessthan : tree -> string -> bool = "caml_text_collection_is_lessthan"
57 external text_count : tree -> string -> int = "caml_text_collection_count"
58 external text_count_prefix : tree -> string -> int = "caml_text_collection_count_prefix"
59 external text_count_suffix : tree -> string -> int = "caml_text_collection_count_suffix"
60 external text_count_equal : tree -> string -> int = "caml_text_collection_count_equal"
61 external text_count_contains : tree -> string -> int = "caml_text_collection_count_contains"
62 external text_count_lessthan : tree -> string -> int = "caml_text_collection_count_lessthan"
64 external text_prefix : tree -> string -> [`Text ] node array = "caml_text_collection_prefix"
65 external text_suffix : tree -> string -> [`Text ] node array = "caml_text_collection_suffix"
66 external text_equals : tree -> string -> [`Text ] node array = "caml_text_collection_equals"
67 external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains"
68 external text_lessthan : tree -> string -> [`Text ] node array = "caml_text_collection_lessthan"
71 external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" "noalloc"
72 external tree_size : tree -> int = "caml_xml_tree_size" "noalloc"
73 external tree_subtree_size : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_size" "noalloc"
74 external tree_subtree_elements : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_elements" "noalloc"
75 external tree_subtree_tags : tree -> [`Tree] node -> Tag.t -> int = "caml_xml_tree_subtree_elements" "noalloc"
77 let tree_is_nil x = equal_node x nil
78 external tree_is_leaf : tree -> [`Tree ] node -> bool = "caml_xml_tree_is_leaf" "noalloc"
79 external tree_is_ancestor : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" "noalloc"
80 external tree_is_child : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_child" "noalloc"
81 external tree_is_first_child : tree -> [`Tree ] node -> bool = "caml_xml_tree_is_first_child" "noalloc"
82 external tree_num_children : tree -> [`Tree ] node -> int = "caml_xml_tree_num_children" "noalloc"
83 external tree_child_number : tree -> [`Tree ] node -> int = "caml_xml_tree_child_number" "noalloc"
84 external tree_depth : tree -> [`Tree ] node -> int = "caml_xml_tree_depth" "noalloc"
85 external tree_preorder : tree -> [`Tree ] node -> int = "caml_xml_tree_preorder" "noalloc"
86 external tree_postorder : tree -> [`Tree ] node -> int = "caml_xml_tree_postorder" "noalloc"
87 external tree_tag : tree -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag" "noalloc"
88 external tree_doc_ids : tree -> [`Tree ] node -> [`Text] node*[`Text] node = "caml_xml_tree_doc_ids"
90 external tree_parent : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_parent" "noalloc"
91 external tree_child : tree -> [`Tree] node -> int -> [`Tree] node = "caml_xml_tree_child" "noalloc"
92 external tree_first_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" "noalloc"
93 external tree_first_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_element" "noalloc"
94 external tree_last_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_last_child" "noalloc"
95 external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" "noalloc"
96 external tree_next_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_element" "noalloc"
97 external tree_prev_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling" "noalloc"
98 external tree_tagged_child : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_child" "noalloc"
101 external unordered_set_alloc : int -> unordered_set = "caml_unordered_set_alloc"
102 external unordered_set_length : unordered_set -> int = "caml_unordered_set_length"
103 external unordered_set_insert : unordered_set -> int -> unit = "caml_unordered_set_set" "noalloc"
105 external tree_select_child : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_child" "noalloc"
106 external tree_tagged_following_sibling : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_following_sibling" "noalloc"
107 external tree_select_following_sibling : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_following_sibling" "noalloc"
108 external tree_tagged_descendant : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_descendant" "noalloc"
109 external tree_select_descendant : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_descendant" "noalloc"
110 external tree_tagged_following : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_following" "noalloc"
111 external tree_tagged_following_below : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_following_below" "noalloc"
112 external tree_select_following_below : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_following_below" "noalloc"
115 external tree_tagged_following_before : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_following_before" "noalloc"
116 external tree_select_following_below : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_following_before" "noalloc"
118 external tree_my_text : tree -> [`Tree ] node -> [`Text] node = "caml_xml_tree_my_text" "noalloc"
119 external tree_my_text_unsafe : tree -> [`Tree ] node -> [`Text] node = "caml_xml_tree_my_text_unsafe" "noalloc"
120 external tree_text_xml_id : tree -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" "noalloc"
121 external tree_node_xml_id : tree -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id" "noalloc"
123 external tree_parent_node : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_node" "noalloc"
125 (*external tree_prev_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_prev_doc" "noalloc" *)
127 external tree_closing : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_closing" "noalloc"
128 external tree_is_open : tree -> [`Tree] node -> bool = "caml_xml_tree_is_open" "noalloc"
131 external benchmark_jump : tree -> Tag.t -> unit = "caml_benchmark_jump" "noalloc"
133 let benchmark_jump t s = benchmark_jump t.doc s
135 external benchmark_fsns : tree -> unit = "caml_benchmark_fsns" "noalloc"
137 let benchmark_fsns t = benchmark_fsns t.doc
145 let text_size tree = inode (snd ( tree_doc_ids tree root ))
147 let text_get_text t (x:[`Text] node) =
148 if x == nulldoc then ""
149 else text_get_text t x
154 module HPtset = Hashtbl.Make(Ptset.Int)
156 let vector_htbl = HPtset.create MED_H_SIZE
158 let ptset_to_vector s =
160 HPtset.find vector_htbl s
163 let v = unordered_set_alloc (Ptset.Int.cardinal s) in
164 let _ = Ptset.Int.iter (fun e -> unordered_set_insert v e) s in
165 HPtset.add vector_htbl s v; v
169 let subtree_size t i = tree_subtree_size t.doc i
170 let subtree_elements t i = tree_subtree_elements t.doc i
171 let text_size t = text_size t.doc
173 module MemUnion = Hashtbl.Make (struct
174 type t = Ptset.Int.t*Ptset.Int.t
175 let equal (x,y) (z,t) = x == z || y == t
176 let equal a b = equal a b || equal b a
177 let hash (x,y) = (* commutative hash *)
178 let x = Ptset.Int.uid x
179 and y = Ptset.Int.uid y
181 if x <= y then HASHINT2(x,y) else HASHINT2(y,x)
184 module MemAdd = Hashtbl.Make (
186 type t = Tag.t*Ptset.Int.t
187 let equal (x,y) (z,t) = (x == z)&&(y == t)
188 let hash (x,y) = HASHINT2(x,Ptset.Int.uid y)
191 let collect_tags tree =
192 let _ = Printf.eprintf "Collecting Tags\n%!" in
193 (* let h_union = MemUnion.create BIG_H_SIZE in
196 MemUnion.find h_union (s1,s2)
198 | Not_found -> let s = Ptset.Int.union s1 s2
200 MemUnion.add h_union (s1,s2) s;s
202 let h_add = MemAdd.create BIG_H_SIZE in
204 try MemAdd.find h_add (t,s)
206 | Not_found -> let r = Ptset.Int.add t s in
207 MemAdd.add h_add (t,s) r;r
209 let pt_cup = Ptset.Int.union in
210 let pt_add = Ptset.Int.add in
211 let h = Hashtbl.create BIG_H_SIZE in
212 let update t sc sb ss sa =
213 let schild,sbelow,ssibling,safter =
218 (Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty)
221 (pt_cup sc schild,pt_cup sbelow sb, pt_cup ssibling ss, pt_cup safter sa)
223 let rec loop right id acc_after =
225 then Ptset.Int.empty,Ptset.Int.empty,acc_after else
226 let sibling2,desc2,after2 = loop true (tree_next_sibling tree id) acc_after in
227 let child1,desc1,after1 = loop false (tree_first_child tree id) after2 in
228 let tag = tree_tag tree id in
229 update tag child1 desc1 sibling2 after2;
230 ( pt_add tag sibling2,
231 pt_add tag (pt_cup desc1 desc2),
232 if right then pt_cup after1 (pt_cup desc1 desc2) else acc_after )
234 let _ = loop false (tree_root tree) Ptset.Int.empty in
235 let _ = Printf.eprintf "Finished\n%!" in
241 let contains_array = ref [| |]
242 let contains_index = Hashtbl.create 4096
245 Hashtbl.find contains_index i
249 let init_textfun f t s =
251 | `CONTAINS -> text_contains t.doc s
252 | `STARTSWITH -> text_prefix t.doc s
253 | `ENDSWITH -> text_suffix t.doc s
254 | `EQUALS -> text_equals t.doc s
256 (*Array.fast_sort (compare) a; *)
258 Array.iter (fun x -> Hashtbl.add contains_index x true) !contains_array
260 let count_contains t s = text_count_contains t.doc s
262 let init_naive_contains t s =
263 let i,j = tree_doc_ids t.doc (tree_root t.doc)
265 let regexp = Str.regexp_string s in
268 let _ = Str.search_forward regexp arg 0;
272 let rec loop n acc l =
275 let s = text_get_text t.doc n
278 then loop (nodei ((inode n)+1)) (n::acc) (l+1)
279 else loop (nodei ((inode n)+1)) acc l
281 let acc,l = loop i [] 0 in
282 let a = Array.create l nulldoc in
283 let _ = List.fold_left (fun cpt e -> a.(cpt) <- e; (cpt-1)) (l-1) acc
289 let array_find a i j =
290 let l = Array.length a in
291 let rec loop idx x y =
292 if x > y || idx >= l then nulldoc
294 if a.(idx) >= x then if a.(idx) > y then nulldoc else (last_idx := idx;a.(idx))
295 else loop (idx+1) x y
297 if a.(0) > j || a.(l-1) < i then nulldoc
298 else loop !last_idx i j
300 let text_below tree t =
301 let l = Array.length !contains_array in
302 let i,j = tree_doc_ids tree.doc t in
303 let id = if l == 0 then i else (array_find !contains_array i j) in
304 tree_parent_node tree.doc id
306 let text_next tree t root =
307 let l = Array.length !contains_array in
308 let inf = nodei((inode(snd(tree_doc_ids tree.doc t)))+1) in
309 let _,j = tree_doc_ids tree.doc root in
310 let id = if l == 0 then if inf > j then nulldoc else inf
311 else array_find !contains_array inf j
313 tree_parent_node tree.doc id
317 module DocIdSet = struct
318 include Set.Make (struct type t = [`Text] node
319 let compare = compare_node end)
322 let is_nil t = t == nil
324 let is_node t = t != nil
325 let is_root t = t == root
328 let _ = Tag.init (Obj.magic t) in
329 let table = collect_tags t
331 let _ = Hashtbl.iter (fun t (c,d,ns,f) ->
332 Printf.eprintf "Tag %s has:\n" (Tag.to_string t);
333 Printf.eprintf "Child tags: ";
334 Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) c;
335 Printf.eprintf "\nDescendant tags: ";
336 Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) d;
337 Printf.eprintf "\nNextSibling tags: ";
338 Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) ns;
339 Printf.eprintf "\nFollowing tags: ";
340 Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) f;
341 Printf.eprintf "\n\n%!";) table
349 let finalize _ = Printf.eprintf "Release the string list !\n%!"
355 !Options.sample_factor
356 !Options.index_empty_texts
357 !Options.disable_text_collection)
359 let parse_xml_uri str = parse parse_xml_uri str
360 let parse_xml_string str = parse parse_xml_string str
362 let size t = tree_size t.doc;;
364 external pool : tree -> Tag.pool = "%identity"
366 let magic_string = "SXSI_INDEX"
367 let version_string = "2"
370 Unix.lseek fd 0 Unix.SEEK_CUR
372 let pr_pos fd = Printf.eprintf "At position %i\n%!" (pos fd)
375 let sl = String.length s in
376 let ssl = Printf.sprintf "%020i" sl in
377 ignore (Unix.write fd ssl 0 20);
378 ignore (Unix.write fd s 0 (String.length s))
380 let rec really_read fd buffer start length =
381 if length <= 0 then () else
382 match Unix.read fd buffer start length with
383 0 -> raise End_of_file
384 | r -> really_read fd buffer (start + r) (length - r);;
387 let buffer = String.create 20 in
388 let _ = really_read fd buffer 0 20 in
389 let size = int_of_string buffer in
390 let buffer = String.create size in
391 let _ = really_read fd buffer 0 size in
396 let fd = Unix.openfile str [ Unix.O_WRONLY;Unix.O_TRUNC;Unix.O_CREAT] 0o644 in
397 let out_c = Unix.out_channel_of_descr fd in
398 let _ = set_binary_mode_out out_c true in
399 output_string out_c magic_string;
400 output_char out_c '\n';
401 output_string out_c version_string;
402 output_char out_c '\n';
403 Marshal.to_channel out_c t.ttable [ ];
404 (* we need to move the fd to the correct position *)
406 ignore (Unix.lseek fd (pos_out out_c) Unix.SEEK_SET);
411 let load ?(sample=64) ?(load_text=true) str =
412 let fd = Unix.openfile str [ Unix.O_RDONLY ] 0o644 in
413 let in_c = Unix.in_channel_of_descr fd in
414 let _ = set_binary_mode_in in_c true in
416 (let ms = input_line in_c in if ms <> magic_string then failwith "Invalid index file");
417 (let vs = input_line in_c in if vs <> version_string then failwith "Invalid version file");
418 let table : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t =
419 Marshal.from_channel in_c
421 let ntable = Hashtbl.create (Hashtbl.length table) in
422 Hashtbl.iter (fun k (s1,s2,s3,s4) ->
423 let ss1 = Ptset.Int.fold (Ptset.Int.add) s1 Ptset.Int.empty
424 and ss2 = Ptset.Int.fold (Ptset.Int.add) s2 Ptset.Int.empty
425 and ss3 = Ptset.Int.fold (Ptset.Int.add) s3 Ptset.Int.empty
426 and ss4 = Ptset.Int.fold (Ptset.Int.add) s4 Ptset.Int.empty
427 in Hashtbl.add ntable k (ss1,ss2,ss3,ss4)
430 (* The in_channel read a chunk of fd, so we might be after
431 the start of the XMLTree save file. Reset to the correct
435 let _ = Printf.eprintf "\nLoading tag table : " in
436 let ntable = time (load_table) () in
437 ignore(Unix.lseek fd (pos_in in_c) Unix.SEEK_SET);
438 let tree = { doc = tree_load fd load_text sample;
446 let tag_pool t = pool t.doc
448 let compare = compare_node
450 let equal a b = a == b
454 | i -> Printf.sprintf "Node (%i)" i
456 let dump_node t = nts (inode t)
458 let is_left t n = tree_is_first_child t.doc n
462 let is_below_right t n1 n2 =
463 tree_is_ancestor t.doc (tree_parent t.doc n1) n2
464 && not (tree_is_ancestor t.doc n1 n2)
466 let is_binary_ancestor t n1 n2 =
467 let p = tree_parent t.doc n1 in
468 let fin = tree_closing t.doc p in
470 (* (is_below_right t n1 n2) ||
471 (tree_is_ancestor t.doc n1 n2) *)
473 let parent t n = tree_parent t.doc n
475 let first_child t = (); fun n -> tree_first_child t.doc n
476 let first_element t = (); fun n -> tree_first_element t.doc n
478 (* these function will be called in two times: first partial application
479 on the tag, then application of the tag and the tree, then application of
480 the other arguments. We use the trick to let the compiler optimize application
483 let tagged_child t tag = () ; fun n -> tree_tagged_child t.doc n tag
485 let select_child t = fun ts ->
486 let v = ptset_to_vector ts in ();
487 fun n -> tree_select_child t.doc n v
489 let next_sibling t = (); fun n -> tree_next_sibling t.doc n
490 let next_element t = (); fun n -> tree_next_element t.doc n
492 let tagged_following_sibling t tag = (); fun n -> tree_tagged_following_sibling t.doc n tag
494 let select_following_sibling t = fun ts ->
495 let v = (ptset_to_vector ts) in ();
496 fun n -> tree_select_following_sibling t.doc n v
498 let next_sibling_below t = (); fun n _ -> tree_next_sibling t.doc n
499 let next_element_below t = (); fun n _ -> tree_next_element t.doc n
500 let tagged_following_sibling_below t tag = (); fun n _ -> tree_tagged_following_sibling t.doc n tag
502 let select_following_sibling_below t = fun ts ->
503 let v = (ptset_to_vector ts) in ();
504 fun n _ -> tree_select_following_sibling t.doc n v
506 let id t n = tree_node_xml_id t.doc n
508 let tag t n = if n == nil then Tag.nullt else tree_tag t.doc n
510 let tagged_descendant t tag =
511 let doc = t.doc in (); fun n -> tree_tagged_descendant doc n tag
513 let select_descendant t = fun ts ->
514 let v = (ptset_to_vector ts) in ();
515 fun n -> tree_select_descendant t.doc n v
517 let tagged_following_below t tag =
519 (); fun n ctx -> tree_tagged_following_below doc n tag ctx
521 let select_following_below t = fun ts ->
522 let v = (ptset_to_vector ts) in ();
523 fun n ctx -> tree_select_following_below t.doc n v ctx
525 let closing t n = tree_closing t.doc n
526 let is_open t n = tree_is_open t.doc n
527 let get_text_id t n = tree_my_text t.doc n
530 let array_find a i j =
531 let l = Array.length a in
532 let rec loop idx x y =
533 if x > y || idx >= l then nil
535 if a.(idx) >= x then if a.(idx) > y then nil else (last_idx := idx;a.(idx))
536 else loop (idx+1) x y
538 if a.(0) > j || a.(l-1) < i then nil
539 else loop !last_idx i j
543 let count t s = text_count t.doc s
545 let init_stack () = stack := []
546 let push x = stack:= x::!stack
547 let peek () = match !stack with
549 | _ -> failwith "peek"
550 let pop () = match !stack with
552 | _ -> failwith "pop"
554 let next t = nodei ( (inode t) + 1 )
555 let next2 t = nodei ( (inode t) + 2 )
556 let next3 t = nodei ( (inode t) + 3 )
558 let print_xml_fast2 =
559 let _ = init_stack () in
560 let h = Hashtbl.create MED_H_SIZE in
561 let tag_str t = try Hashtbl.find h t with
562 Not_found -> let s = Tag.to_string t in
565 let h_att = Hashtbl.create MED_H_SIZE in
566 let att_str t = try Hashtbl.find h_att t with
567 Not_found -> let s = Tag.to_string t in
568 let attname = String.sub s 3 ((String.length s) -3) in
569 Hashtbl.add h_att t attname;attname
570 in fun outc tree t ->
571 let tree = tree.doc in
572 let fin = tree_closing tree t in
573 let rec loop_tag t tag =
575 if tree_is_open tree t then
577 if tag == Tag.pcdata then
579 output_string outc (text_get_text tree (tree_my_text_unsafe tree t));
580 loop (next2 t) (* skip closing $ *)
583 let tagstr = tag_str tag in
584 let _ = output_char outc '<';
585 output_string outc tagstr in
587 if tree_is_open tree t' then
588 let _ = push tagstr in
589 let tag' = tree_tag tree t' in
590 if tag' == Tag.attribute then let t'' = loop_attr (next t') 0 in
591 output_string outc ">"; loop t'' else (output_string outc ">";loop_tag t' tag')
592 else (* closing with no content *)
593 let _ = output_string outc "/>" in
598 output_string outc "</";
599 output_string outc (pop());
600 output_char outc '>';
603 and loop t = loop_tag t (tree_tag tree t)
605 if tree_is_open tree t then
606 let attname = att_str (tree_tag tree t) in
607 output_char outc ' ';
608 output_string outc attname;
609 output_string outc "=\"";
610 let t = next t in (* open $@ *)
611 output_string outc (text_get_text tree (tree_my_text_unsafe tree t));
612 output_char outc '"';
613 loop_attr (next3 t) (n+1)
619 let h = Hashtbl.create MED_H_SIZE in
620 let tag_str t = try Hashtbl.find h t with
621 Not_found -> let s = Tag.to_string t in
624 let h_att = Hashtbl.create MED_H_SIZE in
625 let att_str t = try Hashtbl.find h_att t with
626 Not_found -> let s = Tag.to_string t in
627 let attname = String.sub s 3 ((String.length s) -3) in
628 Hashtbl.add h_att t attname;attname
629 in fun outc tree t ->
630 let rec loop ?(print_right=true) t =
633 let tagid = tree_tag tree.doc t in
637 let tid = tree_my_text_unsafe tree.doc t in
638 output_string outc (text_get_text tree.doc tid);
640 then loop (next_sibling tree t);
643 let tagstr = tag_str tagid in
644 let l = first_child tree t
645 and r = next_sibling tree t
647 output_char outc '<';
648 output_string outc tagstr;
649 if l == nil then output_string outc "/>"
651 if (tag tree l) == Tag.attribute then
653 loop_attributes (first_child tree l);
654 if (next_sibling tree l) == nil then output_string outc "/>"
657 output_char outc '>';
658 loop (next_sibling tree l);
659 output_string outc "</";
660 output_string outc tagstr;
661 output_char outc '>';
666 output_char outc '>';
668 output_string outc "</";
669 output_string outc tagstr;
670 output_char outc '>';
672 if print_right then loop r
673 and loop_attributes a =
676 let attname = att_str (tag tree a) in
677 let fsa = first_child tree a in
678 let tid = tree_my_text_unsafe tree.doc fsa in
679 output_char outc ' ';
680 output_string outc attname;
681 output_string outc "=\"";
682 output_string outc (text_get_text tree.doc tid);
683 output_char outc '"';
684 loop_attributes (next_sibling tree a)
686 loop ~print_right:false t
689 let print_xml_fast outc tree t =
690 if (tag tree t) = Tag.document_node then
691 print_xml_fast outc tree (first_child tree t)
692 else print_xml_fast outc tree t
694 let tags_children t tag =
695 let a,_,_,_ = Hashtbl.find t.ttable tag in a
696 let tags_below t tag =
697 let _,a,_,_ = Hashtbl.find t.ttable tag in a
698 let tags_siblings t tag =
699 let _,_,a,_ = Hashtbl.find t.ttable tag in a
700 let tags_after t tag =
701 let _,_,_,a = Hashtbl.find t.ttable tag in a
704 let tags t tag = Hashtbl.find t.ttable tag
707 let rec binary_parent t n =
709 if tree_is_first_child t.doc n
710 then tree_parent t.doc n
711 else tree_prev_sibling t.doc n
712 in if tree_tag t.doc r = Tag.pcdata then
716 let doc_ids t n = tree_doc_ids t.doc n
718 let subtree_tags t tag = ();
719 fun n -> if n == nil then 0 else
720 tree_subtree_tags t.doc n tag
723 let tid = tree_my_text t.doc n in
724 if tid == nulldoc then "" else
725 text_get_text t.doc tid
728 let dump_tree fmt tree =
731 let tag = (tree_tag tree.doc t ) in
732 let tagstr = Tag.to_string tag in
733 let tab = String.make n ' ' in
735 if tag == Tag.pcdata || tag == Tag.attribute_data
737 Format.fprintf fmt "%s<%s>%s</%s>\n"
738 tab tagstr (text_get_text tree.doc (tree_my_text tree.doc t)) tagstr
740 Format.fprintf fmt "%s<%s>\n" tab tagstr;
741 loop (tree_first_child tree.doc t) (n+2);
742 Format.fprintf fmt "%s</%s>\n%!" tab tagstr;
744 loop (tree_next_sibling tree.doc t) n
750 let print_xml_fast3 t = tree_print_xml_fast3 t.doc