1 (******************************************************************************)
2 (* SXSI : XPath evaluator *)
3 (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *)
4 (* Copyright NICTA 2008 *)
5 (* Distributed under the terms of the LGPL (see LICENCE) *)
6 (******************************************************************************)
11 type descr = Nil | Node of node_content |String of string_content
13 val parse_xml_uri : string -> t
14 val parse_xml_string : string -> t
15 val string : t -> string
16 val descr : t -> descr
21 val print_xml_fast : out_channel -> t -> unit
22 val compare : t -> t -> int
23 val equal : t -> t -> bool
29 type string_content = string
30 type descr = Nil | Node of node_content | String of string_content
31 and node_content = int*Tag.t * descr * descr * (descr ref)
36 let string = function String s -> s | _ -> failwith "string"
39 external parse_xml_uri : string -> t = "caml_call_shredder_uri"
40 external parse_xml_string : string -> t = "caml_call_shredder_string"
42 let parse_xml_uri s = Node(0,Tag.tag "",parse_xml_uri s,Nil,ref Nil)
43 let parse_xml_string s = Node(0,Tag.tag "",parse_xml_string s,Nil,ref Nil)
44 let tstring = function Nil -> "Nil"
45 | Node (_,_,_,_,_) -> "Node"
46 | String _ -> "String"
50 let pp_str = Format.pp_print_string fmt in
51 let rec loop = function Nil -> ()
52 | String (s) -> pp_str s
53 | Node (_,t,l,r,_) when Tag.equal t Tag.pcdata -> loop l;loop r
55 pp_str ("<" ^ (Tag.to_string t));
58 | Node(_,t',atts,children,_) when Tag.equal t' Tag.attribute ->
59 (loop_attributes atts;
65 pp_str ("</"^ (Tag.to_string t)^">" )
67 | _ -> pp_str ">"; loop l;
68 pp_str ("</"^ (Tag.to_string t)^">" );
70 and loop_attributes = function
71 | Node(_,t,Node(_,_,String(s),_,_),r,_) ->
72 pp_str (" "^(Tag.to_string t)^"=\""^ s ^"\"") ;
80 function Node(i,t,l,_,_) -> print_xml fmt (Node(i,t,l,Nil,ref Nil))
81 | t -> print_xml fmt t
84 (* a bit ugly but inlining like this makes serialization faster *)
86 let print_xml_fast outc t =
87 let rec loop = function Nil -> ()
88 | String (s) -> output_string outc s
89 | Node (_,t,l,r,_) when Tag.equal t Tag.pcdata -> loop l;loop r
90 | Node (_,t,l,r,_) -> let t = Tag.to_string t in
94 Nil -> output_string outc "/>"
95 | Node(_,t',atts,children,_) when Tag.equal t' Tag.attribute ->
96 (loop_attributes atts;
98 | Nil -> output_string outc "/>"
100 output_char outc '>';
102 output_string outc "</";
103 output_string outc t;
104 output_char outc '>' )
106 output_char outc '>';
108 output_string outc "</";
109 output_string outc t;
112 and loop_attributes = function
113 | Node(_,t,Node(_,_,String(s),_,_),r,_) ->
114 output_char outc ' ';
115 output_string outc (Tag.to_string t);
116 output_string outc "=\"";
117 output_string outc s;
118 output_char outc '"';
125 let print_xml_fast outc =
126 function Node(i,t,l,_,_) -> print_xml_fast outc (Node(i,t,l,Nil,ref Nil))
127 | t -> print_xml_fast outc t
136 Format.fprintf fmt " "
143 | Nil -> prtabs fmt; Format.fprintf fmt "#"
144 | String s -> prtabs fmt; Format.fprintf fmt "(String %s)" s
145 | Node(id,t,l,r,_) ->
147 Format.fprintf fmt " (tag='";
149 Format.fprintf fmt "', id='%i')\n" id;
152 Format.fprintf fmt "\n";
155 Format.fprintf fmt "\n";
156 prtabs fmt;prtabs fmt;
157 Format.fprintf fmt "(id='%i'end )\n" id
166 let id = function Node(i,_,_,_,_) -> i
169 let tag = function Node(_,t,_,_,_) -> t
170 | _ -> failwith "tag"
172 let left = function Node(_,_,l,_,_) -> l
173 | _ -> failwith "left"
175 let right = function Node(_,_,_,r,_) -> r
176 | _ -> failwith "right"
178 let first_child = left
179 let next_sibling = right
181 let is_root = function Node (_,_,_,_,{contents=Nil}) -> true | _ -> false
182 let is_left n = match n with
183 | Node (_,_,_,_,{contents=p}) when not(is_root n) && (left p) == n -> true
186 let is_right n = match n with
187 | Node (_,_,_,_,{contents=p}) when not(is_root n) && (right p) == n -> true
191 let compare t1 t2 = match t1,t2 with
193 | String s1, String s2 -> String.compare s1 s2
194 | Nil, String _ -> -1
196 | Node(i1,_,_,_,_), Node(i2,_,_,_,_) -> i1 - i2
199 let equal t1 t2 = (compare t1 t2) == 0
201 let int_size = Sys.word_size/8
202 let ssize s = ((String.length s)/4 +1)*4
204 function Nil -> (int_size,1,0,0)
205 | String s -> (int_size + (ssize s),0,1,0)
207 let sizel,nl,sl,il = size l
208 and sizer,nr,sr,ir = size r
210 (sizel+sizer+(7*int_size),nl+nr,sl+sr,il+ir+1)
212 let s,n,st,i = size t in
222 type node_kind = [`Text | `Tree ]
224 let compare : 'a node -> 'a node -> int = (-)
225 let equal : 'a node -> 'a node -> bool = (==)
227 (* abstract type, values are pointers to a XMLTree C++ object *)
229 external int_of_node : 'a node -> int = "%identity"
231 external parse_xml_uri : string -> t = "caml_call_shredder_uri"
232 let parse_xml_uri uri = parse_xml_uri uri
234 external parse_xml_string : string -> t = "caml_call_shredder_string"
235 let parse_xml_string uri = parse_xml_string uri
240 type t (* pointer to the text collection *)
242 external nullt : unit -> [`Text ] node = "caml_xml_tree_nullt"
244 external get_text : t -> [`Text] node -> string = "caml_text_collection_get_text"
247 if equal nil n then ""
250 external is_empty : t -> [`Text ] node -> bool = "caml_text_collection_empty_text"
258 external serialize : string -> unit = "caml_xml_tree_serialize"
259 external unserialize : string -> t = "caml_xml_tree_unserialize"
261 external root : t -> [`Tree] node = "caml_xml_tree_root"
262 external nullt : unit -> [`Tree ] node = "caml_xml_tree_nullt"
265 let is_nil x = equal x nil
267 external parent : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child"
268 external parent_doc : t -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_doc"
269 external first_child : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child"
273 external next_sibling : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling"
275 external is_leaf : t -> [`Tree] node -> bool = "caml_xml_tree_is_leaf"
277 external tag : t -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag"
279 external text_collection : t -> Text.t = "caml_xml_tree_text_collection"
281 let is_last t n = equal nil (next_sibling t n)
283 external prev_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_prev_text"
286 external my_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text"
287 external next_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_next_text"
289 external text_xml_id : t -> [`Text ] node -> int = "caml_xml_tree_text_xml_id"
290 external node_xml_id : t -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id"
294 let textcol = text_collection t in
297 then Printf.eprintf "#"
300 Printf.eprintf "Node %i has tag '%s', DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s)\n%!"
302 (Tag.to_string (tag t id))
303 (int_of_node (prev_text t id))
304 (Text.get_text textcol (prev_text t id))
305 (int_of_node (my_text t id))
306 (Text.get_text textcol (my_text t id))
307 (int_of_node (next_text t id))
308 (Text.get_text textcol (next_text t id));
309 aux(first_child t id);
310 aux(next_sibling t id);
316 let textcol = text_collection t in
322 ignore (Text.get_text textcol (prev_text t id));
324 then ignore (Text.get_text textcol (my_text t id));
326 then ignore (Text.get_text textcol (next_text t id));
327 aux (first_child t id);
328 aux (next_sibling t id);
335 module Binary = struct
339 | SC of [`Text ] node * [`Tree ] node
340 type string_content = [ `Text ] node
343 | Node of node_content
344 | String of string_content
348 type t = { doc : doc;
352 let dump { doc=t } = Tree.print_skel t
354 let node_of_t t = { doc= t;
355 text = text_collection t;
356 node = Node(NC (root t)) }
359 let parse_xml_uri str = node_of_t (parse_xml_uri str)
360 let parse_xml_string str = node_of_t (parse_xml_string str)
362 let compare a b = match a.node,b.node with
363 | Node(NC i),Node(NC j) -> compare i j
364 | _, Node(NC( _ )) -> 1
365 | Node(SC (i,_)),Node(SC (j,_)) -> compare i j
366 | Node(NC( _ )),Node(SC (_,_)) -> -1
367 | _, Node(SC (_,_)) -> 1
368 | String i, String j -> compare i j
369 | Node _ , String _ -> -1
374 let equal a b = (compare a b) == 0
376 let string t = match t.node with
377 | String i -> Text.get_text t.text i
380 let norm (n : [`Tree ] node ) = if is_nil n then Nil else Node (NC n)
386 | String i -> Printf.sprintf "String %i" i
387 | Node (NC t) -> Printf.sprintf "Node (NC %i)" (int_of_node t)
388 | Node (SC (t,i)) -> Printf.sprintf "Node (SC (%i,%i))" (int_of_node t) (int_of_node i)
393 | Node (NC t) when is_leaf n.doc t ->
394 let txt = my_text n.doc t in
395 if Text.is_empty n.text txt
397 else Node(SC (txt,Tree.nil))
399 let fs = first_child n.doc t in
400 let txt = prev_text n.doc fs in
401 if Text.is_empty n.text txt
403 else Node (SC (txt, fs))
404 | Node(SC (i,_)) -> String i
405 | Nil | String _ -> failwith "first_child"
407 { n with node = node'}
413 | Node (SC (_,ns)) -> norm ns
415 let ns = next_sibling n.doc t in
416 let txt = next_text n.doc t in
417 if Text.is_empty n.text txt
419 else Node (SC (txt, ns))
420 | Nil | String _ -> failwith "next_sibling"
422 { n with node = node'}
425 let left = first_child
426 let right = next_sibling
429 function { doc=d; node=Node(NC n)} -> text_xml_id d n
430 | { doc=d; node=Node(SC (i,_) )} -> node_xml_id d i
434 function { node=Node(SC _) } -> Tag.pcdata
435 | { doc=d; node=Node(NC n)} -> tag d n
436 | _ -> failwith "Tag"
440 let print_xml_fast outc t =
441 let rec loop ?(print_right=true) t = match t.node with
443 | String (s) -> output_string outc (string t)
444 | Node _ when Tag.equal (tag t) Tag.pcdata -> loop (left t); loop (right t)
447 let tg = Tag.to_string (tag t) in
451 output_char outc '<';
452 output_string outc tg;
454 Nil -> output_string outc "/>"
455 | String _ -> assert false
456 | Node(_) when Tag.equal (tag l) Tag.attribute ->
457 (loop_attributes (left l);
458 match (right l).node with
459 | Nil -> output_string outc "/>"
461 output_char outc '>';
463 output_string outc "</";
464 output_string outc tg;
465 output_char outc '>' )
467 output_char outc '>';
469 output_string outc "</";
470 output_string outc tg;
472 );if print_right then loop r
473 and loop_attributes a =
478 match (left a).node with
480 | _ -> string (left(left a))
482 output_char outc ' ';
483 output_string outc (Tag.to_string (tag a));
484 output_string outc "=\"";
485 output_string outc value;
486 output_char outc '"';
487 loop_attributes (right a)
490 loop ~print_right:false t
493 let print_xml_fast outc t =
494 if Tag.to_string (tag t) = "" then
495 print_xml_fast outc (first_child t)
496 else print_xml_fast outc t
498 let traversal t = Tree.traversal t.doc
499 let full_traversal t =
503 | String i -> ignore(Text.get_text t.text i)
514 let dump = XML.Binary.dump
515 let traversal = XML.Binary.traversal
516 let full_traversal = XML.Binary.full_traversal
517 external cpp_traversal : XML.t -> unit = "caml_cpp_traversal"
518 let cpp_traversal t = cpp_traversal t.XML.Binary.doc