(******************************************************************************) (* SXSI : XPath evaluator *) (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *) (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) module type BINARY = sig type node_content type string_content type descr = Nil | Node of node_content |String of string_content type t val parse_xml_uri : string -> t val parse_xml_string : string -> t val string : t -> string val descr : t -> descr val left : t -> t val right : t -> t val id : t -> int val tag : t -> Tag.t val print_xml_fast : out_channel -> t -> unit val compare : t -> t -> int val equal : t -> t -> bool end module OldBinary = struct type string_content = string type descr = Nil | Node of node_content | String of string_content and node_content = int*Tag.t * descr * descr * (descr ref) type t = descr let descr t = t let string = function String s -> s | _ -> failwith "string" external parse_xml_uri : string -> t = "caml_call_shredder_uri" external parse_xml_string : string -> t = "caml_call_shredder_string" let parse_xml_uri s = Node(0,Tag.tag "",parse_xml_uri s,Nil,ref Nil) let parse_xml_string s = Node(0,Tag.tag "",parse_xml_string s,Nil,ref Nil) let tstring = function Nil -> "Nil" | Node (_,_,_,_,_) -> "Node" | String _ -> "String" let print_xml fmt t = let pp_str = Format.pp_print_string fmt in let rec loop = function Nil -> () | String (s) -> pp_str s | Node (_,t,l,r,_) when Tag.equal t Tag.pcdata -> loop l;loop r | Node (_,t,l,r,_) -> pp_str ("<" ^ (Tag.to_string t)); ( match l with Nil -> pp_str "/>" | Node(_,t',atts,children,_) when Tag.equal t' Tag.attribute -> (loop_attributes atts; match children with | Nil -> pp_str "/>" | _ -> pp_str ">"; loop children; pp_str ("" ) ) | _ -> pp_str ">"; loop l; pp_str ("" ); );loop r and loop_attributes = function | Node(_,t,Node(_,_,String(s),_,_),r,_) -> pp_str (" "^(Tag.to_string t)^"=\""^ s ^"\"") ; loop_attributes r | _ -> () in loop t let print_xml fmt = function Node(i,t,l,_,_) -> print_xml fmt (Node(i,t,l,Nil,ref Nil)) | t -> print_xml fmt t (* a bit ugly but inlining like this makes serialization faster *) let print_xml_fast outc t = let rec loop = function Nil -> () | String (s) -> output_string outc s | Node (_,t,l,r,_) when Tag.equal t Tag.pcdata -> loop l;loop r | Node (_,t,l,r,_) -> let t = Tag.to_string t in output_char outc '<'; output_string outc t; ( match l with Nil -> output_string outc "/>" | Node(_,t',atts,children,_) when Tag.equal t' Tag.attribute -> (loop_attributes atts; match children with | Nil -> output_string outc "/>" | _ -> output_char outc '>'; loop children; output_string outc "' ) | _ -> output_char outc '>'; loop l; output_string outc "' );loop r and loop_attributes = function | Node(_,t,Node(_,_,String(s),_,_),r,_) -> output_char outc ' '; output_string outc (Tag.to_string t); output_string outc "=\""; output_string outc s; output_char outc '"'; loop_attributes r | _ -> () in loop t let print_xml_fast outc = function Node(i,t,l,_,_) -> print_xml_fast outc (Node(i,t,l,Nil,ref Nil)) | t -> print_xml_fast outc t let tabs = ref 0 let prtabs fmt = for i = 0 to !tabs do Format.fprintf fmt " " done let rec dump fmt t = incr tabs; let _ = match t with | Nil -> prtabs fmt; Format.fprintf fmt "#" | String s -> prtabs fmt; Format.fprintf fmt "(String %s)" s | Node(id,t,l,r,_) -> prtabs fmt; Format.fprintf fmt " (tag='"; Tag.print fmt t; Format.fprintf fmt "', id='%i')\n" id; prtabs fmt; dump fmt l; Format.fprintf fmt "\n"; prtabs fmt; dump fmt r; Format.fprintf fmt "\n"; prtabs fmt;prtabs fmt; Format.fprintf fmt "(id='%i'end )\n" id in decr tabs let dump fmt t = tabs:=0; dump fmt t; tabs:=0 let id = function Node(i,_,_,_,_) -> i | _ -> failwith "id" let tag = function Node(_,t,_,_,_) -> t | _ -> failwith "tag" let left = function Node(_,_,l,_,_) -> l | _ -> failwith "left" let right = function Node(_,_,_,r,_) -> r | _ -> failwith "right" let first_child = left let next_sibling = right let is_root = function Node (_,_,_,_,{contents=Nil}) -> true | _ -> false let is_left n = match n with | Node (_,_,_,_,{contents=p}) when not(is_root n) && (left p) == n -> true | _ -> false let is_right n = match n with | Node (_,_,_,_,{contents=p}) when not(is_root n) && (right p) == n -> true | _ -> false let compare t1 t2 = match t1,t2 with | Nil,Nil -> 0 | String s1, String s2 -> String.compare s1 s2 | Nil, String _ -> -1 | String _, Nil -> 1 | Node(i1,_,_,_,_), Node(i2,_,_,_,_) -> i1 - i2 | _, Node _ -> -1 | Node _ , _ -> 1 let equal t1 t2 = (compare t1 t2) == 0 let int_size = Sys.word_size/8 let ssize s = ((String.length s)/4 +1)*4 let rec size = function Nil -> (int_size,1,0,0) | String s -> (int_size + (ssize s),0,1,0) | Node(_,_,l,r,_) -> let sizel,nl,sl,il = size l and sizer,nr,sr,ir = size r in (sizel+sizer+(7*int_size),nl+nr,sl+sr,il+ir+1) let size t = let s,n,st,i = size t in s/1024,n,st,i end module XML = struct type t type 'a node = int type node_kind = [`Text | `Tree ] let compare : 'a node -> 'a node -> int = (-) let equal : 'a node -> 'a node -> bool = (==) (* abstract type, values are pointers to a XMLTree C++ object *) external int_of_node : 'a node -> int = "%identity" external parse_xml_uri : string -> t = "caml_call_shredder_uri" let parse_xml_uri uri = parse_xml_uri uri external parse_xml_string : string -> t = "caml_call_shredder_string" let parse_xml_string uri = parse_xml_string uri module Text = struct type t (* pointer to the text collection *) (* Todo *) external nullt : unit -> [`Text ] node = "caml_xml_tree_nullt" let nil = nullt () external get_text : t -> [`Text] node -> string = "caml_text_collection_get_text" let get_text t n = if equal nil n then "" else get_text t n external is_empty : t -> [`Text ] node -> bool = "caml_text_collection_empty_text" end module Tree = struct external serialize : string -> unit = "caml_xml_tree_serialize" external unserialize : string -> t = "caml_xml_tree_unserialize" external root : t -> [`Tree] node = "caml_xml_tree_root" external nullt : unit -> [`Tree ] node = "caml_xml_tree_nullt" let nil = nullt () let is_nil x = equal x nil external parent : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" external parent_doc : t -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_doc" external first_child : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" external next_sibling : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" external is_leaf : t -> [`Tree] node -> bool = "caml_xml_tree_is_leaf" external tag : t -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag" external text_collection : t -> Text.t = "caml_xml_tree_text_collection" let is_last t n = equal nil (next_sibling t n) external prev_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_prev_text" external my_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text" external next_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_next_text" external text_xml_id : t -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" external node_xml_id : t -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id" let print_skel t = let textcol = text_collection t in let rec aux id = if (is_nil id) then Printf.eprintf "#" else begin Printf.eprintf "Node %i has tag '%s', DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s)\n%!" (int_of_node id) (Tag.to_string (tag t id)) (int_of_node (prev_text t id)) (Text.get_text textcol (prev_text t id)) (int_of_node (my_text t id)) (Text.get_text textcol (my_text t id)) (int_of_node (next_text t id)) (Text.get_text textcol (next_text t id)); aux(first_child t id); aux(next_sibling t id); end in aux (root t) let traversal t = let textcol = text_collection t in let rec aux id = if not (is_nil id) then begin ignore (tag t id); ignore (Text.get_text textcol (prev_text t id)); if (is_leaf t id) then ignore (Text.get_text textcol (my_text t id)); if (is_last t id) then ignore (Text.get_text textcol (next_text t id)); aux (first_child t id); aux (next_sibling t id); end in aux (root t) end module Binary = struct type node_content = NC of [`Tree ] node | SC of [`Text ] node * [`Tree ] node type string_content = [ `Text ] node type descr = | Nil | Node of node_content | String of string_content type doc = t type t = { doc : doc; text : Text.t; node : descr } let dump { doc=t } = Tree.print_skel t open Tree let node_of_t t = { doc= t; text = text_collection t; node = Node(NC (root t)) } let parse_xml_uri str = node_of_t (parse_xml_uri str) let parse_xml_string str = node_of_t (parse_xml_string str) let compare a b = match a.node,b.node with | Node(NC i),Node(NC j) -> compare i j | _, Node(NC( _ )) -> 1 | Node(SC (i,_)),Node(SC (j,_)) -> compare i j | Node(NC( _ )),Node(SC (_,_)) -> -1 | _, Node(SC (_,_)) -> 1 | String i, String j -> compare i j | Node _ , String _ -> -1 | _ , String _ -> 1 | Nil, Nil -> 0 | _,Nil -> -1 let equal a b = (compare a b) == 0 let string t = match t.node with | String i -> Text.get_text t.text i | _ -> assert false let norm (n : [`Tree ] node ) = if is_nil n then Nil else Node (NC n) let descr t = t.node let nts = function Nil -> "Nil" | String i -> Printf.sprintf "String %i" i | Node (NC t) -> Printf.sprintf "Node (NC %i)" (int_of_node t) | Node (SC (t,i)) -> Printf.sprintf "Node (SC (%i,%i))" (int_of_node t) (int_of_node i) let first_child n = let node' = match n.node with | Node (NC t) when is_leaf n.doc t -> let txt = my_text n.doc t in if Text.is_empty n.text txt then Nil else Node(SC (txt,Tree.nil)) | Node (NC t) -> let fs = first_child n.doc t in let txt = prev_text n.doc fs in if Text.is_empty n.text txt then norm fs else Node (SC (txt, fs)) | Node(SC (i,_)) -> String i | Nil | String _ -> failwith "first_child" in { n with node = node'} let next_sibling n = let node' = match n.node with | Node (SC (_,ns)) -> norm ns | Node(NC t) -> let ns = next_sibling n.doc t in let txt = next_text n.doc t in if Text.is_empty n.text txt then norm ns else Node (SC (txt, ns)) | Nil | String _ -> failwith "next_sibling" in { n with node = node'} let left = first_child let right = next_sibling let id = function { doc=d; node=Node(NC n)} -> text_xml_id d n | { doc=d; node=Node(SC (i,_) )} -> node_xml_id d i | _ -> failwith "id" let tag = function { node=Node(SC _) } -> Tag.pcdata | { doc=d; node=Node(NC n)} -> tag d n | _ -> failwith "Tag" let print_xml_fast outc t = let rec loop ?(print_right=true) t = match t.node with | Nil -> () | String (s) -> output_string outc (string t) | Node _ when Tag.equal (tag t) Tag.pcdata -> loop (left t); loop (right t) | Node (_) -> let tg = Tag.to_string (tag t) in let l = left t and r = right t in output_char outc '<'; output_string outc tg; ( match l.node with Nil -> output_string outc "/>" | String _ -> assert false | Node(_) when Tag.equal (tag l) Tag.attribute -> (loop_attributes (left l); match (right l).node with | Nil -> output_string outc "/>" | _ -> output_char outc '>'; loop (right l); output_string outc "' ) | _ -> output_char outc '>'; loop l; output_string outc "' );if print_right then loop r and loop_attributes a = match a.node with | Node(_) -> let value = match (left a).node with | Nil -> "" | _ -> string (left(left a)) in output_char outc ' '; output_string outc (Tag.to_string (tag a)); output_string outc "=\""; output_string outc value; output_char outc '"'; loop_attributes (right a) | _ -> () in loop ~print_right:false t let print_xml_fast outc t = if Tag.to_string (tag t) = "" then print_xml_fast outc (first_child t) else print_xml_fast outc t let traversal t = Tree.traversal t.doc let full_traversal t = let rec aux n = match n.node with | Nil -> () | String i -> ignore(Text.get_text t.text i) | Node(_) -> ignore (tag n); aux (first_child n); aux (next_sibling n) in aux t end end let dump = XML.Binary.dump let traversal = XML.Binary.traversal let full_traversal = XML.Binary.full_traversal external cpp_traversal : XML.t -> unit = "caml_cpp_traversal" let cpp_traversal t = cpp_traversal t.XML.Binary.doc include XML