X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=tree.ml;h=e3e8fe218ddc98436c4c6426bb88e132a6c2a90f;hb=451e60ad59e35344dff62da5ca27fcd5eec1bff9;hp=5e2a44b31bff8706c652cfb66bab966ac49f55bb;hpb=496df5f7d3e6f8271763314f2067719cc2904c71;p=SXSI%2Fxpathcomp.git diff --git a/tree.ml b/tree.ml index 5e2a44b..e3e8fe2 100644 --- a/tree.ml +++ b/tree.ml @@ -4,517 +4,443 @@ (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) -module type BINARY = -sig - type node_content - type string_content - type descr = Nil | Node of node_content |String of string_content - type t - val parse_xml_uri : string -> t - val parse_xml_string : string -> t - val string : t -> string - val descr : t -> descr - val left : t -> t - val right : t -> t - val id : t -> int - val tag : t -> Tag.t - val print_xml_fast : out_channel -> t -> unit - val compare : t -> t -> int - val equal : t -> t -> bool -end - -module OldBinary = -struct +INCLUDE "utils.ml" - type string_content = string - type descr = Nil | Node of node_content | String of string_content - and node_content = int*Tag.t * descr * descr * (descr ref) - type t = descr - - let descr t = t +type tree +type 'a node = int +type node_kind = [`Text | `Tree ] + +let compare_node : 'a node -> 'a node -> int = (-) +let equal_node : 'a node -> 'a node -> bool = (==) + +(* abstract type, values are pointers to a XMLTree C++ object *) + +external int_of_node : 'a node -> int = "%identity" + +external parse_xml_uri : string -> int -> bool -> bool -> tree = "caml_call_shredder_uri" +external parse_xml_string : string -> int -> bool -> bool -> tree = "caml_call_shredder_string" + +external save_tree : tree -> string -> unit = "caml_xml_tree_save" +external load_tree : string -> int -> tree = "caml_xml_tree_load" + +external nullt : unit -> 'a node = "caml_xml_tree_nullt" + +let nil : 'a node = Obj.magic (-1) + +external text_get_tc_text : tree -> [`Text] node -> string = "caml_text_collection_get_text" + +external text_is_empty : tree -> [`Text ] node -> bool = "caml_text_collection_empty_text" - let string = function String s -> s | _ -> failwith "string" +let text_is_empty t n = + (equal_node nil n) || text_is_empty t n - - external parse_xml_uri : string -> t = "caml_call_shredder_uri" - external parse_xml_string : string -> t = "caml_call_shredder_string" - - let parse_xml_uri s = Node(0,Tag.tag "",parse_xml_uri s,Nil,ref Nil) - let parse_xml_string s = Node(0,Tag.tag "",parse_xml_string s,Nil,ref Nil) - let tstring = function Nil -> "Nil" - | Node (_,_,_,_,_) -> "Node" - | String _ -> "String" - -let print_xml fmt t = - let pp_str = Format.pp_print_string fmt in - let rec loop = function Nil -> () - | String (s) -> pp_str s - | Node (_,t,l,r,_) when Tag.equal t Tag.pcdata -> loop l;loop r - | Node (_,t,l,r,_) -> - pp_str ("<" ^ (Tag.to_string t)); - ( match l with - Nil -> pp_str "/>" - | Node(_,t',atts,children,_) when Tag.equal t' Tag.attribute -> - (loop_attributes atts; - match children with - | Nil -> pp_str "/>" - | _ -> - pp_str ">"; - loop children; - pp_str ("" ) - ) - | _ -> pp_str ">"; loop l; - pp_str ("" ); - );loop r - and loop_attributes = function - | Node(_,t,Node(_,_,String(s),_,_),r,_) -> - pp_str (" "^(Tag.to_string t)^"=\""^ s ^"\"") ; - loop_attributes r - | _ -> () - in - loop t - -let print_xml fmt = - function Node(i,t,l,_,_) -> print_xml fmt (Node(i,t,l,Nil,ref Nil)) - | t -> print_xml fmt t - - -(* a bit ugly but inlining like this makes serialization faster *) - -let print_xml_fast outc t = - let rec loop = function Nil -> () - | String (s) -> output_string outc s - | Node (_,t,l,r,_) when Tag.equal t Tag.pcdata -> loop l;loop r - | Node (_,t,l,r,_) -> let t = Tag.to_string t in - output_char outc '<'; - output_string outc t; - ( match l with - Nil -> output_string outc "/>" - | Node(_,t',atts,children,_) when Tag.equal t' Tag.attribute -> - (loop_attributes atts; - match children with - | Nil -> output_string outc "/>" - | _ -> - output_char outc '>'; - loop children; - output_string outc "' ) - | _ -> - output_char outc '>'; - loop l; - output_string outc "' - );loop r - and loop_attributes = function - | Node(_,t,Node(_,_,String(s),_,_),r,_) -> - output_char outc ' '; - output_string outc (Tag.to_string t); - output_string outc "=\""; - output_string outc s; - output_char outc '"'; - loop_attributes r - | _ -> () +external text_is_contains : tree -> string -> bool = "caml_text_collection_is_contains" +external text_count_contains : tree -> string -> int = "caml_text_collection_count_contains" +external text_count : tree -> string -> int = "caml_text_collection_count" +external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains" +external text_unsorted_contains : tree -> string -> unit = "caml_text_collection_unsorted_contains" +external text_get_cached_text : tree -> [`Text] node -> string = "caml_text_collection_get_cached_text" - in - loop t -let print_xml_fast outc = - function Node(i,t,l,_,_) -> print_xml_fast outc (Node(i,t,l,Nil,ref Nil)) - | t -> print_xml_fast outc t +external tree_serialize : tree -> string -> unit = "caml_xml_tree_serialize" + +external tree_unserialize : string -> tree = "caml_xml_tree_unserialize" + +external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" +let tree_is_nil x = equal_node x nil +external tree_parent : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_parent" +external tree_parent_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_doc" +external tree_prev_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_prev_doc" +external tree_first_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" +external tree_tagged_child : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_child" +external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" +external tree_tagged_sibling : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_sibling" -let tabs = ref 0 +external tree_prev_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling" +external tree_is_leaf : tree -> [`Tree] node -> bool = "caml_xml_tree_is_leaf" +external tree_last_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_last_child" +external tree_is_first_child : tree -> [`Tree] node -> bool = "caml_xml_tree_is_first_child" -let prtabs fmt = - for i = 0 to !tabs - do - Format.fprintf fmt " " - done +(* external tag : tree -> [`Tree ] node -> T = "caml_xml_tree_tag"*) +external tree_tag_id : tree -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag_id" + +let tree_is_last t n = equal_node nil (tree_next_sibling t n) -let rec dump fmt t = - incr tabs; - let _ = match t with - | Nil -> prtabs fmt; Format.fprintf fmt "#" - | String s -> prtabs fmt; Format.fprintf fmt "(String %s)" s - | Node(id,t,l,r,_) -> - prtabs fmt; - Format.fprintf fmt " (tag='"; - Tag.print fmt t; - Format.fprintf fmt "', id='%i')\n" id; - prtabs fmt; - dump fmt l; - Format.fprintf fmt "\n"; - prtabs fmt; - dump fmt r; - Format.fprintf fmt "\n"; - prtabs fmt;prtabs fmt; - Format.fprintf fmt "(id='%i'end )\n" id - in decr tabs - - -let dump fmt t = - tabs:=0; - dump fmt t; - tabs:=0 - -let id = function Node(i,_,_,_,_) -> i - | _ -> failwith "id" - -let tag = function Node(_,t,_,_,_) -> t - | _ -> failwith "tag" - -let left = function Node(_,_,l,_,_) -> l - | _ -> failwith "left" - -let right = function Node(_,_,_,r,_) -> r - | _ -> failwith "right" - -let first_child = left -let next_sibling = right - -let is_root = function Node (_,_,_,_,{contents=Nil}) -> true | _ -> false -let is_left n = match n with - | Node (_,_,_,_,{contents=p}) when not(is_root n) && (left p) == n -> true - | _ -> false - -let is_right n = match n with - | Node (_,_,_,_,{contents=p}) when not(is_root n) && (right p) == n -> true - | _ -> false - - -let compare t1 t2 = match t1,t2 with - | Nil,Nil -> 0 - | String s1, String s2 -> String.compare s1 s2 - | Nil, String _ -> -1 - | String _, Nil -> 1 - | Node(i1,_,_,_,_), Node(i2,_,_,_,_) -> i1 - i2 - | _, Node _ -> -1 - | Node _ , _ -> 1 -let equal t1 t2 = (compare t1 t2) == 0 - -let int_size = Sys.word_size/8 -let ssize s = ((String.length s)/4 +1)*4 -let rec size = - function Nil -> (int_size,1,0,0) - | String s -> (int_size + (ssize s),0,1,0) - | Node(_,_,l,r,_) -> - let sizel,nl,sl,il = size l - and sizer,nr,sr,ir = size r - in - (sizel+sizer+(7*int_size),nl+nr,sl+sr,il+ir+1) -let size t = - let s,n,st,i = size t in - s/1024,n,st,i -end +external tree_prev_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_prev_text" +external tree_my_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text" +external tree_next_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_next_text" +external tree_doc_ids : tree -> [`Tree ] node -> [`Text ] node * [`Text ] node = "caml_xml_tree_doc_ids" -module XML = -struct +let text_size tree = int_of_node (snd ( tree_doc_ids tree (Obj.magic 0) )) - type t - type 'a node = int - type node_kind = [`Text | `Tree ] +let text_get_cached_text t x = + if x == -1 then "" + else + text_get_cached_text t x - let compare : 'a node -> 'a node -> int = (-) - let equal : 'a node -> 'a node -> bool = (==) - (* abstract type, values are pointers to a XMLTree C++ object *) - - external int_of_node : 'a node -> int = "%identity" +external tree_text_xml_id : tree -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" +external tree_node_xml_id : tree -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id" +external tree_is_ancestor : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" +external tree_tagged_desc : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_desc" +external tree_tagged_foll_below : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_below" +external tree_subtree_tags : tree -> [`Tree ] node -> Tag.t -> int = "caml_xml_tree_subtree_tags" - external parse_xml_uri : string -> t = "caml_call_shredder_uri" - let parse_xml_uri uri = parse_xml_uri uri - - external parse_xml_string : string -> t = "caml_call_shredder_string" - let parse_xml_string uri = parse_xml_string uri - - module Text = - struct - type t (* pointer to the text collection *) - (* Todo *) - external nullt : unit -> [`Text ] node = "caml_xml_tree_nullt" - let nil = nullt () - external get_text : t -> [`Text] node -> string = "caml_text_collection_get_text" - - let get_text t n = - if equal nil n then "" - else get_text t n - - external is_empty : t -> [`Text ] node -> bool = "caml_text_collection_empty_text" - end +type int_vector +external int_vector_alloc : int -> int_vector = "caml_int_vector_alloc" +external int_vector_length : int_vector -> int = "caml_int_vector_length" +external int_vector_set : int_vector -> int -> int -> unit = "caml_int_vector_set" - module Tree = - struct +external tree_select_child : tree -> [`Tree ] node -> int_vector -> [`Tree] node = "caml_xml_tree_select_child" +external tree_select_foll_sibling : tree -> [`Tree ] node -> int_vector -> [`Tree] node = "caml_xml_tree_select_foll_sibling" +external tree_select_desc : tree -> [`Tree ] node -> int_vector -> [`Tree] node = "caml_xml_tree_select_desc" +external tree_select_foll_below : tree -> [`Tree ] node -> int_vector -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_foll_below" - - external serialize : string -> unit = "caml_xml_tree_serialize" - external unserialize : string -> t = "caml_xml_tree_unserialize" - - external root : t -> [`Tree] node = "caml_xml_tree_root" - external nullt : unit -> [`Tree ] node = "caml_xml_tree_nullt" - let nil = nullt () - let is_nil x = equal x nil +module HPtset = Hashtbl.Make(Ptset.Int) + +let vector_htbl = HPtset.create MED_H_SIZE + +let ptset_to_vector s = + try + HPtset.find vector_htbl s + with + Not_found -> + let v = int_vector_alloc (Ptset.Int.cardinal s) in + let _ = Ptset.Int.fold (fun e i -> int_vector_set v i e;i+1) s 0 in + HPtset.add vector_htbl s v; v - external parent : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" - external parent_doc : t -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_doc" - external first_child : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" +type t = { doc : tree; + node : [`Tree] node; + ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t)) Hashtbl.t; + } + +let text_size t = text_size t.doc + +module MemUnion = Hashtbl.Make (struct + type t = Ptset.Int.t*Ptset.Int.t + let equal (x,y) (z,t) = (Ptset.Int.equal x z)&&(Ptset.Int.equal y t) + let equal a b = equal a b || equal b a + let hash (x,y) = (* commutative hash *) + let x = Ptset.Int.hash x + and y = Ptset.Int.hash y + in + if x < y then HASHINT2(x,y) else HASHINT2(y,x) + end) + +let collect_tags tree = + let h_union = MemUnion.create BIG_H_SIZE in + let pt_cup s1 s2 = + try + MemUnion.find h_union (s1,s2) + with + | Not_found -> let s = Ptset.Int.union s1 s2 + in + MemUnion.add h_union (s1,s2) s;s + in + let h_add = Hashtbl.create BIG_H_SIZE in + let pt_add t s = + let k = HASHINT2(Tag.hash t,Ptset.Int.hash s) in + try + Hashtbl.find h_add k + with + | Not_found -> let r = Ptset.Int.add t s in + Hashtbl.add h_add k r;r + in + let h = Hashtbl.create BIG_H_SIZE in + let update t sb sa = + let sbelow,safter = + try + Hashtbl.find h t + with + | Not_found -> + (Ptset.Int.empty,Ptset.Int.empty) + in + Hashtbl.replace h t (pt_cup sbelow sb, pt_cup safter sa) + in + let rec loop id acc = + if equal_node id nil + then (Ptset.Int.empty,acc) + else + let below2,after2 = loop (tree_next_sibling tree id) acc in + let below1,after1 = loop (tree_first_child tree id) after2 in + let tag = tree_tag_id tree id in + update tag below1 after2; + pt_add tag (pt_cup below1 below2), (pt_add tag after1) + in + let _ = loop (tree_root tree) Ptset.Int.empty in h + + + + +let contains_array = ref [| |] +let contains_index = Hashtbl.create 4096 +let in_array _ i = + try + Hashtbl.find contains_index i + with + Not_found -> false + +let init_contains t s = + let a = text_contains t.doc s + in + Array.fast_sort (compare) a; + contains_array := a; + Array.iter (fun x -> Hashtbl.add contains_index x true) !contains_array - external next_sibling : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" +let count_contains t s = text_count_contains t.doc s +let unsorted_contains t s = text_unsorted_contains t.doc s + +let init_naive_contains t s = + let i,j = tree_doc_ids t.doc (tree_root t.doc) + in + let regexp = Str.regexp_string s in + let matching arg = + try + let _ = Str.search_forward regexp arg 0; + in true + with _ -> false + in + let rec loop n acc l = + if n >= j then acc,l + else + let s = text_get_cached_text t.doc n + in + if matching s + then loop (n+1) (n::acc) (l+1) + else loop (n+1) acc l + in + let acc,l = loop i [] 0 in + let a = Array.create l nil in + let _ = List.fold_left (fun cpt e -> a.(cpt) <- e; (cpt-1)) (l-1) acc + in + contains_array := a + - external is_leaf : t -> [`Tree] node -> bool = "caml_xml_tree_is_leaf" + +module DocIdSet = struct + include Set.Make (struct type t = [`Text] node + let compare = compare_node end) - external tag : t -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag" +end +let is_nil t = t.node == nil - external text_collection : t -> Text.t = "caml_xml_tree_text_collection" +let is_node t = t.node != nil - let is_last t n = equal nil (next_sibling t n) +let node_of_t t = + let _ = Tag.init (Obj.magic t) in + let table = collect_tags t + in + { doc= t; + node = tree_root t; + ttable = table; + } +let finalize _ = Printf.eprintf "Release the string list !\n%!" +;; + +let parse f str = + node_of_t + (f str + !Options.sample_factor + !Options.index_empty_texts + !Options.disable_text_collection) - external prev_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_prev_text" +let parse_xml_uri str = parse parse_xml_uri str +let parse_xml_string str = parse parse_xml_string str + +external pool : tree -> Tag.pool = "%identity" - external my_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text" - external next_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_next_text" +let save t str = (save_tree t.doc str) +;; - external text_xml_id : t -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" - external node_xml_id : t -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id" - +let load ?(sample=64) str = + node_of_t (load_tree str sample) + - let print_skel t = - let textcol = text_collection t in - let rec aux id = - if (is_nil id) - then Printf.eprintf "#" - else - begin - Printf.eprintf "Node %i has tag '%s', DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s)\n%!" - (int_of_node id) - (Tag.to_string (tag t id)) - (int_of_node (prev_text t id)) - (Text.get_text textcol (prev_text t id)) - (int_of_node (my_text t id)) - (Text.get_text textcol (my_text t id)) - (int_of_node (next_text t id)) - (Text.get_text textcol (next_text t id)); - aux(first_child t id); - aux(next_sibling t id); - end - in - aux (root t) - - let traversal t = - let textcol = text_collection t in - let rec aux id = - if not (is_nil id) - then - begin - ignore (tag t id); - ignore (Text.get_text textcol (prev_text t id)); - if (is_leaf t id) - then ignore (Text.get_text textcol (my_text t id)); - if (is_last t id) - then ignore (Text.get_text textcol (next_text t id)); - aux (first_child t id); - aux (next_sibling t id); - end - in - aux (root t) - end + + +let tag_pool t = pool t.doc + +let compare a b = a.node - b.node + +let equal a b = a.node == b.node + +let nts = function + -1 -> "Nil" + | i -> Printf.sprintf "Node (%i)" i +let dump_node t = nts t.node + +let mk_nil t = { t with node = nil } +let root n = { n with node = tree_root n.doc } + +let is_root n = n.node == (tree_root n.doc) - module Binary = struct - - type node_content = - NC of [`Tree ] node - | SC of [`Text ] node * [`Tree ] node - type string_content = [ `Text ] node - type descr = - | Nil - | Node of node_content - | String of string_content - - type doc = t - - type t = { doc : doc; - text : Text.t; - node : descr } - - let dump { doc=t } = Tree.print_skel t - open Tree - let node_of_t t = { doc= t; - text = text_collection t; - node = Node(NC (root t)) } - - - let parse_xml_uri str = node_of_t (parse_xml_uri str) - let parse_xml_string str = node_of_t (parse_xml_string str) - - let compare a b = match a.node,b.node with - | Node(NC i),Node(NC j) -> compare i j - | _, Node(NC( _ )) -> 1 - | Node(SC (i,_)),Node(SC (j,_)) -> compare i j - | Node(NC( _ )),Node(SC (_,_)) -> -1 - | _, Node(SC (_,_)) -> 1 - | String i, String j -> compare i j - | Node _ , String _ -> -1 - | _ , String _ -> 1 - | Nil, Nil -> 0 - | _,Nil -> -1 - - let equal a b = (compare a b) == 0 - - let string t = match t.node with - | String i -> Text.get_text t.text i - | _ -> assert false - - let norm (n : [`Tree ] node ) = if is_nil n then Nil else Node (NC n) +let is_left n = tree_is_first_child n.doc n.node + +let is_below_right t1 t2 = tree_is_ancestor t1.doc (tree_parent t1.doc t1.node) t2.node + +let parent n = { n with node = tree_parent n.doc n.node } + +let first_child n = { n with node = tree_first_child n.doc n.node } +let tagged_child tag n = { n with node = tree_tagged_child n.doc n.node tag } +let select_child ts n = { n with node = tree_select_child n.doc n.node (ptset_to_vector ts) } + +let next_sibling n = { n with node = tree_next_sibling n.doc n.node } +let tagged_sibling tag n = { n with node = tree_tagged_sibling n.doc n.node tag } +let select_sibling ts n = { n with node = tree_select_foll_sibling n.doc n.node (ptset_to_vector ts) } + +let next_sibling_ctx n _ = next_sibling n +let tagged_sibling_ctx tag n _ = tagged_sibling tag n +let select_sibling_ctx ts n _ = select_sibling ts n + +let id t = tree_node_xml_id t.doc t.node - let descr t = t.node - - let nts = function - Nil -> "Nil" - | String i -> Printf.sprintf "String %i" i - | Node (NC t) -> Printf.sprintf "Node (NC %i)" (int_of_node t) - | Node (SC (t,i)) -> Printf.sprintf "Node (SC (%i,%i))" (int_of_node t) (int_of_node i) - - let first_child n = - let node' = - match n.node with - | Node (NC t) when is_leaf n.doc t -> - let txt = my_text n.doc t in - if Text.is_empty n.text txt - then Nil - else Node(SC (txt,Tree.nil)) - | Node (NC t) -> - let fs = first_child n.doc t in - let txt = prev_text n.doc fs in - if Text.is_empty n.text txt - then norm fs - else Node (SC (txt, fs)) - | Node(SC (i,_)) -> String i - | Nil | String _ -> failwith "first_child" - in - { n with node = node'} +let tag t = if t.node == nil then Tag.nullt else tree_tag_id t.doc t.node + +let tagged_desc tag n = { n with node = tree_tagged_desc n.doc n.node tag } +let select_desc ts n = { n with node = tree_select_desc n.doc n.node (ptset_to_vector ts) } + +let tagged_foll_ctx tag t ctx = + { t with node = tree_tagged_foll_below t.doc t.node tag ctx.node } +let select_foll_ctx ts n ctx = { n with node = tree_select_foll_below n.doc n.node (ptset_to_vector ts) ctx.node } + +let last_idx = ref 0 +let array_find a i j = + let l = Array.length a in + let rec loop idx x y = + if x > y || idx >= l then nil + else + if a.(idx) >= x then if a.(idx) > y then nil else (last_idx := idx;a.(idx)) + else loop (idx+1) x y + in + if a.(0) > j || a.(l-1) < i then nil + else loop !last_idx i j - - let next_sibling n = - let node' = - match n.node with - | Node (SC (_,ns)) -> norm ns - | Node(NC t) -> - let ns = next_sibling n.doc t in - let txt = next_text n.doc t in - if Text.is_empty n.text txt - then norm ns - else Node (SC (txt, ns)) - | Nil | String _ -> failwith "next_sibling" - in - { n with node = node'} - - - let left = first_child - let right = next_sibling - - let id = - function { doc=d; node=Node(NC n)} -> text_xml_id d n - | { doc=d; node=Node(SC (i,_) )} -> node_xml_id d i - | _ -> failwith "id" - - let tag = - function { node=Node(SC _) } -> Tag.pcdata - | { doc=d; node=Node(NC n)} -> tag d n - | _ -> failwith "Tag" - - - - let print_xml_fast outc t = - let rec loop ?(print_right=true) t = match t.node with - | Nil -> () - | String (s) -> output_string outc (string t) - | Node _ when Tag.equal (tag t) Tag.pcdata -> loop (left t); loop (right t) + + + let count t s = text_count t.doc s + + let print_xml_fast outc t = + let rec loop ?(print_right=true) t = + if t.node != nil + then + let tagid = tree_tag_id t.doc t.node in + if tagid==Tag.pcdata + then output_string outc (text_get_cached_text t.doc t.node); + if print_right + then loop (next_sibling t) - | Node (_) -> - let tg = Tag.to_string (tag t) in - let l = left t - and r = right t + else + let tagstr = Tag.to_string tagid in + let l = first_child t + and r = next_sibling t in output_char outc '<'; - output_string outc tg; - ( match l.node with - Nil -> output_string outc "/>" - | String _ -> assert false - | Node(_) when Tag.equal (tag l) Tag.attribute -> - (loop_attributes (left l); - match (right l).node with - | Nil -> output_string outc "/>" - | _ -> - output_char outc '>'; - loop (right l); - output_string outc "' ) - | _ -> - output_char outc '>'; - loop l; - output_string outc "' - );if print_right then loop r - and loop_attributes a = - - match a.node with - | Node(_) -> - let value = - match (left a).node with - | Nil -> "" - | _ -> string (left(left a)) - in - output_char outc ' '; - output_string outc (Tag.to_string (tag a)); - output_string outc "=\""; - output_string outc value; - output_char outc '"'; - loop_attributes (right a) - | _ -> () - in + output_string outc tagstr; + if l.node == nil then output_string outc "/>" + else + if (tag l) == Tag.attribute then + begin + loop_attributes (first_child l); + if (next_sibling l).node == nil then output_string outc "/>" + else + begin + output_char outc '>'; + loop (next_sibling l); + output_string outc "'; + end; + end + else + begin + output_char outc '>'; + loop l; + output_string outc "'; + end; + if print_right then loop r + and loop_attributes a = + let s = (Tag.to_string (tag a)) in + let attname = String.sub s 3 ((String.length s) -3) in + output_char outc ' '; + output_string outc attname; + output_string outc "=\""; + output_string outc (text_get_cached_text t.doc + (tree_my_text a.doc (first_child a).node)); + output_char outc '"'; + loop_attributes (next_sibling a) + in loop ~print_right:false t - - + + let print_xml_fast outc t = - if Tag.to_string (tag t) = "" then + if (tag t) = Tag.document_node then print_xml_fast outc (first_child t) - else print_xml_fast outc t + else print_xml_fast outc t - let traversal t = Tree.traversal t.doc - let full_traversal t = - let rec aux n = - match n.node with - | Nil -> () - | String i -> ignore(Text.get_text t.text i) - | Node(_) -> - ignore (tag n); - aux (first_child n); - aux (next_sibling n) - in aux t - end -end +let tags_below t tag = + fst(Hashtbl.find t.ttable tag) + +let tags_after t tag = + snd(Hashtbl.find t.ttable tag) + +let tags t tag = Hashtbl.find t.ttable tag + + +let rec binary_parent t = + if tree_is_first_child t.doc t.node + then { t with node = tree_parent t.doc t.node } + else { t with node = tree_prev_sibling t.doc t.node } + +let doc_ids (t:t) : (int*int) = + (Obj.magic (tree_doc_ids t.doc t.node)) +let subtree_tags t tag = + if t.node == nil then 0 else + tree_subtree_tags t.doc t.node tag -let dump = XML.Binary.dump -let traversal = XML.Binary.traversal -let full_traversal = XML.Binary.full_traversal -external cpp_traversal : XML.t -> unit = "caml_cpp_traversal" -let cpp_traversal t = cpp_traversal t.XML.Binary.doc +let get_text t = + let tid = tree_my_text t.doc t.node in + if tid == nil then "" else + let a, b = tree_doc_ids t.doc (tree_root t.doc) in + let _ = Printf.eprintf "Trying to take text %i of node %i in %i %i\n%!" tid t.node a b in + text_get_cached_text t.doc tid -include XML + +let dump_tree fmt t = + let rec loop tree n = + if tree != nil then + let tag = (tree_tag_id t.doc tree ) in + let tagstr = Tag.to_string tag in + let tab = String.make n ' ' in + + if tag == Tag.pcdata || tag == Tag.attribute_data + then + Format.fprintf fmt "%s<%s>%s\n" + tab tagstr (text_get_cached_text t.doc (tree_my_text t.doc tree)) tagstr + else begin + Format.fprintf fmt "%s<%s>\n" tab tagstr; + loop (tree_first_child t.doc tree) (n+2); + Format.fprintf fmt "%s\n%!" tab tagstr; + end; + loop (tree_next_sibling t.doc tree) n + in + loop (tree_root t.doc) 0 +;; + +