val has_tagged_foll : t -> Tag.t -> bool
val tagged_desc : t -> Tag.t -> t
val tagged_foll : t -> Tag.t -> t
+ val init_tagged_next : t -> Tag.t -> unit
val tagged_next : t -> Tag.t -> t
val subtree_tags : t -> Tag.t -> int
val is_left : t -> bool
+ val print_id : Format.formatter -> t -> unit
+ val test_xml_tree : Format.formatter -> Ptset.t -> t -> unit
end
module XML =
*)
external is_empty : t -> [`Text ] node -> bool = "caml_text_collection_empty_text"
+
let is_empty t n =
(equal nil n) || is_empty t n
let get_text t n =
- if (equal nil n) || is_empty t n then ""
+ if equal nil n then ""
else get_cached_text t n
external size : t -> int = "caml_text_collection_size"
external next_sibling : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling"
-
+ external prev_sibling : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling"
external is_leaf : t -> [`Tree] node -> bool = "caml_xml_tree_is_leaf"
(* external tag : t -> [`Tree ] node -> T = "caml_xml_tree_tag"*)
external tag_id : t -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag_id"
+(*
+ let tag_hash = Array.make 6_000_000 (Tag.nullt)
+
+ let tag_id t id =
+ let tag = tag_hash.(int_of_node id)
+ in
+ if tag != Tag.nullt then tag
+ else
+ let tag = tag_id t id in
+ (tag_hash.(int_of_node id) <- tag; tag)
+*)
let is_last t n = equal nil (next_sibling t n)
external prev_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_prev_text"
external tagged_next : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_next"
external subtree_tags : t -> [`Tree ] node -> Tag.t -> int = "caml_xml_tree_subtree_tags"
+
+
+ let test_xml_tree ppf tags v =
+ let pr x = Format.fprintf ppf x in
+ let rec aux id =
+ if (is_nil id)
+ then ()
+ else
+ begin
+ pr "Node %i, (Tag) %i='%s' (GetTagName), NodeXMLId (Preorder)=%i\n%!"
+ (int_of_node id)
+ (tag_id v id)
+ (Tag.to_string (tag_id v id))
+ (node_xml_id v id);
+ pr "DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s) ParentDoc(my_text)=%i\n%!"
+ (int_of_node (prev_text v id))
+ (Text.get_text v (prev_text v id))
+ (int_of_node (my_text v id))
+ (Text.get_text v (my_text v id))
+ (int_of_node (next_text v id))
+ (Text.get_text v (next_text v id))
+ (int_of_node(parent_doc v (my_text v id)));
+ pr "Testing Tagged*\n%!";
+ Ptset.iter (fun t ->
+ let str = Tag.to_string t in
+ if Tag.pcdata <> t
+ then begin
+ pr "Tag: %s : \n%!" str;
+ pr "TaggedDesc = %i%!, " (tagged_desc v id t);
+ pr "TaggedFoll = %i\n%!" (tagged_foll v id t);
+ pr "SubtreeTags = %i\n%!" (subtree_tags v id t);
+ end) tags;
+ pr "----------------------------\n";
+ aux(first_child v id);
+ aux(next_sibling v id);
+ end
+ in
+ aux (root v)
+
+
+
+
+
+
let print_skel t =
let rec aux id =
if (is_nil id)
then Printf.eprintf "#\n"
else
begin
- Printf.eprintf "Node %i has tag '%s' DocOrder=%i, DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s) parent_doc(my_text)=%i\n%!"
+ Printf.eprintf "Node %i has tag '%i=%s' DocOrder=%i, DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s) parent_doc(my_text)=%i\n%!"
(int_of_node id)
+ (tag_id t id)
(Tag.to_string (tag_id t id))
(node_xml_id t id)
(int_of_node (prev_text t id))
end
in
aux (root t)
+
+
+
end
node : descr }
let dump { doc=t } = Tree.print_skel t
+ let test_xml_tree ppf tags { doc=t } = Tree.test_xml_tree ppf tags t
+
module DocIdSet = struct
include Set.Make (struct type t = string_content
let compare = (-) end)
let root n = { n with node = norm (Tree.root n.doc) }
let is_root n = match n.node with
- | Node(NC t) when (Tree.root n.doc) == t -> true
+ | Node(NC t) -> (int_of_node t) == 0
| _ -> false
- let parent n =
+ let parent n =
+ if is_root n then { n with node=Nil}
+ else
let node' =
match n.node with
| Node(NC t) ->
let txt = prev_text n.doc t in
if Text.is_empty n.doc txt then
- Node(NC (Tree.parent n.doc t))
+ let ps = Tree.prev_sibling n.doc t in
+ if is_nil ps
+ then
+ Node(NC (Tree.parent n.doc t))
+ else Node(NC ps)
else
Node(SC (txt,t))
- | Node(SC(t,_)) -> Node (NC(parent_doc n.doc t))
+ | Node(SC(i,t)) ->
+ let ps = Tree.prev_sibling n.doc t in
+ if is_nil ps
+ then Node (NC(parent_doc n.doc i))
+ else Node(NC ps)
| _ -> failwith "parent"
in
{ n with node = node' }
| Nil | String _ -> failwith "first_child"
in
{ n with node = node'}
-
let next_sibling n =
let node' =
let id =
function { doc=d; node=Node(NC n)} -> node_xml_id d n
| { doc=d; node=Node(SC (i,_) )} -> text_xml_id d i
- | _ -> failwith "id"
+ | _ -> -1 (*
+ Format.fprintf Format.err_formatter "Failure id on %s\n%!" (nts x.node);
+ failwith "id" *)
let tag =
function { node=Node(SC _) } -> Tag.pcdata
| { doc=d; node=Node(NC n)} -> tag_id d n
| _ -> failwith "tag"
-(* let tag_id =
- function { node=Node(SC _) } -> ()
- | { doc=d; node=Node(NC n)} -> tag_id d n
- | _ -> ()
-*)
let string_below t id =
let strid = parent_doc t.doc id in
match t.node with
| { doc=d; node=Node(NC n) } -> { t with node = norm (tagged_desc d n tag) }
| _ -> { t with node=Nil }
-
+(*
let tagged_next t tag =
- if tag = Tag.attribute || tag = Tag.pcdata then failwith "tagged_next"
- else match t with
- | { doc=d; node=Node(NC n) } -> { t with node = norm (tagged_next d n tag) }
- | { doc=d; node=Node(SC (_,n)) } -> { t with node = norm (tagged_next d n tag) }
- | _ -> { t with node=Nil }
-
+ if tag == Tag.attribute || tag == Tag.pcdata then failwith "tagged_next"
+ else
+ match tagged_desc t tag with
+ | { doc = d; node=Nil } -> tagged_foll t tag
+ | x -> x
+*)
let subtree_tags t tag =
match t with
- { doc = d; node = Node(NC n) } -> subtree_tags d n tag
+ { doc = d; node = Node(NC n) } ->
+ subtree_tags d n tag
| _ -> 0
+ let tagged_desc_array = ref [| |]
+ let idx = ref 0
+
+ let init_tagged_next t tagid =
+ let l = subtree_tags (root t) tagid
+ in
+ tagged_desc_array := Array.create l { t with node= Nil };
+ let i = ref 0 in
+ let rec collect t =
+ if is_node t then begin
+ if tag t == tagid then
+ begin
+ !tagged_desc_array.(!i) <- t;
+ incr i;
+ end;
+ collect (first_child t);
+ collect (next_sibling t)
+ end;
+ in
+ collect t;
+ idx := 0
+
+ let print_id ppf v =
+ let pr x= Format.fprintf ppf x in
+ match v with
+ { node=Nil } -> pr "NULLT: -1"
+ | { node=String(i) } | { node=Node(SC(i,_)) } -> pr "DocID: %i" (int_of_node i)
+ | { node=Node(NC(i)) } -> pr "Node: %i" (int_of_node i)
+
+
+
+ let tagged_next t tag =
+ if !idx >= Array.length !tagged_desc_array
+ then {t with node=Nil}
+ else
+ let r = !tagged_desc_array.(!idx)
+ in
+ incr idx; r
+
let has_tagged_foll t tag = is_node (tagged_foll t tag)
let count t s = Text.count t.doc s
let is_left t =
- let u = left (parent t) in
- (id t) == (id u)
+ if is_root t then false
+ else
+ if tag (parent t) == Tag.pcdata then false
+ else
+ let u = left (parent t) in
+ (id t) == (id u)
let print_xml_fast outc t =
let rec loop ?(print_right=true) t = match t.node with
| Nil -> ()
- | String (s) -> output_string outc (string t)
+ | String (s) -> output_string outc (Text.get_text t.doc s)
| Node _ when Tag.equal (tag t) Tag.pcdata ->
loop (left t);
if print_right then loop (right t)
print_xml_fast outc (first_child t)
else print_xml_fast outc t
+
+
+
+
let traversal t = Tree.traversal t.doc
let full_traversal t =
let rec aux n =