(* Copyright NICTA 2008 *)
(* Distributed under the terms of the LGPL (see LICENCE) *)
(******************************************************************************)
-(*INCLUDE "debug.ml" *)
+INCLUDE "utils.ml"
type tree
type 'a node = int
external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains"
external text_unsorted_contains : tree -> string -> unit = "caml_text_collection_unsorted_contains"
external get_cached_text : tree -> [`Text] node -> string = "caml_text_collection_get_cached_text"
-let get_cached_text t x =
- if x == -1 then ""
- else get_cached_text t x
+
external tree_serialize : tree -> string -> unit = "caml_xml_tree_serialize"
let text_size tree = int_of_node (snd ( tree_doc_ids tree (Obj.magic 0) ))
+let get_cached_text t x =
+ if x == -1 then ""
+ else
+ get_cached_text t x
+
+
external tree_text_xml_id : tree -> [`Text ] node -> int = "caml_xml_tree_text_xml_id"
external tree_node_xml_id : tree -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id"
external tree_is_ancestor : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor"
external tree_tagged_desc : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_desc"
external tree_tagged_foll_below : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_below"
external tree_subtree_tags : tree -> [`Tree ] node -> Tag.t -> int = "caml_xml_tree_subtree_tags"
+(*
external tree_select_below : tree -> [`Tree ] node -> Ptset.int_vector -> Ptset.int_vector -> [`Tree ] node = "caml_xml_tree_select_below"
external tree_select_desc_only : tree -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node = "caml_xml_tree_select_desc_only"
external tree_select_next : tree -> [`Tree ] node -> Ptset.int_vector -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_select_next"
external tree_select_foll_only : tree -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_select_foll_only"
-external tree_select_desc_or_foll_only : tree -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_select_foll_only"
+external tree_select_desc_or_foll_only : tree -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_select_foll_only" *)
type descr =
| Nil
type t = { doc : tree;
node : descr;
- ttable : (Tag.t,(Ptset.t*Ptset.t)) Hashtbl.t;
+ ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t)) Hashtbl.t;
}
-
-let update h t sb sa =
- let sbelow,safter =
- try
- Hashtbl.find h t
- with
- | Not_found -> Ptset.empty,Ptset.empty
- in
- Hashtbl.replace h t (Ptset.union sbelow sb, Ptset.union safter sa)
-
-
let text_size t = text_size t.doc
let collect_tags tree =
+ let h_union = Hashtbl.create 511 in
+ let pt_cup s1 s2 =
+ (* special case, since this is a union we want hash(s1,s2) = hash(s2,s1) *)
+ let x = Ptset.Int.hash s1
+ and y = Ptset.Int.hash s2 in
+ let h = if x < y then HASHINT2(x,y) else HASHINT2(y,x)in
+ try
+ Hashtbl.find h_union h
+ with
+ | Not_found -> let s = Ptset.Int.union s1 s2
+ in
+ Hashtbl.add h_union h s;s
+ in
+ let h_add = Hashtbl.create 511 in
+ let pt_add t s =
+ let k = HASHINT2(Tag.hash t,Ptset.Int.hash s) in
+ try
+ Hashtbl.find h_add k
+ with
+ | Not_found -> let r = Ptset.Int.add t s in
+ Hashtbl.add h_add k r;r
+ in
let h = Hashtbl.create 511 in
+ let sing = Ptset.Int.singleton Tag.pcdata in
+ let update t sb sa =
+ let sbelow,safter =
+ try
+ Hashtbl.find h t
+ with
+ | Not_found ->
+ (sing,sing)
+ in
+ Hashtbl.replace h t (pt_cup sbelow sb, pt_cup safter sa)
+ in
let rec loop id acc =
if equal_node id nil
- then (Ptset.singleton Tag.pcdata, Ptset.add Tag.pcdata acc)
+ then (Ptset.Int.empty,acc)
else
let below2,after2 = loop (tree_next_sibling tree id) acc in
let below1,after1 = loop (tree_first_child tree id) after2 in
let tag = tree_tag_id tree id in
- update h tag below1 after2;
- Ptset.add tag (Ptset.union below1 below2), (Ptset.add tag after1)
+ update tag below1 after2;
+ pt_add tag (pt_cup below1 below2), (pt_add tag after1)
in
- let b,a = loop (tree_root tree) Ptset.empty in
- update h Tag.pcdata b a;
+ let b,a = loop (tree_root tree) Ptset.Int.empty in
+ update Tag.pcdata b a;
h
match t.node with
| Text(_) -> Tag.pcdata
| Node(n) -> tree_tag_id t.doc n
- | _ -> failwith "tag"
-
-(*
- let string_below t id =
- let strid = parent_doc t.doc id in
- match t.node with
- | Node(NC(i)) ->
- (Tree.equal i strid) || (is_ancestor t.doc i strid)
- | Node(SC(i,_)) -> Text.equal i id
- | _ -> false
-
-
- let tagged_foll t tag =
- if tag = Tag.attribute || tag = Tag.pcdata then failwith "tagged_foll"
- else match t with
- | { doc=d; node=Node(NC n) } -> { t with node = norm (tagged_foll d n tag) }
- | { doc=d; node=Node(SC (_,n)) } when is_nil n -> { t with node= Nil }
- | { doc=d; node=Node(SC (_,n)) } ->
- let nnode =
- if tag_id d n == tag then n
- else
- let n' = tagged_desc d n tag in
- if is_nil n' then tagged_foll d n tag
- else n'
- in {t with node= norm nnode}
- | _ -> { t with node=Nil }
-
-
- let tagged_desc t tag =
- if tag = Tag.attribute || tag = Tag.pcdata then failwith "tagged_desc"
- else match t with
- | { doc=d; node=Node(NC n) } -> { t with node = norm (tagged_desc d n tag) }
- | _ -> { t with node=Nil }
+ | Nil -> Tag.nullt
-*)
+(*
let select_next tb tf t s =
match s.node with
| Node (below) -> begin
match t.node with
| Node( n) ->
- { t with node = norm (tree_select_next t.doc n (Ptset.to_int_vector tb) (Ptset.to_int_vector tf) below) }
+ { t with node = norm (tree_select_next t.doc n (Ptset.Int.to_int_vector tb) (Ptset.Int.to_int_vector tf) below) }
| Text (i,n) when equal_node nil n ->
let p = tree_parent_doc t.doc i in
- { t with node = norm (tree_select_next t.doc p (Ptset.to_int_vector tb) (Ptset.to_int_vector tf) below) }
+ { t with node = norm (tree_select_next t.doc p (Ptset.Int.to_int_vector tb) (Ptset.Int.to_int_vector tf) below) }
| Text(_,n) ->
- if Ptset.mem (tree_tag_id t.doc n) (Ptset.union tb tf)
+ if Ptset.mem (tree_tag_id t.doc n) (Ptset.Int.union tb tf)
then { t with node=Node(n) }
else
- let vb = Ptset.to_int_vector tb in
- let vf = Ptset.to_int_vector tf in
+ let vb = Ptset.Int.to_int_vector tb in
+ let vf = Ptset.Int.to_int_vector tf in
let node =
let dsc = tree_select_below t.doc n vb vf in
if equal_node nil dsc
begin
match t.node with
| Node(n) ->
- { t with node= norm (tree_select_foll_only t.doc n (Ptset.to_int_vector tf) below) }
+ { t with node= norm (tree_select_foll_only t.doc n (Ptset.Int.to_int_vector tf) below) }
| Text(i,n) when equal_node nil n ->
let p = tree_parent_doc t.doc i in
- { t with node= norm (tree_select_foll_only t.doc p (Ptset.to_int_vector tf) below) }
+ { t with node= norm (tree_select_foll_only t.doc p (Ptset.Int.to_int_vector tf) below) }
| Text(_,n) ->
if Ptset.mem (tree_tag_id t.doc n) tf
then { t with node=Node(n) }
else
- let vf = Ptset.to_int_vector tf in
+ let vf = Ptset.Int.to_int_vector tf in
let node =
let dsc = tree_select_desc_only t.doc n vf in
if tree_is_nil dsc
let select_below tc td t=
match t.node with
| Node( n) ->
- let vc = Ptset.to_int_vector tc
+ let vc = Ptset.Int.to_int_vector tc
in
- let vd = Ptset.to_int_vector td
+ let vd = Ptset.Int.to_int_vector td
in
{ t with node= norm(tree_select_below t.doc n vc vd) }
| _ -> { t with node=Nil }
let select_desc_only td t =
match t.node with
| Node(n) ->
- let vd = Ptset.to_int_vector td
+ let vd = Ptset.Int.to_int_vector td
in
{ t with node = norm(tree_select_desc_only t.doc n vd) }
| _ -> { t with node = Nil }
-
+*)
let tagged_desc tag t =
match t.node with
| Node(n) ->
| _ -> { t with node = Nil }
-let tagged_foll_below tag t s =
+let tagged_foll_ctx tag t s =
match s.node with
| Node (below) ->
begin
| _ -> {t with node=Nil }
+
+
let last_idx = ref 0
let array_find a i j =
let l = Array.length a in
| Text (i,_) -> (i,i)
| Nil -> (nil,nil)
))
+
+let subtree_tags t tag = match t.node with
+ | Nil -> 0
+ | Node(i) -> tree_subtree_tags t.doc i tag
+ | Text(_,i) -> tree_subtree_tags t.doc i tag
+
+let get_text t = match t.node with
+ | Text(i,_) -> get_cached_text t.doc i
+ | _ -> ""