(* Copyright NICTA 2008 *)
(* Distributed under the terms of the LGPL (see LICENCE) *)
(******************************************************************************)
-(*INCLUDE "debug.ml" *)
+INCLUDE "utils.ml"
type tree
type 'a node = int
node : descr;
ttable : (Tag.t,(Ptset.t*Ptset.t)) Hashtbl.t;
}
-
-let update h t sb sa =
- let sbelow,safter =
- try
- Hashtbl.find h t
- with
- | Not_found -> Ptset.empty,Ptset.empty
- in
- Hashtbl.replace h t (Ptset.union sbelow sb, Ptset.union safter sa)
+
+
+
let text_size t = text_size t.doc
let collect_tags tree =
+ let h_union = Hashtbl.create 511 in
+ let pt_cup s1 s2 =
+ (* special case, since this is a union we want hash(s1,s2) = hash(s2,s1) *)
+ let x = Ptset.hash s1
+ and y = Ptset.hash s2 in
+ let h = if x < y then HASHINT2(x,y) else HASHINT2(y,x) in
+ try
+ Hashtbl.find h_union h
+ with
+ | Not_found -> let s = Ptset.union s1 s2
+ in
+ Hashtbl.add h_union h s;s
+ in
+ let h_add = Hashtbl.create 511 in
+ let pt_add t s =
+ let k = HASHINT2(Tag.hash t,Ptset.hash s) in
+ try
+ Hashtbl.find h_add k
+ with
+ | Not_found -> let r = Ptset.add t s in
+ Hashtbl.add h_add k r;r
+ in
let h = Hashtbl.create 511 in
+ let sing = Ptset.singleton Tag.pcdata in
+ let update t sb sa =
+ let sbelow,safter =
+ try
+ Hashtbl.find h t
+ with
+ | Not_found ->
+ (sing,sing)
+ in
+ Hashtbl.replace h t (pt_cup sbelow sb, pt_cup safter sa)
+ in
let rec loop id acc =
if equal_node id nil
- then (Ptset.singleton Tag.pcdata, Ptset.add Tag.pcdata acc)
+ then (Ptset.empty,acc)
else
let below2,after2 = loop (tree_next_sibling tree id) acc in
let below1,after1 = loop (tree_first_child tree id) after2 in
let tag = tree_tag_id tree id in
- update h tag below1 after2;
- Ptset.add tag (Ptset.union below1 below2), (Ptset.add tag after1)
+ update tag below1 after2;
+ pt_add tag (pt_cup below1 below2), (pt_add tag after1)
in
let b,a = loop (tree_root tree) Ptset.empty in
- update h Tag.pcdata b a;
+ update Tag.pcdata b a;
h
| Text (i,_) -> (i,i)
| Nil -> (nil,nil)
))
+
+let subtree_tags t tag = match t.node with
+ | Nil -> 0
+ | Node(i) -> tree_subtree_tags t.doc i tag
+ | Text(_,i) -> tree_subtree_tags t.doc i tag