X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=tree.ml;h=d0466c1e1a481333d8f82be55824f23bd88ea662;hb=22e128466565745a4e74f3b9823e7884ee5c6157;hp=889dd988cdd851a333e6806b1c3a940e257253e7;hpb=f98a8d98d86941a885f492d5cc134e34989c198a;p=SXSI%2Fxpathcomp.git diff --git a/tree.ml b/tree.ml index 889dd98..d0466c1 100644 --- a/tree.ml +++ b/tree.ml @@ -4,7 +4,7 @@ (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) -(*INCLUDE "debug.ml" *) +INCLUDE "utils.ml" type tree type 'a node = int @@ -99,34 +99,61 @@ type t = { doc : tree; node : descr; ttable : (Tag.t,(Ptset.t*Ptset.t)) Hashtbl.t; } - -let update h t sb sa = - let sbelow,safter = - try - Hashtbl.find h t - with - | Not_found -> Ptset.empty,Ptset.empty - in - Hashtbl.replace h t (Ptset.union sbelow sb, Ptset.union safter sa) + + + let text_size t = text_size t.doc let collect_tags tree = + let h_union = Hashtbl.create 511 in + let pt_cup s1 s2 = + (* special case, since this is a union we want hash(s1,s2) = hash(s2,s1) *) + let x = Ptset.hash s1 + and y = Ptset.hash s2 in + let h = if x < y then HASHINT2(x,y) else HASHINT2(y,x) in + try + Hashtbl.find h_union h + with + | Not_found -> let s = Ptset.union s1 s2 + in + Hashtbl.add h_union h s;s + in + let h_add = Hashtbl.create 511 in + let pt_add t s = + let k = HASHINT2(Tag.hash t,Ptset.hash s) in + try + Hashtbl.find h_add k + with + | Not_found -> let r = Ptset.add t s in + Hashtbl.add h_add k r;r + in let h = Hashtbl.create 511 in + let sing = Ptset.singleton Tag.pcdata in + let update t sb sa = + let sbelow,safter = + try + Hashtbl.find h t + with + | Not_found -> + (sing,sing) + in + Hashtbl.replace h t (pt_cup sbelow sb, pt_cup safter sa) + in let rec loop id acc = if equal_node id nil - then (Ptset.singleton Tag.pcdata, Ptset.add Tag.pcdata acc) + then (Ptset.empty,acc) else let below2,after2 = loop (tree_next_sibling tree id) acc in let below1,after1 = loop (tree_first_child tree id) after2 in let tag = tree_tag_id tree id in - update h tag below1 after2; - Ptset.add tag (Ptset.union below1 below2), (Ptset.add tag after1) + update tag below1 after2; + pt_add tag (pt_cup below1 below2), (pt_add tag after1) in let b,a = loop (tree_root tree) Ptset.empty in - update h Tag.pcdata b a; + update Tag.pcdata b a; h @@ -826,3 +853,8 @@ let doc_ids (t:t) : (int*int) = | Text (i,_) -> (i,i) | Nil -> (nil,nil) )) + +let subtree_tags t tag = match t.node with + | Nil -> 0 + | Node(i) -> tree_subtree_tags t.doc i tag + | Text(_,i) -> tree_subtree_tags t.doc i tag