X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=ptset.ml;h=e16cc2c2483dceda4281aa6323051a9586d72713;hb=22e128466565745a4e74f3b9823e7884ee5c6157;hp=091d4a841ede7c0c8ddee7f76f3fc90c040f2422;hpb=5b4679e20761058f1e04c123da52631c0dd265cc;p=SXSI%2Fxpathcomp.git diff --git a/ptset.ml b/ptset.ml index 091d4a8..e16cc2c 100644 --- a/ptset.ml +++ b/ptset.ml @@ -11,26 +11,32 @@ type elt = int type t = { id : int; key : int; (* hash *) - node : node } + node : node; + } and node = | Empty | Leaf of int | Branch of int * int * t * t + +(* faster if outside of a module *) +let hash_node x = match x with + | Empty -> 0 + | Leaf i -> (i+1) land max_int + (* power of 2 +/- 1 are fast ! *) + | Branch (b,i,l,r) -> + ((b lsl 1)+ b + i+(i lsl 4) + (l.key lsl 5)-l.key + + (r.key lsl 7) - r.key) land max_int + module Node = struct type _t = t type t = _t - let hash x = x.key - let hash_node = function - | Empty -> 0 - | Leaf i -> i+1 - (* power of 2 +/- 1 are fast ! *) - | Branch (b,i,l,r) -> - (b lsl 1)+ b + i+(i lsl 4) + (l.key lsl 5)-l.key - + (r.key lsl 7) - r.key - let hash_node x = (hash_node x) land max_int - let equal x y = match (x.node,y.node) with + external hash : t -> int = "%field1" + let equal x y = + if x.id == y.id || x.key == y.key || x.node == y.node then true + else + match (x.node,y.node) with | Empty,Empty -> true | Leaf k1, Leaf k2 when k1 == k2 -> true | Branch(p1,m1,l1,r1), Branch(p2,m2,l2,r2) when m1==m2 && p1==p2 && @@ -39,13 +45,7 @@ module Node = end module WH =Weak.Make(Node) -(* struct - include Hashtbl.Make(Node) - let merge h v = - if mem h v then v - else (add h v v;v) -end -*) + let pool = WH.create 4093 (* Neat trick thanks to Alain Frisch ! *) @@ -58,11 +58,11 @@ let empty = { id = gen_uid (); let _ = WH.add pool empty -let is_empty = function { id = 0 } -> true | _ -> false +let is_empty s = s.id==0 let rec norm n = let v = { id = gen_uid (); - key = Node.hash_node n; + key = hash_node n; node = n } in WH.merge pool v @@ -82,7 +82,10 @@ let branch_ne = function let zero_bit k m = (k land m) == 0 -let singleton k = if k < 0 then failwith "singleton" else leaf k +let singleton k = leaf k +let is_singleton n = + match n.node with Leaf _ -> true + | _ -> false let rec mem k n = match n.node with | Empty -> false @@ -180,35 +183,38 @@ let rec min_elt n = match n.node with let compare a b = if a == b then 0 else a.id - b.id + let h_merge = Hashtbl.create 4097 + let com_hash x y = (x*y - (x+y)) land max_int let rec merge s t = if (equal s t) (* This is cheap thanks to hash-consing *) then s else - match s.node,t.node with - | Empty, _ -> t - | _, Empty -> s - | Leaf k, _ -> add k t - | _, Leaf k -> add k s - | Branch (p,m,s0,s1), Branch (q,n,t0,t1) -> - if m == n && match_prefix q p m then - branch p m (merge s0 t0) (merge s1 t1) - else if m > n && match_prefix q p m then - if zero_bit q m then - branch p m (merge s0 t) s1 - else - branch p m s0 (merge s1 t) - else if m < n && match_prefix p q n then - if zero_bit p n then - branch q n (merge s t0) t1 - else - branch q n t0 (merge s t1) + match s.node,t.node with + | Empty, _ -> t + | _, Empty -> s + | Leaf k, _ -> add k t + | _, Leaf k -> add k s + | Branch (p,m,s0,s1), Branch (q,n,t0,t1) -> + if m == n && match_prefix q p m then + branch p m (merge s0 t0) (merge s1 t1) + else if m > n && match_prefix q p m then + if zero_bit q m then + branch p m (merge s0 t) s1 + else + branch p m s0 (merge s1 t) + else if m < n && match_prefix p q n then + if zero_bit p n then + branch q n (merge s t0) t1 else - (* The prefixes disagree. *) - join p s q t - - - + branch q n t0 (merge s t1) + else + (* The prefixes disagree. *) + join p s q t + + + + let rec subset s1 s2 = (equal s1 s2) || match (s1.node,s2.node) with | Empty, _ -> true @@ -226,8 +232,10 @@ let rec min_elt n = match n.node with else false - let union s t = - merge s t + + + + let union s1 s2 = merge s1 s2 let rec inter s1 s2 = if equal s1 s2 @@ -368,3 +376,32 @@ let rec intersect s1 s2 = (equal s1 s2) || let hash s = s.key let from_list l = List.fold_left (fun acc i -> add i acc) empty l + +type int_vector + +external int_vector_alloc : int -> int_vector = "caml_int_vector_alloc" +external int_vector_set : int_vector -> int -> int -> unit = "caml_int_vector_set" +external int_vector_length : int_vector -> int = "caml_int_vector_length" +external int_vector_empty : unit -> int_vector = "caml_int_vector_empty" + +let empty_vector = int_vector_empty () + +let to_int_vector_ext s = + let l = cardinal s in + let v = int_vector_alloc l in + let i = ref 0 in + iter (fun e -> int_vector_set v !i e; incr i) s; + v + +let hash_vectors = Hashtbl.create 4097 + +let to_int_vector s = + try + Hashtbl.find hash_vectors s.key + with + Not_found -> + let v = to_int_vector_ext s in + Hashtbl.add hash_vectors s.key v; + v + +