X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=ptset.ml;h=4b2c84564428ea4f1f8c324ea2aaa85af9b79dd4;hb=d4342e4bb9c853114de295567cd91ec86bb9e68f;hp=5c029f772ed28f4871e5299b45c20544823cf54c;hpb=d04661689691b4587cfc45a35e98604fcdc2b878;p=SXSI%2Fxpathcomp.git diff --git a/ptset.ml b/ptset.ml index 5c029f7..4b2c845 100644 --- a/ptset.ml +++ b/ptset.ml @@ -11,26 +11,32 @@ type elt = int type t = { id : int; key : int; (* hash *) - node : node } + node : node; + } and node = | Empty | Leaf of int | Branch of int * int * t * t + +(* faster if outside of a module *) +let hash_node x = match x with + | Empty -> 0 + | Leaf i -> (i+1) land max_int + (* power of 2 +/- 1 are fast ! *) + | Branch (b,i,l,r) -> + ((b lsl 1)+ b + i+(i lsl 4) + (l.key lsl 5)-l.key + + (r.key lsl 7) - r.key) land max_int + module Node = struct type _t = t type t = _t - let hash x = x.key - let hash_node = function - | Empty -> 0 - | Leaf i -> i+1 - (* power of 2 +/- 1 are fast ! *) - | Branch (b,i,l,r) -> - (b lsl 1)+ b + i+(i lsl 4) + (l.key lsl 5)-l.key - + (r.key lsl 7) - r.key - let hash_node x = (hash_node x) land max_int - let equal x y = match (x.node,y.node) with + external hash : t -> int = "%field1" + let equal x y = + if x.id == y.id || x.key == y.key || x.node == y.node then true + else + match (x.node,y.node) with | Empty,Empty -> true | Leaf k1, Leaf k2 when k1 == k2 -> true | Branch(p1,m1,l1,r1), Branch(p2,m2,l2,r2) when m1==m2 && p1==p2 && @@ -38,8 +44,14 @@ module Node = | _ -> false end -module WH = Weak.Make(Node) - +module WH =Weak.Make(Node) +(* struct + include Hashtbl.Make(Node) + let merge h v = + if mem h v then v + else (add h v v;v) +end +*) let pool = WH.create 4093 (* Neat trick thanks to Alain Frisch ! *) @@ -52,31 +64,31 @@ let empty = { id = gen_uid (); let _ = WH.add pool empty -let is_empty = function { id = 0 } -> true | _ -> false +let is_empty s = s.id==0 let rec norm n = let v = { id = gen_uid (); - key = Node.hash_node n; + key = hash_node n; node = n } in WH.merge pool v (* WH.merge pool *) -let branch (p,m,l,r) = norm (Branch(p,m,l,r)) +let branch p m l r = norm (Branch(p,m,l,r)) let leaf k = norm (Leaf k) (* To enforce the invariant that a branch contains two non empty sub-trees *) let branch_ne = function | (_,_,e,t) when is_empty e -> t | (_,_,t,e) when is_empty e -> t - | (p,m,t0,t1) -> branch (p,m,t0,t1) + | (p,m,t0,t1) -> branch p m t0 t1 (********** from here on, only use the smart constructors *************) let zero_bit k m = (k land m) == 0 -let singleton k = if k < 0 then failwith "singleton" else leaf k +let singleton k = leaf k let rec mem k n = match n.node with | Empty -> false @@ -113,10 +125,10 @@ let rec min_elt n = match n.node with let hbit = Array.init 256 naive_highest_bit let highest_bit_32 x = - let n = x lsr 24 in if n != 0 then hbit.(n) lsl 24 - else let n = x lsr 16 in if n != 0 then hbit.(n) lsl 16 - else let n = x lsr 8 in if n != 0 then hbit.(n) lsl 8 - else hbit.(x) + let n = x lsr 24 in if n != 0 then Array.unsafe_get hbit n lsl 24 + else let n = x lsr 16 in if n != 0 then Array.unsafe_get hbit n lsl 16 + else let n = x lsr 8 in if n != 0 then Array.unsafe_get hbit n lsl 8 + else Array.unsafe_get hbit x let highest_bit_64 x = let n = x lsr 32 in if n != 0 then (highest_bit_32 n) lsl 32 @@ -129,27 +141,27 @@ let rec min_elt n = match n.node with let branching_bit p0 p1 = highest_bit (p0 lxor p1) - let join (p0,t0,p1,t1) = + let join p0 t0 p1 t1 = let m = branching_bit p0 p1 in if zero_bit p0 m then - branch (mask p0 m, m, t0, t1) + branch (mask p0 m) m t0 t1 else - branch (mask p0 m, m, t1, t0) + branch (mask p0 m) m t1 t0 let match_prefix k p m = (mask k m) == p let add k t = let rec ins n = match n.node with | Empty -> leaf k - | Leaf j -> if j == k then n else join (k, leaf k, j, n) + | Leaf j -> if j == k then n else join k (leaf k) j n | Branch (p,m,t0,t1) -> if match_prefix k p m then if zero_bit k m then - branch (p, m, ins t0, t1) + branch p m (ins t0) t1 else - branch (p, m, t0, ins t1) + branch p m t0 (ins t1) else - join (k, leaf k, p, n) + join k (leaf k) p n in ins t @@ -170,12 +182,12 @@ let rec min_elt n = match n.node with (* should run in O(1) thanks to Hash consing *) - let equal = (=) + let equal a b = a==b || a.id == b.id - let compare = compare + let compare a b = if a == b then 0 else a.id - b.id - let rec merge (s,t) = + let rec merge s t = if (equal s t) (* This is cheap thanks to hash-consing *) then s else @@ -186,23 +198,22 @@ let rec min_elt n = match n.node with | _, Leaf k -> add k s | Branch (p,m,s0,s1), Branch (q,n,t0,t1) -> if m == n && match_prefix q p m then - branch (p, m, merge (s0,t0), merge (s1,t1)) + branch p m (merge s0 t0) (merge s1 t1) else if m > n && match_prefix q p m then if zero_bit q m then - branch (p, m, merge (s0,t), s1) + branch p m (merge s0 t) s1 else - branch (p, m, s0, merge (s1,t)) - else if m < n && match_prefix p q n then - + branch p m s0 (merge s1 t) + else if m < n && match_prefix p q n then if zero_bit p n then - branch (q, n, merge (s,t0), t1) + branch q n (merge s t0) t1 else - branch (q, n, t0, merge (s,t1)) + branch q n t0 (merge s t1) else (* The prefixes disagree. *) - join (p, s, q, t) + join p s q t - let union s t = merge (s,t) + let rec subset s1 s2 = (equal s1 s2) || match (s1.node,s2.node) with @@ -220,9 +231,12 @@ let rec min_elt n = match n.node with subset l1 r2 && subset r1 r2 else false + + let union s t = + merge s t let rec inter s1 s2 = - if (equal s1 s2) + if equal s1 s2 then s1 else match (s1.node,s2.node) with @@ -232,7 +246,7 @@ let rec min_elt n = match n.node with | _, Leaf k2 -> if mem k2 s1 then s2 else empty | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> if m1 == m2 && p1 == p2 then - merge (inter l1 l2, inter r1 r2) + merge (inter l1 l2) (inter r1 r2) else if m1 > m2 && match_prefix p2 p1 m1 then inter (if zero_bit p2 m1 then l1 else r1) s2 else if m1 < m2 && match_prefix p1 p2 m2 then @@ -241,7 +255,7 @@ let rec min_elt n = match n.node with empty let rec diff s1 s2 = - if (equal s1 s2) + if equal s1 s2 then empty else match (s1.node,s2.node) with @@ -251,12 +265,12 @@ let rec min_elt n = match n.node with | _, Leaf k2 -> remove k2 s1 | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> if m1 == m2 && p1 == p2 then - merge (diff l1 l2, diff r1 r2) + merge (diff l1 l2) (diff r1 r2) else if m1 > m2 && match_prefix p2 p1 m1 then if zero_bit p2 m1 then - merge (diff l1 s2, r1) + merge (diff l1 s2) r1 else - merge (l1, diff r1 s2) + merge l1 (diff r1 s2) else if m1 < m2 && match_prefix p1 p2 m2 then if zero_bit p1 m2 then diff s1 l2 else diff s1 r2 else @@ -360,3 +374,32 @@ let rec intersect s1 s2 = (equal s1 s2) || let hash s = s.key let from_list l = List.fold_left (fun acc i -> add i acc) empty l + +type int_vector + +external int_vector_alloc : int -> int_vector = "caml_int_vector_alloc" +external int_vector_set : int_vector -> int -> int -> unit = "caml_int_vector_set" +external int_vector_length : int_vector -> int = "caml_int_vector_length" +external int_vector_empty : unit -> int_vector = "caml_int_vector_empty" + +let empty_vector = int_vector_empty () + +let to_int_vector_ext s = + let l = cardinal s in + let v = int_vector_alloc l in + let i = ref 0 in + iter (fun e -> int_vector_set v !i e; incr i) s; + v + +let hash_vectors = Hashtbl.create 4097 + +let to_int_vector s = + try + Hashtbl.find hash_vectors s.key + with + Not_found -> + let v = to_int_vector_ext s in + Hashtbl.add hash_vectors s.key v; + v + +