1 (***************************************************************************)
2 (* Implementation for sets of positive integers implemented as deeply hash-*)
3 (* consed Patricia trees. Provide fast set operations, fast membership as *)
4 (* well as fast min and max elements. Hash consing provides O(1) equality *)
7 (***************************************************************************)
19 | Branch of int * int * t * t
22 (* faster if outside of a module *)
23 let hash_node x = match x with
25 | Leaf i -> (i+1) land max_int
26 (* power of 2 +/- 1 are fast ! *)
28 ((b lsl 1)+ b + i+(i lsl 4) + (l.key lsl 5)-l.key
29 + (r.key lsl 7) - r.key) land max_int
35 external hash : t -> int = "%field1"
37 if x.id == y.id || x.key == y.key || x.node == y.node then true
39 match (x.node,y.node) with
41 | Leaf k1, Leaf k2 when k1 == k2 -> true
42 | Branch(p1,m1,l1,r1), Branch(p2,m2,l2,r2) when m1==m2 && p1==p2 &&
43 (l1.id == l2.id) && (r1.id == r2.id) -> true
47 module WH =Weak.Make(Node)
49 let pool = WH.create 4093
51 (* Neat trick thanks to Alain Frisch ! *)
53 let gen_uid () = Oo.id (object end)
55 let empty = { id = gen_uid ();
59 let _ = WH.add pool empty
61 let is_empty s = s.id==0
64 let v = { id = gen_uid ();
72 let branch p m l r = norm (Branch(p,m,l,r))
73 let leaf k = norm (Leaf k)
75 (* To enforce the invariant that a branch contains two non empty sub-trees *)
76 let branch_ne = function
77 | (_,_,e,t) when is_empty e -> t
78 | (_,_,t,e) when is_empty e -> t
79 | (p,m,t0,t1) -> branch p m t0 t1
81 (********** from here on, only use the smart constructors *************)
83 let zero_bit k m = (k land m) == 0
85 let singleton k = leaf k
87 match n.node with Leaf _ -> true
90 let rec mem k n = match n.node with
93 | Branch (p, _, l, r) -> if k <= p then mem k l else mem k r
95 let rec min_elt n = match n.node with
96 | Empty -> raise Not_found
98 | Branch (_,_,s,_) -> min_elt s
100 let rec max_elt n = match n.node with
101 | Empty -> raise Not_found
103 | Branch (_,_,_,t) -> max_elt t
106 let rec elements_aux acc n = match n.node with
109 | Branch (_,_,l,r) -> elements_aux (elements_aux acc r) l
113 let mask k m = (k lor (m-1)) land (lnot m)
115 let naive_highest_bit x =
118 if i = 0 then 1 else if x lsr i = 1 then 1 lsl i else loop (i-1)
122 let hbit = Array.init 256 naive_highest_bit
124 let highest_bit_32 x =
125 let n = x lsr 24 in if n != 0 then Array.unsafe_get hbit n lsl 24
126 else let n = x lsr 16 in if n != 0 then Array.unsafe_get hbit n lsl 16
127 else let n = x lsr 8 in if n != 0 then Array.unsafe_get hbit n lsl 8
128 else Array.unsafe_get hbit x
130 let highest_bit_64 x =
131 let n = x lsr 32 in if n != 0 then (highest_bit_32 n) lsl 32
132 else highest_bit_32 x
134 let highest_bit = match Sys.word_size with
135 | 32 -> highest_bit_32
136 | 64 -> highest_bit_64
139 let branching_bit p0 p1 = highest_bit (p0 lxor p1)
141 let join p0 t0 p1 t1 =
142 let m = branching_bit p0 p1 in
143 if zero_bit p0 m then
144 branch (mask p0 m) m t0 t1
146 branch (mask p0 m) m t1 t0
148 let match_prefix k p m = (mask k m) == p
151 let rec ins n = match n.node with
153 | Leaf j -> if j == k then n else join k (leaf k) j n
154 | Branch (p,m,t0,t1) ->
155 if match_prefix k p m then
157 branch p m (ins t0) t1
159 branch p m t0 (ins t1)
166 let rec rmv n = match n.node with
168 | Leaf j -> if k == j then empty else n
169 | Branch (p,m,t0,t1) ->
170 if match_prefix k p m then
172 branch_ne (p, m, rmv t0, t1)
174 branch_ne (p, m, t0, rmv t1)
180 (* should run in O(1) thanks to Hash consing *)
182 let equal a b = a==b || a.id == b.id
184 let compare a b = if a == b then 0 else a.id - b.id
186 let h_merge = Hashtbl.create 4097
187 let com_hash x y = (x*y - (x+y)) land max_int
190 if (equal s t) (* This is cheap thanks to hash-consing *)
193 match s.node,t.node with
196 | Leaf k, _ -> add k t
197 | _, Leaf k -> add k s
198 | Branch (p,m,s0,s1), Branch (q,n,t0,t1) ->
199 if m == n && match_prefix q p m then
200 branch p m (merge s0 t0) (merge s1 t1)
201 else if m > n && match_prefix q p m then
203 branch p m (merge s0 t) s1
205 branch p m s0 (merge s1 t)
206 else if m < n && match_prefix p q n then
208 branch q n (merge s t0) t1
210 branch q n t0 (merge s t1)
212 (* The prefixes disagree. *)
218 let rec subset s1 s2 = (equal s1 s2) ||
219 match (s1.node,s2.node) with
222 | Leaf k1, _ -> mem k1 s2
223 | Branch _, Leaf _ -> false
224 | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) ->
225 if m1 == m2 && p1 == p2 then
226 subset l1 l2 && subset r1 r2
227 else if m1 < m2 && match_prefix p1 p2 m2 then
228 if zero_bit p1 m2 then
229 subset l1 l2 && subset r1 l2
231 subset l1 r2 && subset r1 r2
238 let union s1 s2 = merge s1 s2
240 let rec inter s1 s2 =
244 match (s1.node,s2.node) with
247 | Leaf k1, _ -> if mem k1 s2 then s1 else empty
248 | _, Leaf k2 -> if mem k2 s1 then s2 else empty
249 | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) ->
250 if m1 == m2 && p1 == p2 then
251 merge (inter l1 l2) (inter r1 r2)
252 else if m1 > m2 && match_prefix p2 p1 m1 then
253 inter (if zero_bit p2 m1 then l1 else r1) s2
254 else if m1 < m2 && match_prefix p1 p2 m2 then
255 inter s1 (if zero_bit p1 m2 then l2 else r2)
263 match (s1.node,s2.node) with
266 | Leaf k1, _ -> if mem k1 s2 then empty else s1
267 | _, Leaf k2 -> remove k2 s1
268 | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) ->
269 if m1 == m2 && p1 == p2 then
270 merge (diff l1 l2) (diff r1 r2)
271 else if m1 > m2 && match_prefix p2 p1 m1 then
272 if zero_bit p2 m1 then
273 merge (diff l1 s2) r1
275 merge l1 (diff r1 s2)
276 else if m1 < m2 && match_prefix p1 p2 m2 then
277 if zero_bit p1 m2 then diff s1 l2 else diff s1 r2
284 (*s All the following operations ([cardinal], [iter], [fold], [for_all],
285 [exists], [filter], [partition], [choose], [elements]) are
286 implemented as for any other kind of binary trees. *)
288 let rec cardinal n = match n.node with
291 | Branch (_,_,t0,t1) -> cardinal t0 + cardinal t1
293 let rec iter f n = match n.node with
296 | Branch (_,_,t0,t1) -> iter f t0; iter f t1
298 let rec fold f s accu = match s.node with
301 | Branch (_,_,t0,t1) -> fold f t0 (fold f t1 accu)
303 let rec for_all p n = match n.node with
306 | Branch (_,_,t0,t1) -> for_all p t0 && for_all p t1
308 let rec exists p n = match n.node with
311 | Branch (_,_,t0,t1) -> exists p t0 || exists p t1
313 let rec filter pr n = match n.node with
315 | Leaf k -> if pr k then n else empty
316 | Branch (p,m,t0,t1) -> branch_ne (p, m, filter pr t0, filter pr t1)
319 let rec part (t,f as acc) n = match n.node with
321 | Leaf k -> if p k then (add k t, f) else (t, add k f)
322 | Branch (_,_,t0,t1) -> part (part acc t0) t1
324 part (empty, empty) s
326 let rec choose n = match n.node with
327 | Empty -> raise Not_found
329 | Branch (_, _,t0,_) -> choose t0 (* we know that [t0] is non-empty *)
333 let coll k (l, b, r) =
334 if k < x then add k l, b, r
335 else if k > x then l, b, add k r
338 fold coll s (empty, false, empty)
343 Printf.eprintf "{ id = %i; key = %i ; node=" n.id n.key;
345 | Empty -> Printf.eprintf "Empty; }\n"
346 | Leaf k -> Printf.eprintf "Leaf %i; }\n" k
347 | Branch (p,m,l,r) ->
348 Printf.eprintf "Branch(%i,%i,id=%i,id=%i); }\n"
354 let make l = List.fold_left (fun acc e -> add e acc ) empty l
357 (*s Additional functions w.r.t to [Set.S]. *)
359 let rec intersect s1 s2 = (equal s1 s2) ||
360 match (s1.node,s2.node) with
363 | Leaf k1, _ -> mem k1 s2
364 | _, Leaf k2 -> mem k2 s1
365 | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) ->
366 if m1 == m2 && p1 == p2 then
367 intersect l1 l2 || intersect r1 r2
368 else if m1 < m2 && match_prefix p2 p1 m1 then
369 intersect (if zero_bit p2 m1 then l1 else r1) s2
370 else if m1 > m2 && match_prefix p1 p2 m2 then
371 intersect s1 (if zero_bit p1 m2 then l2 else r2)
378 let from_list l = List.fold_left (fun acc i -> add i acc) empty l
382 external int_vector_alloc : int -> int_vector = "caml_int_vector_alloc"
383 external int_vector_set : int_vector -> int -> int -> unit = "caml_int_vector_set"
384 external int_vector_length : int_vector -> int = "caml_int_vector_length"
385 external int_vector_empty : unit -> int_vector = "caml_int_vector_empty"
387 let empty_vector = int_vector_empty ()
389 let to_int_vector_ext s =
390 let l = cardinal s in
391 let v = int_vector_alloc l in
393 iter (fun e -> int_vector_set v !i e; incr i) s;
396 let hash_vectors = Hashtbl.create 4097
398 let to_int_vector s =
400 Hashtbl.find hash_vectors s.key
403 let v = to_int_vector_ext s in
404 Hashtbl.add hash_vectors s.key v;