From f1da22caf34bc3367984228ace9e7e7aa0760f0a Mon Sep 17 00:00:00 2001 From: kim Date: Sun, 26 Apr 2009 10:30:24 +0000 Subject: [PATCH] New files refactoring the code git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@355 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- hcons.ml | 37 +++++ hcons.mli | 21 +++ html_header.ml | 211 +++++++++++++++++++++++++++ memoizer.ml | 65 +++++++++ memoizer.mli | 5 + ptset_include.ml | 371 +++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 710 insertions(+) create mode 100644 hcons.ml create mode 100644 hcons.mli create mode 100644 html_header.ml create mode 100644 memoizer.ml create mode 100644 memoizer.mli create mode 100644 ptset_include.ml diff --git a/hcons.ml b/hcons.ml new file mode 100644 index 0000000..293b90f --- /dev/null +++ b/hcons.ml @@ -0,0 +1,37 @@ +INCLUDE "utils.ml" +module type S = +sig + type data + type t + val make : data -> t + val node : t -> data + val hash : t -> int + val uid : t -> int + val equal : t -> t -> bool +end +module Make (H : Hashtbl.HashedType) : S with type data = H.t = +struct + type data = H.t + type t = { id : int; + key : int; (* hash *) + node : data; + } + + let node t = t.node + let hash t = t.key + let uid t = t.id + let gen_uid = + let id = ref ~-1 in + fun () -> incr id;!id + let equal t1 t2 = t1 == t2 || t1.id == t2.id + module WH = Weak.Make( struct + type _t = t + type t = _t + let hash = hash + let equal a b = H.equal a.node b.node + end) + let pool = WH.create MED_H_SIZE + let make x = + let cell = { id = gen_uid(); key = H.hash x; node = x } in + WH.merge pool cell +end diff --git a/hcons.mli b/hcons.mli new file mode 100644 index 0000000..ceb33c2 --- /dev/null +++ b/hcons.mli @@ -0,0 +1,21 @@ +module type S = + sig + type data + type t + val make : data -> t + val node : t -> data + val hash : t -> int + val uid : t -> int + val equal : t -> t -> bool + end +module Make : + functor (H : Hashtbl.HashedType) -> +sig + type data = H.t + type t + val make : data -> t + val node : t -> data + val hash : t -> int + val uid : t -> int + val equal : t -> t -> bool +end diff --git a/html_header.ml b/html_header.ml new file mode 100644 index 0000000..f2b1a8e --- /dev/null +++ b/html_header.ml @@ -0,0 +1,211 @@ +let html_header = format_of_string + " + + + + + + + + +" +let html_footer = " + +" + let h_trace = Hashtbl.create 4096 + let register_trace t x = Hashtbl.add h_trace (Tree.id t) x + + + let output_trace a t file results = + let h_auto = 6+ (Hashtbl.fold (fun _ l a -> (List.length l)+a) a.phi 0) in + let max_tt = ref 0 in + let outc = open_out file in + let outf = Format.formatter_of_out_channel outc in + let strf = Format.str_formatter in + let pr_str x = Format.fprintf strf x in + let pr_out x = Format.fprintf outf x in + let rec loop t = + if not (Tree.is_nil t) then + let tooltip,selected = try + let (inconf,outconf,leftres,rightres,trans) = Hashtbl.find h_trace (Tree.id t) in + let selected = IntSet.mem (Tree.id t) results in + pr_str "
Subtree %i, tag='%s', internal node = %s\nEntered with configuration:\n" + (Tree.id t) (Tree.id t) (Tag.to_string (Tree.tag t)) (Tree.dump_node t); + iter_pl (fun s -> pr_st strf (Ptset.elements s)) inconf; + pr_str "%s" "\nLeft with configuration:\n"; + iter_pl (fun s -> pr_st strf (Ptset.elements s)) outconf; + pr_str "%s" "\nAccept states for left child:\n"; + iter_pl (fun s -> pr_st strf (Ptset.elements s)) leftres; + pr_str "%s" "\nAccept states for right child:\n"; + iter_pl (fun s -> pr_st strf (Ptset.elements s)) rightres; + pr_str "%s" "\nTriggered transitions:\n"; + pr_str "%s" ""; + List.iter (fun fl -> + pr_str "%s" ""; + max_tt := max !max_tt (form_list_length fl); + ) trans; + pr_str "%s" "
";pr_frmlst strf fl;pr_str "
\n"; + pr_str "In result set : %s\n
" (if selected then "Yes" else "No"); + Format.flush_str_formatter(),selected + with + Not_found -> "",false + in + let tag = Tree.tag t in + let div_class = (if (tooltip = "") then "skipped" else (if selected then "selected" else "touched"))^ + (if tag == Tag.pcdata then "_text" else"") + in + if tag == Tag.pcdata then + pr_out "
%s%s
"div_class (Tree.get_text t) tooltip + else begin + if (Tree.is_nil (Tree.first_child t)) + then + pr_out "
<%s/>%s
" + div_class (Tree.id t) (Tree.id t) (Tag.to_string tag) tooltip + else begin + pr_out "
<%s>%s
" + div_class (Tree.id t) (Tree.id t) (Tag.to_string tag) tooltip; + loop (Tree.first_child t); + pr_out "
</%s>
" div_class (Tag.to_string tag); + end; + end; + loop (Tree.next_sibling t); + in + let max_tt = 25*(!max_tt + 12)+20 in + let height = max max_tt (25*h_auto) in + pr_out html_header height height height height; + pr_out "%s" "
"; + dump outf a; + pr_out "%s" "
"; + pr_out "%s" "
"; + loop t; + pr_out "%s" html_footer; + pr_out "%!"; + close_out outc diff --git a/memoizer.ml b/memoizer.ml new file mode 100644 index 0000000..2432af7 --- /dev/null +++ b/memoizer.ml @@ -0,0 +1,65 @@ +(* + This module defines a wrapper builder which given a function + and creates its memoized version. The hashtable used to memoize + results is given as argument of the module, the keys of the table + are the argument of the function we want to wrap. + The tricky part is to do this also for recursive function where + each call can be memoized. + + See the technical report: + + "That About Wraps it Up: Using FIX to Handle Errors Without + Exceptions, and Other Programming Tricks" + + Bruce J. McAdam + + http://www.lfcs.inf.ed.ac.uk/reports/97/ECS-LFCS-97-375/ + + we give two wrapper builders: + - make , which builds a new function, memoized only at toplevel. The only + penalty here is the single look-up, which is supposed to be negligeble w.r.t + the actual computation (other wise there is little point in memoizing) + + - make_rec which acts as a fixpoint combinator and memoized each recursive call + of the function. The penalty is twofold: + 1) a look-up for every recursive call. + 2) the function has to be written in CPS, and is therefore compiled less + efficiently than its recursive non memoized function. + + Again, it is assumed that the same expensive computation will occur many time + to amortise these penalties. + +*) + +INCLUDE "utils.ml" + + +module Make ( H : Hashtbl.S ) = +struct + + + let make f = + let tbl = H.create SMALL_H_SIZE in + fun arg -> + try + H.find tbl arg + with Not_found -> + let r = f arg in H.add tbl arg r;r + + + type 'a fix = Fix of ('a fix -> 'a) + + let make_rec f = + let tbl = H.create SMALL_H_SIZE in + let unboxed = + function ((Fix f')as fix) -> + f (fun arg -> + try + H.find tbl arg + with + Not_found -> let r = f' fix arg + in H.add tbl arg r;r) + in unboxed (Fix unboxed) + +end +;; diff --git a/memoizer.mli b/memoizer.mli new file mode 100644 index 0000000..dfba383 --- /dev/null +++ b/memoizer.mli @@ -0,0 +1,5 @@ +module Make (H : Hashtbl.S) : +sig + val make : (H.key -> 'a) -> H.key -> 'a + val make_rec : ((H.key -> 'a) -> H.key -> 'a) -> H.key -> 'a +end diff --git a/ptset_include.ml b/ptset_include.ml new file mode 100644 index 0000000..695a796 --- /dev/null +++ b/ptset_include.ml @@ -0,0 +1,371 @@ +(***************************************************************************) +(* Implementation for sets of positive integers implemented as deeply hash-*) +(* consed Patricia trees. Provide fast set operations, fast membership as *) +(* well as fast min and max elements. Hash consing provides O(1) equality *) +(* checking *) +(* *) +(***************************************************************************) +IFDEF USE_PTSET_INCLUDE +THEN +INCLUDE "utils.ml" +(* + Cannot be used like this: + Need to be included after the following declrations: + type elt = ... + let equal_elt : elt -> elt -> bool = ... + let hash_elt : elt -> int = ... + let uid_elt : elt -> int = ... +*) +type 'a node = + | Empty + | Leaf of elt + | Branch of int * int * 'a * 'a + +module rec HNode : Hcons.S with type data = Node.t = Hcons.Make (Node) +and Node : Hashtbl.HashedType with type t = HNode.t node = +struct + type t = HNode.t node + let equal x y = + match x,y with + | Empty,Empty -> true + | Leaf k1, Leaf k2 -> equal_elt k1 k2 + | Branch(b1,i1,l1,r1),Branch(b2,i2,l2,r2) -> + b1 == b2 && i1 == i2 && + (HNode.equal l1 l2) && + (HNode.equal r1 r2) + | _ -> false + let hash = function + | Empty -> 0 + | Leaf i -> HASHINT2(HALF_MAX_INT,hash_elt i) + | Branch (b,i,l,r) -> HASHINT4(b,i,HNode.hash l, HNode.hash r) +end + +type t = HNode.t +let hash = HNode.hash +let uid = HNode.uid + +let empty = HNode.make Empty + +let is_empty s = (HNode.node s) == Empty + +(* WH.merge pool *) + +let branch p m l r = HNode.make (Branch(p,m,l,r)) +let leaf k = HNode.make (Leaf k) + +(* To enforce the invariant that a branch contains two non empty sub-trees *) +let branch_ne p m t0 t1 = + if (is_empty t0) then t1 + else if is_empty t1 then t0 else branch p m t0 t1 + +(********** from here on, only use the smart constructors *************) + +let zero_bit k m = (k land m) == 0 + +let singleton k = leaf k + +let is_singleton n = + match HNode.node n with Leaf _ -> true + | _ -> false + +let mem (k:elt) n = + let kid = uid_elt k in + let rec loop n = match HNode.node n with + | Empty -> false + | Leaf j -> equal_elt k j + | Branch (p, _, l, r) -> if kid <= p then loop l else loop r + in loop n + +let rec min_elt n = match HNode.node n with + | Empty -> raise Not_found + | Leaf k -> k + | Branch (_,_,s,_) -> min_elt s + +let rec max_elt n = match HNode.node n with + | Empty -> raise Not_found + | Leaf k -> k + | Branch (_,_,_,t) -> max_elt t + + let elements s = + let rec elements_aux acc n = match HNode.node n with + | Empty -> acc + | Leaf k -> k :: acc + | Branch (_,_,l,r) -> elements_aux (elements_aux acc r) l + in + elements_aux [] s + + let mask k m = (k lor (m-1)) land (lnot m) + + let naive_highest_bit x = + assert (x < 256); + let rec loop i = + if i = 0 then 1 else if x lsr i = 1 then 1 lsl i else loop (i-1) + in + loop 7 + + let hbit = Array.init 256 naive_highest_bit + + let highest_bit_32 x = + let n = x lsr 24 in if n != 0 then Array.unsafe_get hbit n lsl 24 + else let n = x lsr 16 in if n != 0 then Array.unsafe_get hbit n lsl 16 + else let n = x lsr 8 in if n != 0 then Array.unsafe_get hbit n lsl 8 + else Array.unsafe_get hbit x + + let highest_bit_64 x = + let n = x lsr 32 in if n != 0 then (highest_bit_32 n) lsl 32 + else highest_bit_32 x + + let highest_bit = match Sys.word_size with + | 32 -> highest_bit_32 + | 64 -> highest_bit_64 + | _ -> assert false + + let branching_bit p0 p1 = highest_bit (p0 lxor p1) + + let join p0 t0 p1 t1 = + let m = branching_bit p0 p1 in + if zero_bit p0 m then + branch (mask p0 m) m t0 t1 + else + branch (mask p0 m) m t1 t0 + + let match_prefix k p m = (mask k m) == p + + let add k t = + let kid = uid_elt k in + let rec ins n = match HNode.node n with + | Empty -> leaf k + | Leaf j -> if equal_elt j k then n else join kid (leaf k) (uid_elt j) n + | Branch (p,m,t0,t1) -> + if match_prefix kid p m then + if zero_bit kid m then + branch p m (ins t0) t1 + else + branch p m t0 (ins t1) + else + join kid (leaf k) p n + in + ins t + + let remove k t = + let kid = uid_elt k in + let rec rmv n = match HNode.node n with + | Empty -> empty + | Leaf j -> if equal_elt k j then empty else n + | Branch (p,m,t0,t1) -> + if match_prefix kid p m then + if zero_bit kid m then + branch_ne p m (rmv t0) t1 + else + branch_ne p m t0 (rmv t1) + else + n + in + rmv t + + (* should run in O(1) thanks to Hash consing *) + + let equal a b = HNode.equal a b + + let compare a b = (HNode.uid a) - (HNode.uid b) + + let rec merge s t = + if (equal s t) (* This is cheap thanks to hash-consing *) + then s + else + match HNode.node s, HNode.node t with + | Empty, _ -> t + | _, Empty -> s + | Leaf k, _ -> add k t + | _, Leaf k -> add k s + | Branch (p,m,s0,s1), Branch (q,n,t0,t1) -> + if m == n && match_prefix q p m then + branch p m (merge s0 t0) (merge s1 t1) + else if m > n && match_prefix q p m then + if zero_bit q m then + branch p m (merge s0 t) s1 + else + branch p m s0 (merge s1 t) + else if m < n && match_prefix p q n then + if zero_bit p n then + branch q n (merge s t0) t1 + else + branch q n t0 (merge s t1) + else + (* The prefixes disagree. *) + join p s q t + + + + + let rec subset s1 s2 = (equal s1 s2) || + match (HNode.node s1,HNode.node s2) with + | Empty, _ -> true + | _, Empty -> false + | Leaf k1, _ -> mem k1 s2 + | Branch _, Leaf _ -> false + | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> + if m1 == m2 && p1 == p2 then + subset l1 l2 && subset r1 r2 + else if m1 < m2 && match_prefix p1 p2 m2 then + if zero_bit p1 m2 then + subset l1 l2 && subset r1 l2 + else + subset l1 r2 && subset r1 r2 + else + false + + + let union s1 s2 = merge s1 s2 + (* Todo replace with e Memo Module *) + module MemUnion = Hashtbl.Make( + struct + type set = t + type t = set*set + let equal (x,y) (z,t) = (equal x z)&&(equal y t) + let equal a b = equal a b || equal b a + let hash (x,y) = (* commutative hash *) + let x = HNode.hash x + and y = HNode.hash y + in + if x < y then HASHINT2(x,y) else HASHINT2(y,x) + end) + let h_mem = MemUnion.create MED_H_SIZE + + let mem_union s1 s2 = + try MemUnion.find h_mem (s1,s2) + with Not_found -> + let r = merge s1 s2 in MemUnion.add h_mem (s1,s2) r;r + + + let rec inter s1 s2 = + if equal s1 s2 + then s1 + else + match (HNode.node s1,HNode.node s2) with + | Empty, _ -> empty + | _, Empty -> empty + | Leaf k1, _ -> if mem k1 s2 then s1 else empty + | _, Leaf k2 -> if mem k2 s1 then s2 else empty + | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> + if m1 == m2 && p1 == p2 then + merge (inter l1 l2) (inter r1 r2) + else if m1 > m2 && match_prefix p2 p1 m1 then + inter (if zero_bit p2 m1 then l1 else r1) s2 + else if m1 < m2 && match_prefix p1 p2 m2 then + inter s1 (if zero_bit p1 m2 then l2 else r2) + else + empty + + let rec diff s1 s2 = + if equal s1 s2 + then empty + else + match (HNode.node s1,HNode.node s2) with + | Empty, _ -> empty + | _, Empty -> s1 + | Leaf k1, _ -> if mem k1 s2 then empty else s1 + | _, Leaf k2 -> remove k2 s1 + | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> + if m1 == m2 && p1 == p2 then + merge (diff l1 l2) (diff r1 r2) + else if m1 > m2 && match_prefix p2 p1 m1 then + if zero_bit p2 m1 then + merge (diff l1 s2) r1 + else + merge l1 (diff r1 s2) + else if m1 < m2 && match_prefix p1 p2 m2 then + if zero_bit p1 m2 then diff s1 l2 else diff s1 r2 + else + s1 + + +(*s All the following operations ([cardinal], [iter], [fold], [for_all], + [exists], [filter], [partition], [choose], [elements]) are + implemented as for any other kind of binary trees. *) + +let rec cardinal n = match HNode.node n with + | Empty -> 0 + | Leaf _ -> 1 + | Branch (_,_,t0,t1) -> cardinal t0 + cardinal t1 + +let rec iter f n = match HNode.node n with + | Empty -> () + | Leaf k -> f k + | Branch (_,_,t0,t1) -> iter f t0; iter f t1 + +let rec fold f s accu = match HNode.node s with + | Empty -> accu + | Leaf k -> f k accu + | Branch (_,_,t0,t1) -> fold f t0 (fold f t1 accu) + + +let rec for_all p n = match HNode.node n with + | Empty -> true + | Leaf k -> p k + | Branch (_,_,t0,t1) -> for_all p t0 && for_all p t1 + +let rec exists p n = match HNode.node n with + | Empty -> false + | Leaf k -> p k + | Branch (_,_,t0,t1) -> exists p t0 || exists p t1 + +let rec filter pr n = match HNode.node n with + | Empty -> empty + | Leaf k -> if pr k then n else empty + | Branch (p,m,t0,t1) -> branch_ne p m (filter pr t0) (filter pr t1) + +let partition p s = + let rec part (t,f as acc) n = match HNode.node n with + | Empty -> acc + | Leaf k -> if p k then (add k t, f) else (t, add k f) + | Branch (_,_,t0,t1) -> part (part acc t0) t1 + in + part (empty, empty) s + +let rec choose n = match HNode.node n with + | Empty -> raise Not_found + | Leaf k -> k + | Branch (_, _,t0,_) -> choose t0 (* we know that [t0] is non-empty *) + + +let split x s = + let coll k (l, b, r) = + if k < x then add k l, b, r + else if k > x then l, b, add k r + else l, true, r + in + fold coll s (empty, false, empty) + + +let make l = List.fold_left (fun acc e -> add e acc ) empty l +(*i*) + +(*s Additional functions w.r.t to [Set.S]. *) + +let rec intersect s1 s2 = (equal s1 s2) || + match (HNode.node s1,HNode.node s2) with + | Empty, _ -> false + | _, Empty -> false + | Leaf k1, _ -> mem k1 s2 + | _, Leaf k2 -> mem k2 s1 + | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> + if m1 == m2 && p1 == p2 then + intersect l1 l2 || intersect r1 r2 + else if m1 < m2 && match_prefix p2 p1 m1 then + intersect (if zero_bit p2 m1 then l1 else r1) s2 + else if m1 > m2 && match_prefix p1 p2 m2 then + intersect s1 (if zero_bit p1 m2 then l2 else r2) + else + false + + + +let rec uncons n = match HNode.node n with + | Empty -> raise Not_found + | Leaf k -> (k,empty) + | Branch (p,m,s,t) -> let h,ns = uncons s in h,branch_ne p m ns t + +let from_list l = List.fold_left (fun acc e -> add e acc) empty l + +END -- 2.17.1