From: kim Date: Mon, 9 Feb 2009 07:48:55 +0000 (+0000) Subject: Added missing files X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;ds=inline;h=d04661689691b4587cfc45a35e98604fcdc2b878;p=SXSI%2Fxpathcomp.git Added missing files git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@153 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/ata.ml b/ata.ml new file mode 100644 index 0000000..c12be8a --- /dev/null +++ b/ata.ml @@ -0,0 +1,543 @@ +(* Todo refactor and remove this alias *) +INCLUDE "debug.ml" +module Tree = Tree.Binary + +let gen_id() = Oo.id (object end) +module State = struct + + type t = int + let mk = gen_id + +end +let mk_state = State.mk + +type state = State.t + +type predicate = Ptset.t*Ptset.t -> Tree.t -> [ `True | `False | `Maybe ] + +type formula_expr = + | False | True + | Or of formula * formula + | And of formula * formula + | Atom of ([ `Left | `Right ]*bool*state*predicate option) +and formula = { fid: int; + pos : formula_expr; + neg : formula; + st : Ptset.t*Ptset.t; + } + + +module FormNode = +struct + type t = formula + let hash = function + | False -> 0 + | True -> 1 + | And(f1,f2) -> 2+17*f1.fid + 37*f2.fid + | Or(f1,f2) -> 3+101*f1.fid + 253*f2.fid + | Atom(d,b,s,_) -> 5+(if d=`Left then 11 else 19)*(if b then 23 else 31)*s + + let hash t = (hash t.pos) land max_int + + let equal f1 f2 = + match f1.pos,f2.pos with + | False,False | True,True -> true + | Atom(d1,b1,s1,_), Atom(d2,b2,s2,_) when (d1 = d2) && (b1=b2) &&(s1=s2) -> true + | Or(g1,g2),Or(h1,h2) + | And(g1,g2),And(h1,h2) -> g1.fid == h1.fid && g2.fid == h2.fid + | _ -> false +end +module WH = Weak.Make(FormNode) + +let f_pool = WH.create 107 + +let true_,false_ = + let rec t = { fid = 1; pos = True; neg = f ; st = Ptset.empty,Ptset.empty} + and f = { fid = 0; pos = False; neg = t; st = Ptset.empty,Ptset.empty } + in + WH.add f_pool f; + WH.add f_pool t; + t,f + +let is_true f = f.fid == 1 +let is_false f = f.fid == 0 + + +let cons pos neg s1 s2 = + let rec pnode = + { fid = gen_id (); + pos = pos; + neg = nnode; + st = s1; } + and nnode = { + fid = gen_id (); + pos = neg; + neg = pnode; + st = s2; + } + in + (WH.merge f_pool pnode),(WH.merge f_pool nnode) + +let atom_ ?(pred=None) d p s = + let si = Ptset.singleton s in + let ss = match d with + | `Left -> si,Ptset.empty + | `Right -> Ptset.empty,si + in fst (cons (Atom(d,p,s,pred)) (Atom(d,not p,s,pred)) ss ss ) + +let merge_states f1 f2 = + let sp = + Ptset.union (fst f1.st) (fst f2.st), + Ptset.union (snd f1.st) (snd f2.st) + and sn = + Ptset.union (fst f1.neg.st) (fst f2.neg.st), + Ptset.union (snd f1.neg.st) (snd f2.neg.st) + in + sp,sn + +let or_ f1 f2 = + if is_true f1 || is_true f2 then true_ + else if is_false f1 && is_false f2 then false_ + else if is_false f1 then f2 + else if is_false f2 then f1 + else + let sp,sn = merge_states f1 f2 in + fst (cons (Or(f1,f2)) (And(f1.neg,f2.neg)) sp sn) + + + +let and_ f1 f2 = + if is_true f1 && is_true f2 then true_ + else if is_false f1 || is_false f2 then false_ + else if is_true f1 then f2 + else if is_true f2 then f1 + else + let sp,sn = merge_states f1 f2 in + fst (cons (And(f1,f2)) (Or(f1.neg,f2.neg)) sp sn) + + +let not_ f = f.neg + +type property = [ `None | `Existential ] +let get_prop h s = + try + Hashtbl.find h s + with + Not_found -> `None + +type t = { + id : int; + states : Ptset.t; + init : Ptset.t; + final : Ptset.t; + universal : Ptset.t; + (* Transitions of the Alternating automaton *) + (* (tags,q) -> (marking,formula) *) + phi : ((TagSet.t*state),(bool*formula)) Hashtbl.t; + delta : (TagSet.t,(Ptset.t*bool*Ptset.t*Ptset.t)) Hashtbl.t; + properties : (state,property) Hashtbl.t; + } + + module Pair (X : Set.OrderedType) (Y : Set.OrderedType) = + struct + type t = X.t*Y.t + let compare (x1,y1) (x2,y2) = + let r = X.compare x1 x2 in + if r == 0 then Y.compare y1 y2 + else r + end + + module PL = Set.Make (Pair (Ptset) (Ptset)) + + + let pr_st ppf l = Format.fprintf ppf "{"; + begin + match l with + | [] -> () + | [s] -> Format.fprintf ppf " %i" s + | p::r -> Format.fprintf ppf " %i" p; + List.iter (fun i -> Format.fprintf ppf "; %i" i) r + end; + Format.fprintf ppf " }" + let rec pr_frm ppf f = match f.pos with + | True -> Format.fprintf ppf "⊤" + | False -> Format.fprintf ppf "⊤" + | And(f1,f2) -> + Format.fprintf ppf "("; + (pr_frm ppf f1); + Format.fprintf ppf ") ∧ ("; + (pr_frm ppf f2); + Format.fprintf ppf ")" + | Or(f1,f2) -> + (pr_frm ppf f1); + Format.fprintf ppf " ∨ "; + (pr_frm ppf f2); + | Atom(dir,b,s,p) -> Format.fprintf ppf "%s%s[%i]%s" + (if b then "" else "¬") + (if dir = `Left then "↓₁" else "↓₂")s + (match p with None -> "" | _ -> " ") + + let dnf_hash = Hashtbl.create 17 + + let rec dnf_aux f = match f.pos with + | False -> PL.empty + | True -> PL.singleton (Ptset.empty,Ptset.empty) + | Atom(`Left,_,s,_) -> PL.singleton (Ptset.singleton s,Ptset.empty) + | Atom(`Right,_,s,_) -> PL.singleton (Ptset.empty,Ptset.singleton s) + | Or(f1,f2) -> PL.union (dnf f1) (dnf f2) + | And(f1,f2) -> + let pl1 = dnf f1 + and pl2 = dnf f2 + in + PL.fold (fun (s1,s2) acc -> + PL.fold ( fun (s1', s2') acc' -> + (PL.add + ((Ptset.union s1 s1'), + (Ptset.union s2 s2')) acc') ) + pl2 acc ) + pl1 PL.empty + + + and dnf f = + try + Hashtbl.find dnf_hash f.fid + with + Not_found -> + let d = dnf_aux f in + Hashtbl.add dnf_hash f.fid d;d + + + let equal_form f1 f2 = + (f1.fid == f2.fid) || (FormNode.equal f1 f2) || (PL.equal (dnf f1) (dnf f2)) + + let alt_trans_to_nfa ?(accu=[]) ts s mark f = + (* todo memoize *) + let f' = dnf f in + PL.fold (fun (s1,s2) acc -> (ts,s,mark,s1,s2)::acc) f' accu + + + let possible_trans ?(accu=[]) a q tag = + (* todo change the data structure to avoid creating (,) *) + let ata_trans = + Hashtbl.fold (fun (ts,s) (m,f) acc -> + if (q==s) && (TagSet.mem tag ts) + then (ts,s,m,f)::acc + else acc) a.phi [] + in + if ata_trans != [] + then begin + List.iter (fun (ts,s,m,f) -> + (* The following builds too many transitions in the nfa + let ts' = TagSet.remove tag ts + in + Hashtbl.remove a.phi (ts,s); + if not (TagSet.is_empty ts') + then Hashtbl.add a.phi (ts',s) (m,f) + *) + Hashtbl.remove a.phi (ts,s) + ) ata_trans; + (* let tstag = TagSet.tag tag in *) + let nfa_trs = List.fold_left (fun acc (ts,s,m,f) -> + alt_trans_to_nfa ~accu:acc ts s m f) [] ata_trans + in + List.iter (fun (ts,s,m,s1,s2) -> + Hashtbl.add a.delta ts ((Ptset.singleton s),m,s1,s2)) nfa_trs + end; + Hashtbl.fold (fun ts (s,m,s1,s2) acc -> + if (Ptset.mem q s) && (TagSet.mem tag ts) + then (m,s1,s2)::acc else acc) a.delta accu + + let dump ppf a = + Format.fprintf ppf "Automaton (%i) :\n" a.id; + Format.fprintf ppf "States : "; pr_st ppf (Ptset.elements a.states); + Format.fprintf ppf "\nInitial states : "; pr_st ppf (Ptset.elements a.init); + Format.fprintf ppf "\nFinal states : "; pr_st ppf (Ptset.elements a.final); + Format.fprintf ppf "\nUniversal states : "; pr_st ppf (Ptset.elements a.universal); + Format.fprintf ppf "\nAlternating transitions :\n------------------------------\n"; + let l = Hashtbl.fold (fun k t acc -> (k,t)::acc) a.phi [] in + let l = List.sort (fun ((tsx,x),_) ((tsy,y),_) -> if x-y == 0 then TagSet.compare tsx tsy else x-y) l in + List.iter (fun ((ts,q),(b,f)) -> + + let s = + try + Tag.to_string (TagSet.choose ts) + with + | _ -> "*" + in + Format.fprintf ppf "(%s,%i) %s " s q (if b then "=>" else "->"); + pr_frm ppf f; + Format.fprintf ppf "\n")l; + + Format.fprintf ppf "NFA transitions :\n------------------------------\n"; + Hashtbl.iter (fun (ts) (q,b,s1,s2) -> + + let s = + try + Tag.to_string (TagSet.choose ts) + with + | _ -> "*" + in + pr_st ppf (Ptset.elements q); + Format.fprintf ppf ",%s %s " s (if b then "=>" else "->"); + Format.fprintf ppf "("; + pr_st ppf (Ptset.elements s1); + Format.fprintf ppf ","; + pr_st ppf (Ptset.elements s2); + Format.fprintf ppf ")\n" ) a.delta; + Format.fprintf ppf "=======================================\n" + + module Transitions = struct + type t = state*TagSet.t*bool*formula + let ( ?< ) x = x + let ( >< ) state label = state,label + let ( >=> ) (state,(label,mark)) form = (state,label,mark,form) + let ( +| ) f1 f2 = or_ f1 f2 + let ( *& ) f1 f2 = and_ f1 f2 + let ( ** ) d s = atom_ d true s + + + end + type transition = Transitions.t + + let equal_trans (q1,t1,m1,f1) (q2,t2,m2,f2) = + (q1 == q2) && (TagSet.equal t1 t2) && (m1 == m2) && (equal_form f1 f2) + + module TS : Set.S with type elt = Tree.t = Set.Make(Tree) + let res = ref TS.empty + + + module BottomUpNew = struct + +IFDEF DEBUG +THEN + type trace = + | TNil of Ptset.t*Ptset.t + | TNode of Ptset.t*Ptset.t*bool* (int*bool*formula) list + + let traces = Hashtbl.create 17 + let dump_trace t = + let out = open_out "debug_trace.dot" + in + let outf = Format.formatter_of_out_channel out in + + let rec aux t num = + if Tree.is_node t + then + match (try Hashtbl.find traces (Tree.id t) with Not_found -> TNil(Ptset.empty,Ptset.empty)) with + | TNode(r,s,mark,trs) -> + let numl = aux (Tree.left t) num in + let numr = aux (Tree.right t) (numl+1) in + let mynum = numr + 1 in + Format.fprintf outf "n%i [ label=\"<%s>\\nr=" mynum (Tag.to_string (Tree.tag t)); + pr_st outf (Ptset.elements r); + Format.fprintf outf "\\ns="; + pr_st outf (Ptset.elements s); + List.iter (fun (q,m,f) -> + Format.fprintf outf "\\n%i %s" q (if m then "⇨" else "→"); + pr_frm outf f ) trs; + Format.fprintf outf "\", %s shape=box ];\n" + (if mark then "color=cyan1, style=filled," else ""); + let _ = Format.fprintf outf "n%i -> n%i;\n" mynum numl in + let _ = Format.fprintf outf "n%i -> n%i;\n" mynum numr in + mynum + | TNil(r,s) -> Format.fprintf outf "n%i [ shape=box, label=\"Nil\\nr=" num; + pr_st outf (Ptset.elements r); + Format.fprintf outf "\\ns="; + pr_st outf (Ptset.elements s); + Format.fprintf outf "\"];\n";num + else + match Hashtbl.find traces (-10) with + | TNil(r,s) -> + Format.fprintf outf "n%i [ shape=box, label=\"Nil\\nr=" num; + pr_st outf (Ptset.elements r); + Format.fprintf outf "\\ns="; + pr_st outf (Ptset.elements s); + Format.fprintf outf "\"];\n"; + num + | _ -> assert false + + in + Format.fprintf outf "digraph G {\n"; + ignore(aux t 0); + Format.fprintf outf "}\n%!"; + close_out out; + ignore(Sys.command "dot -Tsvg debug_trace.dot > debug_trace.svg") +END + + + + let hfeval = Hashtbl.create 17 + let miss = ref 0 + let call = ref 0 + let rec findlist s1 s2 = function + | [] -> raise Not_found + | ((ss1,ss2),r)::_ when + (not (Ptset.is_empty s1)) && (Ptset.subset s1 ss1) && + (not (Ptset.is_empty s2)) && (Ptset.subset s2 ss2) -> r + | _::r -> findlist s1 s2 r + + let eval_form f s1 s2 res1 res2 = + + let rec eval_aux f = match f.pos with + | Atom(`Left,b,q,_) -> if b == (Ptset.mem q s1) then (true,res1) else false,TS.empty + | Atom(`Right,b,q,_) -> if b == (Ptset.mem q s2) then (true,res2) else false,TS.empty + | True -> true,(TS.union res1 res2) + | False -> false,TS.empty + | Or(f1,f2) -> + let b1,r1 = eval_aux f1 + and b2,r2 = eval_aux f2 + in + let r1 = if b1 then r1 else TS.empty + and r2 = if b2 then r2 else TS.empty + in (b1 || b2, TS.union r1 r2) + + | And(f1,f2) -> + let b1,r1 = eval_aux f1 + and b2,r2 = eval_aux f2 + in + if b1 && b2 then (true, TS.union r1 r2) + else (false,TS.empty) + + in incr call;eval_aux f + + + (* If true, then the formule may evaluate to true in the future, + if false it will always return false, i.e. necessary conditions are not + satisfied + *) + + let val3 = function true -> `True + | false -> `False + + let or3 a b = match a,b with + | `True,_ | _,`True -> `True + | `False,`False -> `False + | _ -> `Maybe + + let and3 a b = match a,b with + | `True,`True -> `True + | `False,_ | _,`False -> `False + | _ -> `Maybe + let not3 = function + | `True -> `False + | `False -> `True + | `Maybe -> `Maybe + + let true3 = function true -> `Maybe + | false -> `False + + let may_eval (s1,s2) f t = + let rec aux f = match f.pos with + | True -> `True + | False -> `False + | Or(f1,f2) -> or3 (aux f1) (aux f2) + | And(f1,f2) -> and3 (aux f1) (aux f2) + | Atom(dir,b,q,predo) -> + and3 (true3 ((Ptset.mem q (match dir with + | `Left -> s1 + | `Right -> s2)) == b)) + (match predo with + | Some pred -> (pred (s1,s2) t) + | None -> `True) + + in aux f + + let rec accepting_among a t r = + let r = Ptset.diff r a.final in + let rest = Ptset.inter a.final r in + if Ptset.is_empty r then r,TS.empty else + if (not (Tree.is_node t)) + then + let _ = D(Hashtbl.add traces (-10) (TNil(r,Ptset.inter a.final r))) + in + Ptset.inter a.final r,TS.empty + else + let tag = Tree.tag t + and t1 = Tree.first_child t + and t2 = Tree.next_sibling t + in + let r1,r2,trs = + Hashtbl.fold (fun (ts,q) ((m,f)as tr) ((ar1,ar2,lt)as acc) -> + if (TagSet.mem tag ts) && Ptset.mem q r + then begin + (* Format.fprintf Format.err_formatter "Tree with tag %s qualifies for transition : (%s,%i)%s" + (Tag.to_string tag) + (try + Tag.to_string (TagSet.choose ts) + with + | _ -> "*" ) + q + (if m then "=>" else "->"); + pr_frm Format.err_formatter f; + Format.fprintf Format.err_formatter "\n"; *) + let ls,rs = f.st in + Ptset.union ls ar1,Ptset.union rs ar2,(q,tr)::lt + end + else acc + ) a.phi (Ptset.empty,Ptset.empty,[]) + in + let rtrue,rfalse,rmay,trs,selnodes = + List.fold_left (fun (at,af,am,atrs,selnodes) (q,(m,f)) -> + let ppf = Format.err_formatter in + match (*may_eval (r1,r2) f t *) `Maybe with + | `True -> + (* Format.fprintf ppf "Will skip (%i) %s " q (if m then "=>" else "->"); + pr_frm ppf f; + Format.fprintf ppf ", always true \n"; *) + (Ptset.add q at),af,am,atrs,TS.add t selnodes + | `False -> + (*Format.fprintf ppf "Will skip (%i) %s " q (if m then "=>" else "->"); + pr_frm ppf f; + Format.fprintf ppf ", always false \n"; *) + at,(Ptset.add q af),am,atrs,selnodes + + | `Maybe -> +(* Format.fprintf ppf "Must take (%i) %s " q (if m then "=>" else "->"); + pr_frm ppf f; + Format.fprintf ppf "\n"; *) + at,af,(Ptset.add q am),(q,(m,f))::atrs,selnodes) + (Ptset.empty,Ptset.empty,Ptset.empty,[],TS.empty) trs + in + let rr1,rr2,trs = + List.fold_left (fun ((ar1,ar2,trs)as acc) ((q,(_,f)as tr)) -> + if Ptset.mem q rmay + then let ls,rs = f.st in + Ptset.union ls ar1,Ptset.union rs ar2,tr::trs + else acc) (Ptset.empty,Ptset.empty,[]) trs + in + let s1,res1 = accepting_among a t1 rr1 + and s2,res2 = accepting_among a t2 rr2 + in + let res,set,mark,trs = List.fold_left (fun ((sel_nodes,res,amark,acctr) as acc) (q,(mark,f)) -> + let b,resnodes = eval_form f s1 s2 res1 res2 in + (* if b then begin + pr_st Format.err_formatter (Ptset.elements s1); + Format.fprintf Format.err_formatter ","; + pr_st Format.err_formatter (Ptset.elements s2); + Format.fprintf Format.err_formatter " satisfies "; + pr_frm Format.err_formatter f; + Format.fprintf Format.err_formatter " for input tree %s\n" (Tag.to_string tag); + end; *) + if b + then + (TS.union + (if mark then TS.add t resnodes else resnodes) + sel_nodes) + ,Ptset.add q res,amark||mark,(q,mark,f)::acctr + else acc + ) (TS.empty,rtrue,false,[]) trs + in + + let set = Ptset.union a.final set in + let _ = D(Hashtbl.add traces (Tree.id t) (TNode(r,set,mark,trs))) in + set,res + + + let run a t = + let st,res = accepting_among a t a.init in + let b = Ptset.is_empty (st) in + let _ = D(dump_trace t) in + if b then [] + else (TS.elements res) + + end diff --git a/ata.mli b/ata.mli new file mode 100644 index 0000000..f5e2c4f --- /dev/null +++ b/ata.mli @@ -0,0 +1,56 @@ +type state = int +val mk_state : unit -> state + +type predicate = Ptset.t*Ptset.t -> Tree.Binary.t -> [ `True | `False | `Maybe ] +type formula_expr = + False + | True + | Or of formula * formula + | And of formula * formula + | Atom of ([ `Left | `Right ] * bool * state * predicate option) +and formula = { fid : int; pos : formula_expr; neg : formula; st : Ptset.t*Ptset.t;} +val true_ : formula +val false_ : formula +val atom_ : ?pred:predicate option -> [`Left | `Right ] -> bool -> state -> formula +val and_ : formula -> formula -> formula +val or_ : formula -> formula -> formula +val not_ : formula -> formula +val equal_form : formula -> formula -> bool +val pr_frm : Format.formatter -> formula -> unit + + +type property = [ `None | `Existential ] + +type t = { + id : int; + states : Ptset.t; + init : Ptset.t; + final : Ptset.t; + universal : Ptset.t; + phi : (TagSet.t * state, bool * formula) Hashtbl.t; + delta : (TagSet.t, Ptset.t * bool * Ptset.t * Ptset.t) Hashtbl.t; + properties : (state,property) Hashtbl.t; +} +val dump : Format.formatter -> t -> unit + +module Transitions : sig +type t = state*TagSet.t*bool*formula +(* Doing this avoid the parenthesis *) +val ( ?< ) : state -> state +val ( >< ) : state -> TagSet.t*bool -> state*(TagSet.t*bool) +val ( >=> ) : state*(TagSet.t*bool) -> formula -> t +val ( +| ) : formula -> formula -> formula +val ( *& ) : formula -> formula -> formula +val ( ** ) : [`Left | `Right ] -> state -> formula + +end +type transition = Transitions.t +val equal_trans : transition -> transition -> bool + + +module BottomUpNew : +sig + val miss : int ref + val call : int ref + val run : t -> Tree.Binary.t -> Tree.Binary.t list +end diff --git a/custom.ml b/custom.ml new file mode 100644 index 0000000..a71f32a --- /dev/null +++ b/custom.ml @@ -0,0 +1,138 @@ +(* also taken from CDuce misc/custom.ml + this module should always be included not referenced with Open +*) + +module Dummy = +struct + let dump _ _ = failwith "dump not implemented" + let check _ = failwith "check not implemented" + let equal _ _ = failwith "equal not implemented" + let hash _ = failwith "hash not implemented" + let compare _ _ = failwith "compare not implemented" + let print _ _ = failwith "print not implemented" +end + +(* Some of this borrowed from Jean-Christophe Filliâtre : + http://www.lri.fr/~filliatr/ftp/ocaml/ds/bitset.ml.html +*) + +module IntSet : Set.S with type elt = int= +struct + let max = Sys.word_size - 2 + type t = int + type elt = int + + let empty = 0 + let full = -1 + let is_empty x = x == 0 + let mem e s = ((1 lsl e) land s) != 0 + let add e s = (1 lsl e) lor s + let singleton e = (1 lsl e) + let union = (lor) + let inter = (land) + let diff a b = a land (lnot b) + let remove e s = (lnot (1 lsl e) land s) + let compare = (-) + let equal = (==) + let subset a b = a land (lnot b) == 0 + let cardinal s = + let rec loop n s = + if s == 0 then n else loop (succ n) (s - (s land (-s))) + in + loop 0 s +(* inverse of bit i = 1 lsl i i.e. tib i = log_2(i) *) +let log2 = Array.create 255 0 +let () = for i = 0 to 7 do log2.(1 lsl i) <- i done + +(* assumption: x is a power of 2 *) +let tib32 x = + if x land 0xFFFF == 0 then + let x = x lsr 16 in + if x land 0xFF == 0 then 24 + log2.(x lsr 8) else 16 + log2.(x) + else + if x land 0xFF == 0 then 8 + log2.(x lsr 8) else log2.(x) + +let ffffffff = (0xffff lsl 16) lor 0xffff +let tib64 x = + if x land ffffffff == 0 then 32 + tib32 (x lsr 32) else tib32 x + +let tib = + match Sys.word_size with 32 -> tib32 | 64 -> tib64 | _ -> assert false + +let min_elt s = + if s == 0 then raise Not_found; + tib (s land (-s)) + +let choose = min_elt + +(* TODO: improve? *) +let max_elt s = + if s == 0 then raise Not_found; + let rec loop i = + if s land i != 0 then tib i + else if i = 1 then raise Not_found else loop (i lsr 1) + in + loop min_int + +let rec elements s = + if s == 0 then [] else let i = s land (-s) in tib i :: elements (s - i) + +let rec iter f s = + if s != 0 then let i = s land (-s) in f (tib i); iter f (s - i) + +let rec fold f s acc = + if s == 0 then acc else let i = s land (-s) in fold f (s - i) (f (tib i) acc) + +let rec for_all p s = + s == 0 || let i = s land (-s) in p (tib i) && for_all p (s - i) + +let rec exists p s = + s != 0 && let i = s land (-s) in p (tib i) || exists p (s - i) + +let rec filter p s = + if s == 0 then + 0 + else + let i = s land (-s) in + let s = filter p (s - i) in + if p (tib i) then s + i else s + +let rec partition p s = + if s == 0 then + 0, 0 + else + let i = s land (-s) in + let st,sf = partition p (s - i) in + if p (tib i) then st + i, sf else st, sf + i + +let split i s = + let bi = 1 lsl i in + s land (bi - 1), s land bi != 0, s land (-1 lsl (i+1)) +end + + +module Bool = +struct + module Make (X : Sigs.T) (Y : Sigs.T) : + Sigs.T with type t = X.t*Y.t = + struct + module Fst = X + module Snd = Y + type t = X.t*Y.t + let dump ppf (x,y) = + X.dump ppf x; + Y.dump ppf y + + let check (x,y) = X.check x; Y.check y + let equal (x,y) (z,t) = + X.equal x z && Y.equal y t + let hash (x,y) = (X.hash x) + 4093 * Y.hash y + let compare (x,y) (z,t) = + let r = X.compare x z in + if r == 0 + then Y.compare y t + else r + + let print _ _ = failwith "compare not implemented" + end +end diff --git a/finiteCofinite.ml b/finiteCofinite.ml new file mode 100644 index 0000000..32f0e48 --- /dev/null +++ b/finiteCofinite.ml @@ -0,0 +1,183 @@ +(******************************************************************************) +(* SXSI : XPath evaluator *) +(* Kim Nguyen (Kim.Nguyen@nicta.com.au) *) +(* Copyright NICTA 2008 *) +(* Distributed under the terms of the LGPL (see LICENCE) *) +(******************************************************************************) + +exception InfiniteSet +module type S = +sig + type elt + type t + val empty : t + val any : t + val is_empty : t -> bool + val is_any : t -> bool + val is_finite : t -> bool + val kind : t -> [ `Finite | `Cofinite ] + val singleton : elt -> t + val mem : elt -> t -> bool + val add : elt -> t -> t + val remove : elt -> t -> t + val cup : t -> t -> t + val cap : t -> t -> t + val diff : t -> t -> t + val neg : t -> t + val compare : t -> t -> int + val subset : t -> t -> bool + val kind_split : t list -> t * t + val fold : (elt -> 'a -> 'a) -> t -> 'a -> 'a + val for_all : (elt -> bool) -> t -> bool + val exists : (elt -> bool) -> t -> bool + val filter : (elt -> bool) -> t -> t + val partition : (elt -> bool) -> t -> t * t + val cardinal : t -> int + val elements : t -> elt list + val from_list : elt list -> t + val choose : t -> elt + val hash : t -> int + val equal : t -> t -> bool +end + +module Make (E : Sigs.Set) : S with type elt = E.elt = +struct + + type elt = E.elt + type t = Finite of E.t | CoFinite of E.t + + + let empty = Finite E.empty + let any = CoFinite E.empty + + let is_empty = function + Finite s when E.is_empty s -> true + | _ -> false + + let is_any = function + CoFinite s when E.is_empty s -> true + | _ -> false + + let is_finite = function + | Finite _ -> true | _ -> false + + let kind = function + Finite _ -> `Finite + | _ -> `Cofinite + + let mem x = function Finite s -> E.mem x s + | CoFinite s -> not (E.mem x s) + + let singleton x = Finite (E.singleton x) + let add e = function + | Finite s -> Finite (E.add e s) + | CoFinite s -> CoFinite (E.remove e s) + let remove e = function + | Finite s -> Finite (E.remove e s) + | CoFinite s -> CoFinite (E.add e s) + + let cup s t = match (s,t) with + | Finite s, Finite t -> Finite (E.union s t) + | CoFinite s, CoFinite t -> CoFinite ( E.inter s t) + | Finite s, CoFinite t -> CoFinite (E.diff t s) + | CoFinite s, Finite t-> CoFinite (E.diff s t) + + let cap s t = match (s,t) with + | Finite s, Finite t -> Finite (E.inter s t) + | CoFinite s, CoFinite t -> CoFinite (E.union s t) + | Finite s, CoFinite t -> Finite (E.diff s t) + | CoFinite s, Finite t-> Finite (E.diff t s) + + let diff s t = match (s,t) with + | Finite s, Finite t -> Finite (E.diff s t) + | Finite s, CoFinite t -> Finite(E.inter s t) + | CoFinite s, Finite t -> CoFinite(E.union t s) + | CoFinite s, CoFinite t -> Finite (E.diff t s) + + let neg = function + | Finite s -> CoFinite s + | CoFinite s -> Finite s + + let compare s t = match (s,t) with + | Finite s , Finite t -> E.compare s t + | CoFinite s , CoFinite t -> E.compare t s + | Finite _, CoFinite _ -> -1 + | CoFinite _, Finite _ -> 1 + + let subset s t = match (s,t) with + | Finite s , Finite t -> E.subset s t + | CoFinite s , CoFinite t -> E.subset t s + | Finite s, CoFinite t -> E.is_empty (E.inter s t) + | CoFinite _, Finite _ -> false + + (* given a list l of type t list, + returns two sets (f,c) where : + - f is the union of all the finite sets of l + - c is the union of all the cofinite sets of l + - f and c are disjoint + Invariant : cup f c = List.fold_left cup empty l + + We treat the CoFinite part explicitely : + *) + + let kind_split l = + + let rec next_finite_cofinite facc cacc = function + | [] -> Finite facc, CoFinite (E.diff cacc facc) + | Finite s ::r -> next_finite_cofinite (E.union s facc) cacc r + | CoFinite _ ::r when E.is_empty cacc -> next_finite_cofinite facc cacc r + | CoFinite s ::r -> next_finite_cofinite facc (E.inter cacc s) r + in + let rec first_cofinite facc = function + | [] -> empty,empty + | Finite s :: r-> first_cofinite (E.union s facc) r + | CoFinite s :: r -> next_finite_cofinite facc s r + in + first_cofinite E.empty l + + let fold f t a = match t with + | Finite s -> E.fold f s a + | CoFinite _ -> raise InfiniteSet + + let for_all f = function + | Finite s -> E.for_all f s + | CoFinite _ -> raise InfiniteSet + + let exists f = function + | Finite s -> E.exists f s + | CoFinite _ -> raise InfiniteSet + + let filter f = function + | Finite s -> Finite (E.filter f s) + | CoFinite _ -> raise InfiniteSet + + let partition f = function + | Finite s -> let a,b = E.partition f s in Finite a,Finite b + | CoFinite _ -> raise InfiniteSet + + let cardinal = function + | Finite s -> E.cardinal s + | CoFinite _ -> raise InfiniteSet + + let elements = function + | Finite s -> E.elements s + | CoFinite _ -> raise InfiniteSet + + let from_list l = + Finite(List.fold_left (fun x a -> E.add a x ) E.empty l) + + let choose = function + Finite s -> E.choose s + | _ -> raise InfiniteSet + + let equal a b = + match a,b with + | Finite x, Finite y | CoFinite x, CoFinite y -> E.equal x y + | _ -> false + + let hash = + function Finite x -> (E.hash x) + | CoFinite x -> ( ~-(E.hash x) land max_int) + +end + diff --git a/finiteCofinite.mli b/finiteCofinite.mli new file mode 100644 index 0000000..b489557 --- /dev/null +++ b/finiteCofinite.mli @@ -0,0 +1,38 @@ +exception InfiniteSet + +module type S = + sig + type elt + type t + val empty : t + val any : t + val is_empty : t -> bool + val is_any : t -> bool + val is_finite : t -> bool + val kind : t -> [ `Cofinite | `Finite ] + val singleton : elt -> t + val mem : elt -> t -> bool + val add : elt -> t -> t + val remove : elt -> t -> t + val cup : t -> t -> t + val cap : t -> t -> t + val diff : t -> t -> t + val neg : t -> t + val compare : t -> t -> int + val subset : t -> t -> bool + val kind_split : t list -> t * t + val fold : (elt -> 'a -> 'a) -> t -> 'a -> 'a + val for_all : (elt -> bool) -> t -> bool + val exists : (elt -> bool) -> t -> bool + val filter : (elt -> bool) -> t -> t + val partition : (elt -> bool) -> t -> t * t + val cardinal : t -> int + val elements : t -> elt list + val from_list : elt list -> t + val choose : t -> elt + val hash : t -> int + val equal : t -> t -> bool + end + +module Make : functor (E : Sigs.Set) -> S with type elt = E.elt + diff --git a/ptset.ml b/ptset.ml new file mode 100644 index 0000000..5c029f7 --- /dev/null +++ b/ptset.ml @@ -0,0 +1,362 @@ +(***************************************************************************) +(* Implementation for sets of positive integers implemented as deeply hash-*) +(* consed Patricia trees. Provide fast set operations, fast membership as *) +(* well as fast min and max elements. Hash consing provides O(1) equality *) +(* checking *) +(* *) +(***************************************************************************) + + +type elt = int + +type t = { id : int; + key : int; (* hash *) + node : node } +and node = + | Empty + | Leaf of int + | Branch of int * int * t * t + +module Node = + struct + type _t = t + type t = _t + let hash x = x.key + let hash_node = function + | Empty -> 0 + | Leaf i -> i+1 + (* power of 2 +/- 1 are fast ! *) + | Branch (b,i,l,r) -> + (b lsl 1)+ b + i+(i lsl 4) + (l.key lsl 5)-l.key + + (r.key lsl 7) - r.key + let hash_node x = (hash_node x) land max_int + let equal x y = match (x.node,y.node) with + | Empty,Empty -> true + | Leaf k1, Leaf k2 when k1 == k2 -> true + | Branch(p1,m1,l1,r1), Branch(p2,m2,l2,r2) when m1==m2 && p1==p2 && + (l1.id == l2.id) && (r1.id == r2.id) -> true + | _ -> false + end + +module WH = Weak.Make(Node) + +let pool = WH.create 4093 + +(* Neat trick thanks to Alain Frisch ! *) + +let gen_uid () = Oo.id (object end) + +let empty = { id = gen_uid (); + key = 0; + node = Empty } + +let _ = WH.add pool empty + +let is_empty = function { id = 0 } -> true | _ -> false + +let rec norm n = + let v = { id = gen_uid (); + key = Node.hash_node n; + node = n } + in + WH.merge pool v + +(* WH.merge pool *) + +let branch (p,m,l,r) = norm (Branch(p,m,l,r)) +let leaf k = norm (Leaf k) + +(* To enforce the invariant that a branch contains two non empty sub-trees *) +let branch_ne = function + | (_,_,e,t) when is_empty e -> t + | (_,_,t,e) when is_empty e -> t + | (p,m,t0,t1) -> branch (p,m,t0,t1) + +(********** from here on, only use the smart constructors *************) + +let zero_bit k m = (k land m) == 0 + +let singleton k = if k < 0 then failwith "singleton" else leaf k + +let rec mem k n = match n.node with + | Empty -> false + | Leaf j -> k == j + | Branch (p, _, l, r) -> if k <= p then mem k l else mem k r + +let rec min_elt n = match n.node with + | Empty -> raise Not_found + | Leaf k -> k + | Branch (_,_,s,_) -> min_elt s + + let rec max_elt n = match n.node with + | Empty -> raise Not_found + | Leaf k -> k + | Branch (_,_,_,t) -> max_elt t + + let elements s = + let rec elements_aux acc n = match n.node with + | Empty -> acc + | Leaf k -> k :: acc + | Branch (_,_,l,r) -> elements_aux (elements_aux acc r) l + in + elements_aux [] s + + let mask k m = (k lor (m-1)) land (lnot m) + + let naive_highest_bit x = + assert (x < 256); + let rec loop i = + if i = 0 then 1 else if x lsr i = 1 then 1 lsl i else loop (i-1) + in + loop 7 + + let hbit = Array.init 256 naive_highest_bit + + let highest_bit_32 x = + let n = x lsr 24 in if n != 0 then hbit.(n) lsl 24 + else let n = x lsr 16 in if n != 0 then hbit.(n) lsl 16 + else let n = x lsr 8 in if n != 0 then hbit.(n) lsl 8 + else hbit.(x) + + let highest_bit_64 x = + let n = x lsr 32 in if n != 0 then (highest_bit_32 n) lsl 32 + else highest_bit_32 x + + let highest_bit = match Sys.word_size with + | 32 -> highest_bit_32 + | 64 -> highest_bit_64 + | _ -> assert false + + let branching_bit p0 p1 = highest_bit (p0 lxor p1) + + let join (p0,t0,p1,t1) = + let m = branching_bit p0 p1 in + if zero_bit p0 m then + branch (mask p0 m, m, t0, t1) + else + branch (mask p0 m, m, t1, t0) + + let match_prefix k p m = (mask k m) == p + + let add k t = + let rec ins n = match n.node with + | Empty -> leaf k + | Leaf j -> if j == k then n else join (k, leaf k, j, n) + | Branch (p,m,t0,t1) -> + if match_prefix k p m then + if zero_bit k m then + branch (p, m, ins t0, t1) + else + branch (p, m, t0, ins t1) + else + join (k, leaf k, p, n) + in + ins t + + let remove k t = + let rec rmv n = match n.node with + | Empty -> empty + | Leaf j -> if k == j then empty else n + | Branch (p,m,t0,t1) -> + if match_prefix k p m then + if zero_bit k m then + branch_ne (p, m, rmv t0, t1) + else + branch_ne (p, m, t0, rmv t1) + else + n + in + rmv t + + (* should run in O(1) thanks to Hash consing *) + + let equal = (=) + + let compare = compare + + + let rec merge (s,t) = + if (equal s t) (* This is cheap thanks to hash-consing *) + then s + else + match s.node,t.node with + | Empty, _ -> t + | _, Empty -> s + | Leaf k, _ -> add k t + | _, Leaf k -> add k s + | Branch (p,m,s0,s1), Branch (q,n,t0,t1) -> + if m == n && match_prefix q p m then + branch (p, m, merge (s0,t0), merge (s1,t1)) + else if m > n && match_prefix q p m then + if zero_bit q m then + branch (p, m, merge (s0,t), s1) + else + branch (p, m, s0, merge (s1,t)) + else if m < n && match_prefix p q n then + + if zero_bit p n then + branch (q, n, merge (s,t0), t1) + else + branch (q, n, t0, merge (s,t1)) + else + (* The prefixes disagree. *) + join (p, s, q, t) + + let union s t = merge (s,t) + + let rec subset s1 s2 = (equal s1 s2) || + match (s1.node,s2.node) with + | Empty, _ -> true + | _, Empty -> false + | Leaf k1, _ -> mem k1 s2 + | Branch _, Leaf _ -> false + | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> + if m1 == m2 && p1 == p2 then + subset l1 l2 && subset r1 r2 + else if m1 < m2 && match_prefix p1 p2 m2 then + if zero_bit p1 m2 then + subset l1 l2 && subset r1 l2 + else + subset l1 r2 && subset r1 r2 + else + false + + let rec inter s1 s2 = + if (equal s1 s2) + then s1 + else + match (s1.node,s2.node) with + | Empty, _ -> empty + | _, Empty -> empty + | Leaf k1, _ -> if mem k1 s2 then s1 else empty + | _, Leaf k2 -> if mem k2 s1 then s2 else empty + | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> + if m1 == m2 && p1 == p2 then + merge (inter l1 l2, inter r1 r2) + else if m1 > m2 && match_prefix p2 p1 m1 then + inter (if zero_bit p2 m1 then l1 else r1) s2 + else if m1 < m2 && match_prefix p1 p2 m2 then + inter s1 (if zero_bit p1 m2 then l2 else r2) + else + empty + + let rec diff s1 s2 = + if (equal s1 s2) + then empty + else + match (s1.node,s2.node) with + | Empty, _ -> empty + | _, Empty -> s1 + | Leaf k1, _ -> if mem k1 s2 then empty else s1 + | _, Leaf k2 -> remove k2 s1 + | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> + if m1 == m2 && p1 == p2 then + merge (diff l1 l2, diff r1 r2) + else if m1 > m2 && match_prefix p2 p1 m1 then + if zero_bit p2 m1 then + merge (diff l1 s2, r1) + else + merge (l1, diff r1 s2) + else if m1 < m2 && match_prefix p1 p2 m2 then + if zero_bit p1 m2 then diff s1 l2 else diff s1 r2 + else + s1 + + + + +(*s All the following operations ([cardinal], [iter], [fold], [for_all], + [exists], [filter], [partition], [choose], [elements]) are + implemented as for any other kind of binary trees. *) + +let rec cardinal n = match n.node with + | Empty -> 0 + | Leaf _ -> 1 + | Branch (_,_,t0,t1) -> cardinal t0 + cardinal t1 + +let rec iter f n = match n.node with + | Empty -> () + | Leaf k -> f k + | Branch (_,_,t0,t1) -> iter f t0; iter f t1 + +let rec fold f s accu = match s.node with + | Empty -> accu + | Leaf k -> f k accu + | Branch (_,_,t0,t1) -> fold f t0 (fold f t1 accu) + +let rec for_all p n = match n.node with + | Empty -> true + | Leaf k -> p k + | Branch (_,_,t0,t1) -> for_all p t0 && for_all p t1 + +let rec exists p n = match n.node with + | Empty -> false + | Leaf k -> p k + | Branch (_,_,t0,t1) -> exists p t0 || exists p t1 + +let rec filter pr n = match n.node with + | Empty -> empty + | Leaf k -> if pr k then n else empty + | Branch (p,m,t0,t1) -> branch_ne (p, m, filter pr t0, filter pr t1) + +let partition p s = + let rec part (t,f as acc) n = match n.node with + | Empty -> acc + | Leaf k -> if p k then (add k t, f) else (t, add k f) + | Branch (_,_,t0,t1) -> part (part acc t0) t1 + in + part (empty, empty) s + +let rec choose n = match n.node with + | Empty -> raise Not_found + | Leaf k -> k + | Branch (_, _,t0,_) -> choose t0 (* we know that [t0] is non-empty *) + + +let split x s = + let coll k (l, b, r) = + if k < x then add k l, b, r + else if k > x then l, b, add k r + else l, true, r + in + fold coll s (empty, false, empty) + + + +let rec dump n = + Printf.eprintf "{ id = %i; key = %i ; node=" n.id n.key; + match n.node with + | Empty -> Printf.eprintf "Empty; }\n" + | Leaf k -> Printf.eprintf "Leaf %i; }\n" k + | Branch (p,m,l,r) -> + Printf.eprintf "Branch(%i,%i,id=%i,id=%i); }\n" + p m l.id r.id; + dump l; + dump r + +(*i*) +let make l = List.fold_left (fun acc e -> add e acc ) empty l +(*i*) + +(*s Additional functions w.r.t to [Set.S]. *) + +let rec intersect s1 s2 = (equal s1 s2) || + match (s1.node,s2.node) with + | Empty, _ -> false + | _, Empty -> false + | Leaf k1, _ -> mem k1 s2 + | _, Leaf k2 -> mem k2 s1 + | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> + if m1 == m2 && p1 == p2 then + intersect l1 l2 || intersect r1 r2 + else if m1 < m2 && match_prefix p2 p1 m1 then + intersect (if zero_bit p2 m1 then l1 else r1) s2 + else if m1 > m2 && match_prefix p1 p2 m2 then + intersect s1 (if zero_bit p1 m2 then l2 else r2) + else + false + + +let hash s = s.key + +let from_list l = List.fold_left (fun acc i -> add i acc) empty l diff --git a/ptset.mli b/ptset.mli new file mode 100644 index 0000000..c36a08d --- /dev/null +++ b/ptset.mli @@ -0,0 +1,89 @@ +(**************************************************************************) +(* *) +(* Copyright (C) Jean-Christophe Filliatre *) +(* *) +(* This software is free software; you can redistribute it and/or *) +(* modify it under the terms of the GNU Library General Public *) +(* License version 2.1, with the special exception on linking *) +(* described in file LICENSE. *) +(* *) +(* This software is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *) +(* *) +(**************************************************************************) + +(*i $Id: ptset.mli,v 1.10 2008-07-21 14:53:06 filliatr Exp $ i*) + +(*s Sets of integers implemented as Patricia trees. The following + signature is exactly [Set.S with type elt = int], with the same + specifications. This is a purely functional data-structure. The + performances are similar to those of the standard library's module + [Set]. The representation is unique and thus structural comparison + can be performed on Patricia trees. *) + +type t + +type elt = int + +val empty : t + +val is_empty : t -> bool + +val mem : int -> t -> bool + +val add : int -> t -> t + +val singleton : int -> t + +val remove : int -> t -> t + +val union : t -> t -> t + +val subset : t -> t -> bool + +val inter : t -> t -> t + +val diff : t -> t -> t + +val equal : t -> t -> bool + +val compare : t -> t -> int + +val elements : t -> int list + +val choose : t -> int + +val cardinal : t -> int + +val iter : (int -> unit) -> t -> unit + +val fold : (int -> 'a -> 'a) -> t -> 'a -> 'a + +val for_all : (int -> bool) -> t -> bool + +val exists : (int -> bool) -> t -> bool + +val filter : (int -> bool) -> t -> t + +val partition : (int -> bool) -> t -> t * t + +val split : int -> t -> t * bool * t + +(*s Warning: [min_elt] and [max_elt] are linear w.r.t. the size of the + set. In other words, [min_elt t] is barely more efficient than [fold + min t (choose t)]. *) + +val min_elt : t -> int +val max_elt : t -> int + +(*s Additional functions not appearing in the signature [Set.S] from ocaml + standard library. *) + +(* [intersect u v] determines if sets [u] and [v] have a non-empty + intersection. *) + +val intersect : t -> t -> bool +val hash : t -> int + +val from_list : int list -> t diff --git a/sigs.mli b/sigs.mli new file mode 100644 index 0000000..aa670ce --- /dev/null +++ b/sigs.mli @@ -0,0 +1,79 @@ +(* Quite useful, taken from CDuce, cduce/misc/custom.ml *) +module type Set = sig + include Set.S + val hash : t -> int + val equal : t -> t -> bool +end + +module type T = sig + type t + (* Debugging *) + val dump : Format.formatter -> t -> unit + val check : t -> unit (* Check internal invariants *) + + (* Data structures *) + val equal : t -> t -> bool + val hash : t -> int + val compare :t -> t -> int + val print : Format.formatter -> t -> unit +end + +module type TAG = +sig + include T + val attribute : t + val pcdata : t + val to_string : t -> string + val tag : string -> t +end + +module type BINARY_TREE = + functor (Tag:TAG) -> +sig + include T + module Tag : TAG with type t = Tag.t + val parse_xml_uri : string -> t + val parse_xml_string : string -> t + + val root : t -> t + + val is_string : t -> bool + val is_node : t -> bool + val is_nil : t -> bool + + val string : t -> string + val first_child : t -> t + val next_sibling : t -> t + val parent : t -> t + + val id : t -> int + val tag : t -> Tag.t + + val print_xml : out_channel -> t -> unit + val size : t -> int*int*int*int +end +module type BINARY_TREE_S = +sig + include T + module Tag : TAG + val parse_xml_uri : string -> t + val parse_xml_string : string -> t + + val root : t -> t + + val is_string : t -> bool + val is_node : t -> bool + val is_nil : t -> bool + + val string : t -> string + val first_child : t -> t + val next_sibling : t -> t + val parent : t -> t + + val id : t -> int + val tag : t -> Tag.t + + val print_xml : out_channel -> t -> unit + val dump : Format.formatter -> t -> unit + val size : t -> int*int*int*int +end