From d04661689691b4587cfc45a35e98604fcdc2b878 Mon Sep 17 00:00:00 2001 From: kim Date: Mon, 9 Feb 2009 07:48:55 +0000 Subject: [PATCH] Added missing files git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@153 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- ata.ml | 543 +++++++++++++++++++++++++++++++++++++++++++++ ata.mli | 56 +++++ custom.ml | 138 ++++++++++++ finiteCofinite.ml | 183 +++++++++++++++ finiteCofinite.mli | 38 ++++ ptset.ml | 362 ++++++++++++++++++++++++++++++ ptset.mli | 89 ++++++++ sigs.mli | 79 +++++++ 8 files changed, 1488 insertions(+) create mode 100644 ata.ml create mode 100644 ata.mli create mode 100644 custom.ml create mode 100644 finiteCofinite.ml create mode 100644 finiteCofinite.mli create mode 100644 ptset.ml create mode 100644 ptset.mli create mode 100644 sigs.mli diff --git a/ata.ml b/ata.ml new file mode 100644 index 0000000..c12be8a --- /dev/null +++ b/ata.ml @@ -0,0 +1,543 @@ +(* Todo refactor and remove this alias *) +INCLUDE "debug.ml" +module Tree = Tree.Binary + +let gen_id() = Oo.id (object end) +module State = struct + + type t = int + let mk = gen_id + +end +let mk_state = State.mk + +type state = State.t + +type predicate = Ptset.t*Ptset.t -> Tree.t -> [ `True | `False | `Maybe ] + +type formula_expr = + | False | True + | Or of formula * formula + | And of formula * formula + | Atom of ([ `Left | `Right ]*bool*state*predicate option) +and formula = { fid: int; + pos : formula_expr; + neg : formula; + st : Ptset.t*Ptset.t; + } + + +module FormNode = +struct + type t = formula + let hash = function + | False -> 0 + | True -> 1 + | And(f1,f2) -> 2+17*f1.fid + 37*f2.fid + | Or(f1,f2) -> 3+101*f1.fid + 253*f2.fid + | Atom(d,b,s,_) -> 5+(if d=`Left then 11 else 19)*(if b then 23 else 31)*s + + let hash t = (hash t.pos) land max_int + + let equal f1 f2 = + match f1.pos,f2.pos with + | False,False | True,True -> true + | Atom(d1,b1,s1,_), Atom(d2,b2,s2,_) when (d1 = d2) && (b1=b2) &&(s1=s2) -> true + | Or(g1,g2),Or(h1,h2) + | And(g1,g2),And(h1,h2) -> g1.fid == h1.fid && g2.fid == h2.fid + | _ -> false +end +module WH = Weak.Make(FormNode) + +let f_pool = WH.create 107 + +let true_,false_ = + let rec t = { fid = 1; pos = True; neg = f ; st = Ptset.empty,Ptset.empty} + and f = { fid = 0; pos = False; neg = t; st = Ptset.empty,Ptset.empty } + in + WH.add f_pool f; + WH.add f_pool t; + t,f + +let is_true f = f.fid == 1 +let is_false f = f.fid == 0 + + +let cons pos neg s1 s2 = + let rec pnode = + { fid = gen_id (); + pos = pos; + neg = nnode; + st = s1; } + and nnode = { + fid = gen_id (); + pos = neg; + neg = pnode; + st = s2; + } + in + (WH.merge f_pool pnode),(WH.merge f_pool nnode) + +let atom_ ?(pred=None) d p s = + let si = Ptset.singleton s in + let ss = match d with + | `Left -> si,Ptset.empty + | `Right -> Ptset.empty,si + in fst (cons (Atom(d,p,s,pred)) (Atom(d,not p,s,pred)) ss ss ) + +let merge_states f1 f2 = + let sp = + Ptset.union (fst f1.st) (fst f2.st), + Ptset.union (snd f1.st) (snd f2.st) + and sn = + Ptset.union (fst f1.neg.st) (fst f2.neg.st), + Ptset.union (snd f1.neg.st) (snd f2.neg.st) + in + sp,sn + +let or_ f1 f2 = + if is_true f1 || is_true f2 then true_ + else if is_false f1 && is_false f2 then false_ + else if is_false f1 then f2 + else if is_false f2 then f1 + else + let sp,sn = merge_states f1 f2 in + fst (cons (Or(f1,f2)) (And(f1.neg,f2.neg)) sp sn) + + + +let and_ f1 f2 = + if is_true f1 && is_true f2 then true_ + else if is_false f1 || is_false f2 then false_ + else if is_true f1 then f2 + else if is_true f2 then f1 + else + let sp,sn = merge_states f1 f2 in + fst (cons (And(f1,f2)) (Or(f1.neg,f2.neg)) sp sn) + + +let not_ f = f.neg + +type property = [ `None | `Existential ] +let get_prop h s = + try + Hashtbl.find h s + with + Not_found -> `None + +type t = { + id : int; + states : Ptset.t; + init : Ptset.t; + final : Ptset.t; + universal : Ptset.t; + (* Transitions of the Alternating automaton *) + (* (tags,q) -> (marking,formula) *) + phi : ((TagSet.t*state),(bool*formula)) Hashtbl.t; + delta : (TagSet.t,(Ptset.t*bool*Ptset.t*Ptset.t)) Hashtbl.t; + properties : (state,property) Hashtbl.t; + } + + module Pair (X : Set.OrderedType) (Y : Set.OrderedType) = + struct + type t = X.t*Y.t + let compare (x1,y1) (x2,y2) = + let r = X.compare x1 x2 in + if r == 0 then Y.compare y1 y2 + else r + end + + module PL = Set.Make (Pair (Ptset) (Ptset)) + + + let pr_st ppf l = Format.fprintf ppf "{"; + begin + match l with + | [] -> () + | [s] -> Format.fprintf ppf " %i" s + | p::r -> Format.fprintf ppf " %i" p; + List.iter (fun i -> Format.fprintf ppf "; %i" i) r + end; + Format.fprintf ppf " }" + let rec pr_frm ppf f = match f.pos with + | True -> Format.fprintf ppf "⊤" + | False -> Format.fprintf ppf "⊤" + | And(f1,f2) -> + Format.fprintf ppf "("; + (pr_frm ppf f1); + Format.fprintf ppf ") ∧ ("; + (pr_frm ppf f2); + Format.fprintf ppf ")" + | Or(f1,f2) -> + (pr_frm ppf f1); + Format.fprintf ppf " ∨ "; + (pr_frm ppf f2); + | Atom(dir,b,s,p) -> Format.fprintf ppf "%s%s[%i]%s" + (if b then "" else "¬") + (if dir = `Left then "↓₁" else "↓₂")s + (match p with None -> "" | _ -> " ") + + let dnf_hash = Hashtbl.create 17 + + let rec dnf_aux f = match f.pos with + | False -> PL.empty + | True -> PL.singleton (Ptset.empty,Ptset.empty) + | Atom(`Left,_,s,_) -> PL.singleton (Ptset.singleton s,Ptset.empty) + | Atom(`Right,_,s,_) -> PL.singleton (Ptset.empty,Ptset.singleton s) + | Or(f1,f2) -> PL.union (dnf f1) (dnf f2) + | And(f1,f2) -> + let pl1 = dnf f1 + and pl2 = dnf f2 + in + PL.fold (fun (s1,s2) acc -> + PL.fold ( fun (s1', s2') acc' -> + (PL.add + ((Ptset.union s1 s1'), + (Ptset.union s2 s2')) acc') ) + pl2 acc ) + pl1 PL.empty + + + and dnf f = + try + Hashtbl.find dnf_hash f.fid + with + Not_found -> + let d = dnf_aux f in + Hashtbl.add dnf_hash f.fid d;d + + + let equal_form f1 f2 = + (f1.fid == f2.fid) || (FormNode.equal f1 f2) || (PL.equal (dnf f1) (dnf f2)) + + let alt_trans_to_nfa ?(accu=[]) ts s mark f = + (* todo memoize *) + let f' = dnf f in + PL.fold (fun (s1,s2) acc -> (ts,s,mark,s1,s2)::acc) f' accu + + + let possible_trans ?(accu=[]) a q tag = + (* todo change the data structure to avoid creating (,) *) + let ata_trans = + Hashtbl.fold (fun (ts,s) (m,f) acc -> + if (q==s) && (TagSet.mem tag ts) + then (ts,s,m,f)::acc + else acc) a.phi [] + in + if ata_trans != [] + then begin + List.iter (fun (ts,s,m,f) -> + (* The following builds too many transitions in the nfa + let ts' = TagSet.remove tag ts + in + Hashtbl.remove a.phi (ts,s); + if not (TagSet.is_empty ts') + then Hashtbl.add a.phi (ts',s) (m,f) + *) + Hashtbl.remove a.phi (ts,s) + ) ata_trans; + (* let tstag = TagSet.tag tag in *) + let nfa_trs = List.fold_left (fun acc (ts,s,m,f) -> + alt_trans_to_nfa ~accu:acc ts s m f) [] ata_trans + in + List.iter (fun (ts,s,m,s1,s2) -> + Hashtbl.add a.delta ts ((Ptset.singleton s),m,s1,s2)) nfa_trs + end; + Hashtbl.fold (fun ts (s,m,s1,s2) acc -> + if (Ptset.mem q s) && (TagSet.mem tag ts) + then (m,s1,s2)::acc else acc) a.delta accu + + let dump ppf a = + Format.fprintf ppf "Automaton (%i) :\n" a.id; + Format.fprintf ppf "States : "; pr_st ppf (Ptset.elements a.states); + Format.fprintf ppf "\nInitial states : "; pr_st ppf (Ptset.elements a.init); + Format.fprintf ppf "\nFinal states : "; pr_st ppf (Ptset.elements a.final); + Format.fprintf ppf "\nUniversal states : "; pr_st ppf (Ptset.elements a.universal); + Format.fprintf ppf "\nAlternating transitions :\n------------------------------\n"; + let l = Hashtbl.fold (fun k t acc -> (k,t)::acc) a.phi [] in + let l = List.sort (fun ((tsx,x),_) ((tsy,y),_) -> if x-y == 0 then TagSet.compare tsx tsy else x-y) l in + List.iter (fun ((ts,q),(b,f)) -> + + let s = + try + Tag.to_string (TagSet.choose ts) + with + | _ -> "*" + in + Format.fprintf ppf "(%s,%i) %s " s q (if b then "=>" else "->"); + pr_frm ppf f; + Format.fprintf ppf "\n")l; + + Format.fprintf ppf "NFA transitions :\n------------------------------\n"; + Hashtbl.iter (fun (ts) (q,b,s1,s2) -> + + let s = + try + Tag.to_string (TagSet.choose ts) + with + | _ -> "*" + in + pr_st ppf (Ptset.elements q); + Format.fprintf ppf ",%s %s " s (if b then "=>" else "->"); + Format.fprintf ppf "("; + pr_st ppf (Ptset.elements s1); + Format.fprintf ppf ","; + pr_st ppf (Ptset.elements s2); + Format.fprintf ppf ")\n" ) a.delta; + Format.fprintf ppf "=======================================\n" + + module Transitions = struct + type t = state*TagSet.t*bool*formula + let ( ?< ) x = x + let ( >< ) state label = state,label + let ( >=> ) (state,(label,mark)) form = (state,label,mark,form) + let ( +| ) f1 f2 = or_ f1 f2 + let ( *& ) f1 f2 = and_ f1 f2 + let ( ** ) d s = atom_ d true s + + + end + type transition = Transitions.t + + let equal_trans (q1,t1,m1,f1) (q2,t2,m2,f2) = + (q1 == q2) && (TagSet.equal t1 t2) && (m1 == m2) && (equal_form f1 f2) + + module TS : Set.S with type elt = Tree.t = Set.Make(Tree) + let res = ref TS.empty + + + module BottomUpNew = struct + +IFDEF DEBUG +THEN + type trace = + | TNil of Ptset.t*Ptset.t + | TNode of Ptset.t*Ptset.t*bool* (int*bool*formula) list + + let traces = Hashtbl.create 17 + let dump_trace t = + let out = open_out "debug_trace.dot" + in + let outf = Format.formatter_of_out_channel out in + + let rec aux t num = + if Tree.is_node t + then + match (try Hashtbl.find traces (Tree.id t) with Not_found -> TNil(Ptset.empty,Ptset.empty)) with + | TNode(r,s,mark,trs) -> + let numl = aux (Tree.left t) num in + let numr = aux (Tree.right t) (numl+1) in + let mynum = numr + 1 in + Format.fprintf outf "n%i [ label=\"<%s>\\nr=" mynum (Tag.to_string (Tree.tag t)); + pr_st outf (Ptset.elements r); + Format.fprintf outf "\\ns="; + pr_st outf (Ptset.elements s); + List.iter (fun (q,m,f) -> + Format.fprintf outf "\\n%i %s" q (if m then "⇨" else "→"); + pr_frm outf f ) trs; + Format.fprintf outf "\", %s shape=box ];\n" + (if mark then "color=cyan1, style=filled," else ""); + let _ = Format.fprintf outf "n%i -> n%i;\n" mynum numl in + let _ = Format.fprintf outf "n%i -> n%i;\n" mynum numr in + mynum + | TNil(r,s) -> Format.fprintf outf "n%i [ shape=box, label=\"Nil\\nr=" num; + pr_st outf (Ptset.elements r); + Format.fprintf outf "\\ns="; + pr_st outf (Ptset.elements s); + Format.fprintf outf "\"];\n";num + else + match Hashtbl.find traces (-10) with + | TNil(r,s) -> + Format.fprintf outf "n%i [ shape=box, label=\"Nil\\nr=" num; + pr_st outf (Ptset.elements r); + Format.fprintf outf "\\ns="; + pr_st outf (Ptset.elements s); + Format.fprintf outf "\"];\n"; + num + | _ -> assert false + + in + Format.fprintf outf "digraph G {\n"; + ignore(aux t 0); + Format.fprintf outf "}\n%!"; + close_out out; + ignore(Sys.command "dot -Tsvg debug_trace.dot > debug_trace.svg") +END + + + + let hfeval = Hashtbl.create 17 + let miss = ref 0 + let call = ref 0 + let rec findlist s1 s2 = function + | [] -> raise Not_found + | ((ss1,ss2),r)::_ when + (not (Ptset.is_empty s1)) && (Ptset.subset s1 ss1) && + (not (Ptset.is_empty s2)) && (Ptset.subset s2 ss2) -> r + | _::r -> findlist s1 s2 r + + let eval_form f s1 s2 res1 res2 = + + let rec eval_aux f = match f.pos with + | Atom(`Left,b,q,_) -> if b == (Ptset.mem q s1) then (true,res1) else false,TS.empty + | Atom(`Right,b,q,_) -> if b == (Ptset.mem q s2) then (true,res2) else false,TS.empty + | True -> true,(TS.union res1 res2) + | False -> false,TS.empty + | Or(f1,f2) -> + let b1,r1 = eval_aux f1 + and b2,r2 = eval_aux f2 + in + let r1 = if b1 then r1 else TS.empty + and r2 = if b2 then r2 else TS.empty + in (b1 || b2, TS.union r1 r2) + + | And(f1,f2) -> + let b1,r1 = eval_aux f1 + and b2,r2 = eval_aux f2 + in + if b1 && b2 then (true, TS.union r1 r2) + else (false,TS.empty) + + in incr call;eval_aux f + + + (* If true, then the formule may evaluate to true in the future, + if false it will always return false, i.e. necessary conditions are not + satisfied + *) + + let val3 = function true -> `True + | false -> `False + + let or3 a b = match a,b with + | `True,_ | _,`True -> `True + | `False,`False -> `False + | _ -> `Maybe + + let and3 a b = match a,b with + | `True,`True -> `True + | `False,_ | _,`False -> `False + | _ -> `Maybe + let not3 = function + | `True -> `False + | `False -> `True + | `Maybe -> `Maybe + + let true3 = function true -> `Maybe + | false -> `False + + let may_eval (s1,s2) f t = + let rec aux f = match f.pos with + | True -> `True + | False -> `False + | Or(f1,f2) -> or3 (aux f1) (aux f2) + | And(f1,f2) -> and3 (aux f1) (aux f2) + | Atom(dir,b,q,predo) -> + and3 (true3 ((Ptset.mem q (match dir with + | `Left -> s1 + | `Right -> s2)) == b)) + (match predo with + | Some pred -> (pred (s1,s2) t) + | None -> `True) + + in aux f + + let rec accepting_among a t r = + let r = Ptset.diff r a.final in + let rest = Ptset.inter a.final r in + if Ptset.is_empty r then r,TS.empty else + if (not (Tree.is_node t)) + then + let _ = D(Hashtbl.add traces (-10) (TNil(r,Ptset.inter a.final r))) + in + Ptset.inter a.final r,TS.empty + else + let tag = Tree.tag t + and t1 = Tree.first_child t + and t2 = Tree.next_sibling t + in + let r1,r2,trs = + Hashtbl.fold (fun (ts,q) ((m,f)as tr) ((ar1,ar2,lt)as acc) -> + if (TagSet.mem tag ts) && Ptset.mem q r + then begin + (* Format.fprintf Format.err_formatter "Tree with tag %s qualifies for transition : (%s,%i)%s" + (Tag.to_string tag) + (try + Tag.to_string (TagSet.choose ts) + with + | _ -> "*" ) + q + (if m then "=>" else "->"); + pr_frm Format.err_formatter f; + Format.fprintf Format.err_formatter "\n"; *) + let ls,rs = f.st in + Ptset.union ls ar1,Ptset.union rs ar2,(q,tr)::lt + end + else acc + ) a.phi (Ptset.empty,Ptset.empty,[]) + in + let rtrue,rfalse,rmay,trs,selnodes = + List.fold_left (fun (at,af,am,atrs,selnodes) (q,(m,f)) -> + let ppf = Format.err_formatter in + match (*may_eval (r1,r2) f t *) `Maybe with + | `True -> + (* Format.fprintf ppf "Will skip (%i) %s " q (if m then "=>" else "->"); + pr_frm ppf f; + Format.fprintf ppf ", always true \n"; *) + (Ptset.add q at),af,am,atrs,TS.add t selnodes + | `False -> + (*Format.fprintf ppf "Will skip (%i) %s " q (if m then "=>" else "->"); + pr_frm ppf f; + Format.fprintf ppf ", always false \n"; *) + at,(Ptset.add q af),am,atrs,selnodes + + | `Maybe -> +(* Format.fprintf ppf "Must take (%i) %s " q (if m then "=>" else "->"); + pr_frm ppf f; + Format.fprintf ppf "\n"; *) + at,af,(Ptset.add q am),(q,(m,f))::atrs,selnodes) + (Ptset.empty,Ptset.empty,Ptset.empty,[],TS.empty) trs + in + let rr1,rr2,trs = + List.fold_left (fun ((ar1,ar2,trs)as acc) ((q,(_,f)as tr)) -> + if Ptset.mem q rmay + then let ls,rs = f.st in + Ptset.union ls ar1,Ptset.union rs ar2,tr::trs + else acc) (Ptset.empty,Ptset.empty,[]) trs + in + let s1,res1 = accepting_among a t1 rr1 + and s2,res2 = accepting_among a t2 rr2 + in + let res,set,mark,trs = List.fold_left (fun ((sel_nodes,res,amark,acctr) as acc) (q,(mark,f)) -> + let b,resnodes = eval_form f s1 s2 res1 res2 in + (* if b then begin + pr_st Format.err_formatter (Ptset.elements s1); + Format.fprintf Format.err_formatter ","; + pr_st Format.err_formatter (Ptset.elements s2); + Format.fprintf Format.err_formatter " satisfies "; + pr_frm Format.err_formatter f; + Format.fprintf Format.err_formatter " for input tree %s\n" (Tag.to_string tag); + end; *) + if b + then + (TS.union + (if mark then TS.add t resnodes else resnodes) + sel_nodes) + ,Ptset.add q res,amark||mark,(q,mark,f)::acctr + else acc + ) (TS.empty,rtrue,false,[]) trs + in + + let set = Ptset.union a.final set in + let _ = D(Hashtbl.add traces (Tree.id t) (TNode(r,set,mark,trs))) in + set,res + + + let run a t = + let st,res = accepting_among a t a.init in + let b = Ptset.is_empty (st) in + let _ = D(dump_trace t) in + if b then [] + else (TS.elements res) + + end diff --git a/ata.mli b/ata.mli new file mode 100644 index 0000000..f5e2c4f --- /dev/null +++ b/ata.mli @@ -0,0 +1,56 @@ +type state = int +val mk_state : unit -> state + +type predicate = Ptset.t*Ptset.t -> Tree.Binary.t -> [ `True | `False | `Maybe ] +type formula_expr = + False + | True + | Or of formula * formula + | And of formula * formula + | Atom of ([ `Left | `Right ] * bool * state * predicate option) +and formula = { fid : int; pos : formula_expr; neg : formula; st : Ptset.t*Ptset.t;} +val true_ : formula +val false_ : formula +val atom_ : ?pred:predicate option -> [`Left | `Right ] -> bool -> state -> formula +val and_ : formula -> formula -> formula +val or_ : formula -> formula -> formula +val not_ : formula -> formula +val equal_form : formula -> formula -> bool +val pr_frm : Format.formatter -> formula -> unit + + +type property = [ `None | `Existential ] + +type t = { + id : int; + states : Ptset.t; + init : Ptset.t; + final : Ptset.t; + universal : Ptset.t; + phi : (TagSet.t * state, bool * formula) Hashtbl.t; + delta : (TagSet.t, Ptset.t * bool * Ptset.t * Ptset.t) Hashtbl.t; + properties : (state,property) Hashtbl.t; +} +val dump : Format.formatter -> t -> unit + +module Transitions : sig +type t = state*TagSet.t*bool*formula +(* Doing this avoid the parenthesis *) +val ( ?< ) : state -> state +val ( >< ) : state -> TagSet.t*bool -> state*(TagSet.t*bool) +val ( >=> ) : state*(TagSet.t*bool) -> formula -> t +val ( +| ) : formula -> formula -> formula +val ( *& ) : formula -> formula -> formula +val ( ** ) : [`Left | `Right ] -> state -> formula + +end +type transition = Transitions.t +val equal_trans : transition -> transition -> bool + + +module BottomUpNew : +sig + val miss : int ref + val call : int ref + val run : t -> Tree.Binary.t -> Tree.Binary.t list +end diff --git a/custom.ml b/custom.ml new file mode 100644 index 0000000..a71f32a --- /dev/null +++ b/custom.ml @@ -0,0 +1,138 @@ +(* also taken from CDuce misc/custom.ml + this module should always be included not referenced with Open +*) + +module Dummy = +struct + let dump _ _ = failwith "dump not implemented" + let check _ = failwith "check not implemented" + let equal _ _ = failwith "equal not implemented" + let hash _ = failwith "hash not implemented" + let compare _ _ = failwith "compare not implemented" + let print _ _ = failwith "print not implemented" +end + +(* Some of this borrowed from Jean-Christophe Filliâtre : + http://www.lri.fr/~filliatr/ftp/ocaml/ds/bitset.ml.html +*) + +module IntSet : Set.S with type elt = int= +struct + let max = Sys.word_size - 2 + type t = int + type elt = int + + let empty = 0 + let full = -1 + let is_empty x = x == 0 + let mem e s = ((1 lsl e) land s) != 0 + let add e s = (1 lsl e) lor s + let singleton e = (1 lsl e) + let union = (lor) + let inter = (land) + let diff a b = a land (lnot b) + let remove e s = (lnot (1 lsl e) land s) + let compare = (-) + let equal = (==) + let subset a b = a land (lnot b) == 0 + let cardinal s = + let rec loop n s = + if s == 0 then n else loop (succ n) (s - (s land (-s))) + in + loop 0 s +(* inverse of bit i = 1 lsl i i.e. tib i = log_2(i) *) +let log2 = Array.create 255 0 +let () = for i = 0 to 7 do log2.(1 lsl i) <- i done + +(* assumption: x is a power of 2 *) +let tib32 x = + if x land 0xFFFF == 0 then + let x = x lsr 16 in + if x land 0xFF == 0 then 24 + log2.(x lsr 8) else 16 + log2.(x) + else + if x land 0xFF == 0 then 8 + log2.(x lsr 8) else log2.(x) + +let ffffffff = (0xffff lsl 16) lor 0xffff +let tib64 x = + if x land ffffffff == 0 then 32 + tib32 (x lsr 32) else tib32 x + +let tib = + match Sys.word_size with 32 -> tib32 | 64 -> tib64 | _ -> assert false + +let min_elt s = + if s == 0 then raise Not_found; + tib (s land (-s)) + +let choose = min_elt + +(* TODO: improve? *) +let max_elt s = + if s == 0 then raise Not_found; + let rec loop i = + if s land i != 0 then tib i + else if i = 1 then raise Not_found else loop (i lsr 1) + in + loop min_int + +let rec elements s = + if s == 0 then [] else let i = s land (-s) in tib i :: elements (s - i) + +let rec iter f s = + if s != 0 then let i = s land (-s) in f (tib i); iter f (s - i) + +let rec fold f s acc = + if s == 0 then acc else let i = s land (-s) in fold f (s - i) (f (tib i) acc) + +let rec for_all p s = + s == 0 || let i = s land (-s) in p (tib i) && for_all p (s - i) + +let rec exists p s = + s != 0 && let i = s land (-s) in p (tib i) || exists p (s - i) + +let rec filter p s = + if s == 0 then + 0 + else + let i = s land (-s) in + let s = filter p (s - i) in + if p (tib i) then s + i else s + +let rec partition p s = + if s == 0 then + 0, 0 + else + let i = s land (-s) in + let st,sf = partition p (s - i) in + if p (tib i) then st + i, sf else st, sf + i + +let split i s = + let bi = 1 lsl i in + s land (bi - 1), s land bi != 0, s land (-1 lsl (i+1)) +end + + +module Bool = +struct + module Make (X : Sigs.T) (Y : Sigs.T) : + Sigs.T with type t = X.t*Y.t = + struct + module Fst = X + module Snd = Y + type t = X.t*Y.t + let dump ppf (x,y) = + X.dump ppf x; + Y.dump ppf y + + let check (x,y) = X.check x; Y.check y + let equal (x,y) (z,t) = + X.equal x z && Y.equal y t + let hash (x,y) = (X.hash x) + 4093 * Y.hash y + let compare (x,y) (z,t) = + let r = X.compare x z in + if r == 0 + then Y.compare y t + else r + + let print _ _ = failwith "compare not implemented" + end +end diff --git a/finiteCofinite.ml b/finiteCofinite.ml new file mode 100644 index 0000000..32f0e48 --- /dev/null +++ b/finiteCofinite.ml @@ -0,0 +1,183 @@ +(******************************************************************************) +(* SXSI : XPath evaluator *) +(* Kim Nguyen (Kim.Nguyen@nicta.com.au) *) +(* Copyright NICTA 2008 *) +(* Distributed under the terms of the LGPL (see LICENCE) *) +(******************************************************************************) + +exception InfiniteSet +module type S = +sig + type elt + type t + val empty : t + val any : t + val is_empty : t -> bool + val is_any : t -> bool + val is_finite : t -> bool + val kind : t -> [ `Finite | `Cofinite ] + val singleton : elt -> t + val mem : elt -> t -> bool + val add : elt -> t -> t + val remove : elt -> t -> t + val cup : t -> t -> t + val cap : t -> t -> t + val diff : t -> t -> t + val neg : t -> t + val compare : t -> t -> int + val subset : t -> t -> bool + val kind_split : t list -> t * t + val fold : (elt -> 'a -> 'a) -> t -> 'a -> 'a + val for_all : (elt -> bool) -> t -> bool + val exists : (elt -> bool) -> t -> bool + val filter : (elt -> bool) -> t -> t + val partition : (elt -> bool) -> t -> t * t + val cardinal : t -> int + val elements : t -> elt list + val from_list : elt list -> t + val choose : t -> elt + val hash : t -> int + val equal : t -> t -> bool +end + +module Make (E : Sigs.Set) : S with type elt = E.elt = +struct + + type elt = E.elt + type t = Finite of E.t | CoFinite of E.t + + + let empty = Finite E.empty + let any = CoFinite E.empty + + let is_empty = function + Finite s when E.is_empty s -> true + | _ -> false + + let is_any = function + CoFinite s when E.is_empty s -> true + | _ -> false + + let is_finite = function + | Finite _ -> true | _ -> false + + let kind = function + Finite _ -> `Finite + | _ -> `Cofinite + + let mem x = function Finite s -> E.mem x s + | CoFinite s -> not (E.mem x s) + + let singleton x = Finite (E.singleton x) + let add e = function + | Finite s -> Finite (E.add e s) + | CoFinite s -> CoFinite (E.remove e s) + let remove e = function + | Finite s -> Finite (E.remove e s) + | CoFinite s -> CoFinite (E.add e s) + + let cup s t = match (s,t) with + | Finite s, Finite t -> Finite (E.union s t) + | CoFinite s, CoFinite t -> CoFinite ( E.inter s t) + | Finite s, CoFinite t -> CoFinite (E.diff t s) + | CoFinite s, Finite t-> CoFinite (E.diff s t) + + let cap s t = match (s,t) with + | Finite s, Finite t -> Finite (E.inter s t) + | CoFinite s, CoFinite t -> CoFinite (E.union s t) + | Finite s, CoFinite t -> Finite (E.diff s t) + | CoFinite s, Finite t-> Finite (E.diff t s) + + let diff s t = match (s,t) with + | Finite s, Finite t -> Finite (E.diff s t) + | Finite s, CoFinite t -> Finite(E.inter s t) + | CoFinite s, Finite t -> CoFinite(E.union t s) + | CoFinite s, CoFinite t -> Finite (E.diff t s) + + let neg = function + | Finite s -> CoFinite s + | CoFinite s -> Finite s + + let compare s t = match (s,t) with + | Finite s , Finite t -> E.compare s t + | CoFinite s , CoFinite t -> E.compare t s + | Finite _, CoFinite _ -> -1 + | CoFinite _, Finite _ -> 1 + + let subset s t = match (s,t) with + | Finite s , Finite t -> E.subset s t + | CoFinite s , CoFinite t -> E.subset t s + | Finite s, CoFinite t -> E.is_empty (E.inter s t) + | CoFinite _, Finite _ -> false + + (* given a list l of type t list, + returns two sets (f,c) where : + - f is the union of all the finite sets of l + - c is the union of all the cofinite sets of l + - f and c are disjoint + Invariant : cup f c = List.fold_left cup empty l + + We treat the CoFinite part explicitely : + *) + + let kind_split l = + + let rec next_finite_cofinite facc cacc = function + | [] -> Finite facc, CoFinite (E.diff cacc facc) + | Finite s ::r -> next_finite_cofinite (E.union s facc) cacc r + | CoFinite _ ::r when E.is_empty cacc -> next_finite_cofinite facc cacc r + | CoFinite s ::r -> next_finite_cofinite facc (E.inter cacc s) r + in + let rec first_cofinite facc = function + | [] -> empty,empty + | Finite s :: r-> first_cofinite (E.union s facc) r + | CoFinite s :: r -> next_finite_cofinite facc s r + in + first_cofinite E.empty l + + let fold f t a = match t with + | Finite s -> E.fold f s a + | CoFinite _ -> raise InfiniteSet + + let for_all f = function + | Finite s -> E.for_all f s + | CoFinite _ -> raise InfiniteSet + + let exists f = function + | Finite s -> E.exists f s + | CoFinite _ -> raise InfiniteSet + + let filter f = function + | Finite s -> Finite (E.filter f s) + | CoFinite _ -> raise InfiniteSet + + let partition f = function + | Finite s -> let a,b = E.partition f s in Finite a,Finite b + | CoFinite _ -> raise InfiniteSet + + let cardinal = function + | Finite s -> E.cardinal s + | CoFinite _ -> raise InfiniteSet + + let elements = function + | Finite s -> E.elements s + | CoFinite _ -> raise InfiniteSet + + let from_list l = + Finite(List.fold_left (fun x a -> E.add a x ) E.empty l) + + let choose = function + Finite s -> E.choose s + | _ -> raise InfiniteSet + + let equal a b = + match a,b with + | Finite x, Finite y | CoFinite x, CoFinite y -> E.equal x y + | _ -> false + + let hash = + function Finite x -> (E.hash x) + | CoFinite x -> ( ~-(E.hash x) land max_int) + +end + diff --git a/finiteCofinite.mli b/finiteCofinite.mli new file mode 100644 index 0000000..b489557 --- /dev/null +++ b/finiteCofinite.mli @@ -0,0 +1,38 @@ +exception InfiniteSet + +module type S = + sig + type elt + type t + val empty : t + val any : t + val is_empty : t -> bool + val is_any : t -> bool + val is_finite : t -> bool + val kind : t -> [ `Cofinite | `Finite ] + val singleton : elt -> t + val mem : elt -> t -> bool + val add : elt -> t -> t + val remove : elt -> t -> t + val cup : t -> t -> t + val cap : t -> t -> t + val diff : t -> t -> t + val neg : t -> t + val compare : t -> t -> int + val subset : t -> t -> bool + val kind_split : t list -> t * t + val fold : (elt -> 'a -> 'a) -> t -> 'a -> 'a + val for_all : (elt -> bool) -> t -> bool + val exists : (elt -> bool) -> t -> bool + val filter : (elt -> bool) -> t -> t + val partition : (elt -> bool) -> t -> t * t + val cardinal : t -> int + val elements : t -> elt list + val from_list : elt list -> t + val choose : t -> elt + val hash : t -> int + val equal : t -> t -> bool + end + +module Make : functor (E : Sigs.Set) -> S with type elt = E.elt + diff --git a/ptset.ml b/ptset.ml new file mode 100644 index 0000000..5c029f7 --- /dev/null +++ b/ptset.ml @@ -0,0 +1,362 @@ +(***************************************************************************) +(* Implementation for sets of positive integers implemented as deeply hash-*) +(* consed Patricia trees. Provide fast set operations, fast membership as *) +(* well as fast min and max elements. Hash consing provides O(1) equality *) +(* checking *) +(* *) +(***************************************************************************) + + +type elt = int + +type t = { id : int; + key : int; (* hash *) + node : node } +and node = + | Empty + | Leaf of int + | Branch of int * int * t * t + +module Node = + struct + type _t = t + type t = _t + let hash x = x.key + let hash_node = function + | Empty -> 0 + | Leaf i -> i+1 + (* power of 2 +/- 1 are fast ! *) + | Branch (b,i,l,r) -> + (b lsl 1)+ b + i+(i lsl 4) + (l.key lsl 5)-l.key + + (r.key lsl 7) - r.key + let hash_node x = (hash_node x) land max_int + let equal x y = match (x.node,y.node) with + | Empty,Empty -> true + | Leaf k1, Leaf k2 when k1 == k2 -> true + | Branch(p1,m1,l1,r1), Branch(p2,m2,l2,r2) when m1==m2 && p1==p2 && + (l1.id == l2.id) && (r1.id == r2.id) -> true + | _ -> false + end + +module WH = Weak.Make(Node) + +let pool = WH.create 4093 + +(* Neat trick thanks to Alain Frisch ! *) + +let gen_uid () = Oo.id (object end) + +let empty = { id = gen_uid (); + key = 0; + node = Empty } + +let _ = WH.add pool empty + +let is_empty = function { id = 0 } -> true | _ -> false + +let rec norm n = + let v = { id = gen_uid (); + key = Node.hash_node n; + node = n } + in + WH.merge pool v + +(* WH.merge pool *) + +let branch (p,m,l,r) = norm (Branch(p,m,l,r)) +let leaf k = norm (Leaf k) + +(* To enforce the invariant that a branch contains two non empty sub-trees *) +let branch_ne = function + | (_,_,e,t) when is_empty e -> t + | (_,_,t,e) when is_empty e -> t + | (p,m,t0,t1) -> branch (p,m,t0,t1) + +(********** from here on, only use the smart constructors *************) + +let zero_bit k m = (k land m) == 0 + +let singleton k = if k < 0 then failwith "singleton" else leaf k + +let rec mem k n = match n.node with + | Empty -> false + | Leaf j -> k == j + | Branch (p, _, l, r) -> if k <= p then mem k l else mem k r + +let rec min_elt n = match n.node with + | Empty -> raise Not_found + | Leaf k -> k + | Branch (_,_,s,_) -> min_elt s + + let rec max_elt n = match n.node with + | Empty -> raise Not_found + | Leaf k -> k + | Branch (_,_,_,t) -> max_elt t + + let elements s = + let rec elements_aux acc n = match n.node with + | Empty -> acc + | Leaf k -> k :: acc + | Branch (_,_,l,r) -> elements_aux (elements_aux acc r) l + in + elements_aux [] s + + let mask k m = (k lor (m-1)) land (lnot m) + + let naive_highest_bit x = + assert (x < 256); + let rec loop i = + if i = 0 then 1 else if x lsr i = 1 then 1 lsl i else loop (i-1) + in + loop 7 + + let hbit = Array.init 256 naive_highest_bit + + let highest_bit_32 x = + let n = x lsr 24 in if n != 0 then hbit.(n) lsl 24 + else let n = x lsr 16 in if n != 0 then hbit.(n) lsl 16 + else let n = x lsr 8 in if n != 0 then hbit.(n) lsl 8 + else hbit.(x) + + let highest_bit_64 x = + let n = x lsr 32 in if n != 0 then (highest_bit_32 n) lsl 32 + else highest_bit_32 x + + let highest_bit = match Sys.word_size with + | 32 -> highest_bit_32 + | 64 -> highest_bit_64 + | _ -> assert false + + let branching_bit p0 p1 = highest_bit (p0 lxor p1) + + let join (p0,t0,p1,t1) = + let m = branching_bit p0 p1 in + if zero_bit p0 m then + branch (mask p0 m, m, t0, t1) + else + branch (mask p0 m, m, t1, t0) + + let match_prefix k p m = (mask k m) == p + + let add k t = + let rec ins n = match n.node with + | Empty -> leaf k + | Leaf j -> if j == k then n else join (k, leaf k, j, n) + | Branch (p,m,t0,t1) -> + if match_prefix k p m then + if zero_bit k m then + branch (p, m, ins t0, t1) + else + branch (p, m, t0, ins t1) + else + join (k, leaf k, p, n) + in + ins t + + let remove k t = + let rec rmv n = match n.node with + | Empty -> empty + | Leaf j -> if k == j then empty else n + | Branch (p,m,t0,t1) -> + if match_prefix k p m then + if zero_bit k m then + branch_ne (p, m, rmv t0, t1) + else + branch_ne (p, m, t0, rmv t1) + else + n + in + rmv t + + (* should run in O(1) thanks to Hash consing *) + + let equal = (=) + + let compare = compare + + + let rec merge (s,t) = + if (equal s t) (* This is cheap thanks to hash-consing *) + then s + else + match s.node,t.node with + | Empty, _ -> t + | _, Empty -> s + | Leaf k, _ -> add k t + | _, Leaf k -> add k s + | Branch (p,m,s0,s1), Branch (q,n,t0,t1) -> + if m == n && match_prefix q p m then + branch (p, m, merge (s0,t0), merge (s1,t1)) + else if m > n && match_prefix q p m then + if zero_bit q m then + branch (p, m, merge (s0,t), s1) + else + branch (p, m, s0, merge (s1,t)) + else if m < n && match_prefix p q n then + + if zero_bit p n then + branch (q, n, merge (s,t0), t1) + else + branch (q, n, t0, merge (s,t1)) + else + (* The prefixes disagree. *) + join (p, s, q, t) + + let union s t = merge (s,t) + + let rec subset s1 s2 = (equal s1 s2) || + match (s1.node,s2.node) with + | Empty, _ -> true + | _, Empty -> false + | Leaf k1, _ -> mem k1 s2 + | Branch _, Leaf _ -> false + | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> + if m1 == m2 && p1 == p2 then + subset l1 l2 && subset r1 r2 + else if m1 < m2 && match_prefix p1 p2 m2 then + if zero_bit p1 m2 then + subset l1 l2 && subset r1 l2 + else + subset l1 r2 && subset r1 r2 + else + false + + let rec inter s1 s2 = + if (equal s1 s2) + then s1 + else + match (s1.node,s2.node) with + | Empty, _ -> empty + | _, Empty -> empty + | Leaf k1, _ -> if mem k1 s2 then s1 else empty + | _, Leaf k2 -> if mem k2 s1 then s2 else empty + | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> + if m1 == m2 && p1 == p2 then + merge (inter l1 l2, inter r1 r2) + else if m1 > m2 && match_prefix p2 p1 m1 then + inter (if zero_bit p2 m1 then l1 else r1) s2 + else if m1 < m2 && match_prefix p1 p2 m2 then + inter s1 (if zero_bit p1 m2 then l2 else r2) + else + empty + + let rec diff s1 s2 = + if (equal s1 s2) + then empty + else + match (s1.node,s2.node) with + | Empty, _ -> empty + | _, Empty -> s1 + | Leaf k1, _ -> if mem k1 s2 then empty else s1 + | _, Leaf k2 -> remove k2 s1 + | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> + if m1 == m2 && p1 == p2 then + merge (diff l1 l2, diff r1 r2) + else if m1 > m2 && match_prefix p2 p1 m1 then + if zero_bit p2 m1 then + merge (diff l1 s2, r1) + else + merge (l1, diff r1 s2) + else if m1 < m2 && match_prefix p1 p2 m2 then + if zero_bit p1 m2 then diff s1 l2 else diff s1 r2 + else + s1 + + + + +(*s All the following operations ([cardinal], [iter], [fold], [for_all], + [exists], [filter], [partition], [choose], [elements]) are + implemented as for any other kind of binary trees. *) + +let rec cardinal n = match n.node with + | Empty -> 0 + | Leaf _ -> 1 + | Branch (_,_,t0,t1) -> cardinal t0 + cardinal t1 + +let rec iter f n = match n.node with + | Empty -> () + | Leaf k -> f k + | Branch (_,_,t0,t1) -> iter f t0; iter f t1 + +let rec fold f s accu = match s.node with + | Empty -> accu + | Leaf k -> f k accu + | Branch (_,_,t0,t1) -> fold f t0 (fold f t1 accu) + +let rec for_all p n = match n.node with + | Empty -> true + | Leaf k -> p k + | Branch (_,_,t0,t1) -> for_all p t0 && for_all p t1 + +let rec exists p n = match n.node with + | Empty -> false + | Leaf k -> p k + | Branch (_,_,t0,t1) -> exists p t0 || exists p t1 + +let rec filter pr n = match n.node with + | Empty -> empty + | Leaf k -> if pr k then n else empty + | Branch (p,m,t0,t1) -> branch_ne (p, m, filter pr t0, filter pr t1) + +let partition p s = + let rec part (t,f as acc) n = match n.node with + | Empty -> acc + | Leaf k -> if p k then (add k t, f) else (t, add k f) + | Branch (_,_,t0,t1) -> part (part acc t0) t1 + in + part (empty, empty) s + +let rec choose n = match n.node with + | Empty -> raise Not_found + | Leaf k -> k + | Branch (_, _,t0,_) -> choose t0 (* we know that [t0] is non-empty *) + + +let split x s = + let coll k (l, b, r) = + if k < x then add k l, b, r + else if k > x then l, b, add k r + else l, true, r + in + fold coll s (empty, false, empty) + + + +let rec dump n = + Printf.eprintf "{ id = %i; key = %i ; node=" n.id n.key; + match n.node with + | Empty -> Printf.eprintf "Empty; }\n" + | Leaf k -> Printf.eprintf "Leaf %i; }\n" k + | Branch (p,m,l,r) -> + Printf.eprintf "Branch(%i,%i,id=%i,id=%i); }\n" + p m l.id r.id; + dump l; + dump r + +(*i*) +let make l = List.fold_left (fun acc e -> add e acc ) empty l +(*i*) + +(*s Additional functions w.r.t to [Set.S]. *) + +let rec intersect s1 s2 = (equal s1 s2) || + match (s1.node,s2.node) with + | Empty, _ -> false + | _, Empty -> false + | Leaf k1, _ -> mem k1 s2 + | _, Leaf k2 -> mem k2 s1 + | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> + if m1 == m2 && p1 == p2 then + intersect l1 l2 || intersect r1 r2 + else if m1 < m2 && match_prefix p2 p1 m1 then + intersect (if zero_bit p2 m1 then l1 else r1) s2 + else if m1 > m2 && match_prefix p1 p2 m2 then + intersect s1 (if zero_bit p1 m2 then l2 else r2) + else + false + + +let hash s = s.key + +let from_list l = List.fold_left (fun acc i -> add i acc) empty l diff --git a/ptset.mli b/ptset.mli new file mode 100644 index 0000000..c36a08d --- /dev/null +++ b/ptset.mli @@ -0,0 +1,89 @@ +(**************************************************************************) +(* *) +(* Copyright (C) Jean-Christophe Filliatre *) +(* *) +(* This software is free software; you can redistribute it and/or *) +(* modify it under the terms of the GNU Library General Public *) +(* License version 2.1, with the special exception on linking *) +(* described in file LICENSE. *) +(* *) +(* This software is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *) +(* *) +(**************************************************************************) + +(*i $Id: ptset.mli,v 1.10 2008-07-21 14:53:06 filliatr Exp $ i*) + +(*s Sets of integers implemented as Patricia trees. The following + signature is exactly [Set.S with type elt = int], with the same + specifications. This is a purely functional data-structure. The + performances are similar to those of the standard library's module + [Set]. The representation is unique and thus structural comparison + can be performed on Patricia trees. *) + +type t + +type elt = int + +val empty : t + +val is_empty : t -> bool + +val mem : int -> t -> bool + +val add : int -> t -> t + +val singleton : int -> t + +val remove : int -> t -> t + +val union : t -> t -> t + +val subset : t -> t -> bool + +val inter : t -> t -> t + +val diff : t -> t -> t + +val equal : t -> t -> bool + +val compare : t -> t -> int + +val elements : t -> int list + +val choose : t -> int + +val cardinal : t -> int + +val iter : (int -> unit) -> t -> unit + +val fold : (int -> 'a -> 'a) -> t -> 'a -> 'a + +val for_all : (int -> bool) -> t -> bool + +val exists : (int -> bool) -> t -> bool + +val filter : (int -> bool) -> t -> t + +val partition : (int -> bool) -> t -> t * t + +val split : int -> t -> t * bool * t + +(*s Warning: [min_elt] and [max_elt] are linear w.r.t. the size of the + set. In other words, [min_elt t] is barely more efficient than [fold + min t (choose t)]. *) + +val min_elt : t -> int +val max_elt : t -> int + +(*s Additional functions not appearing in the signature [Set.S] from ocaml + standard library. *) + +(* [intersect u v] determines if sets [u] and [v] have a non-empty + intersection. *) + +val intersect : t -> t -> bool +val hash : t -> int + +val from_list : int list -> t diff --git a/sigs.mli b/sigs.mli new file mode 100644 index 0000000..aa670ce --- /dev/null +++ b/sigs.mli @@ -0,0 +1,79 @@ +(* Quite useful, taken from CDuce, cduce/misc/custom.ml *) +module type Set = sig + include Set.S + val hash : t -> int + val equal : t -> t -> bool +end + +module type T = sig + type t + (* Debugging *) + val dump : Format.formatter -> t -> unit + val check : t -> unit (* Check internal invariants *) + + (* Data structures *) + val equal : t -> t -> bool + val hash : t -> int + val compare :t -> t -> int + val print : Format.formatter -> t -> unit +end + +module type TAG = +sig + include T + val attribute : t + val pcdata : t + val to_string : t -> string + val tag : string -> t +end + +module type BINARY_TREE = + functor (Tag:TAG) -> +sig + include T + module Tag : TAG with type t = Tag.t + val parse_xml_uri : string -> t + val parse_xml_string : string -> t + + val root : t -> t + + val is_string : t -> bool + val is_node : t -> bool + val is_nil : t -> bool + + val string : t -> string + val first_child : t -> t + val next_sibling : t -> t + val parent : t -> t + + val id : t -> int + val tag : t -> Tag.t + + val print_xml : out_channel -> t -> unit + val size : t -> int*int*int*int +end +module type BINARY_TREE_S = +sig + include T + module Tag : TAG + val parse_xml_uri : string -> t + val parse_xml_string : string -> t + + val root : t -> t + + val is_string : t -> bool + val is_node : t -> bool + val is_nil : t -> bool + + val string : t -> string + val first_child : t -> t + val next_sibling : t -> t + val parent : t -> t + + val id : t -> int + val tag : t -> Tag.t + + val print_xml : out_channel -> t -> unit + val dump : Format.formatter -> t -> unit + val size : t -> int*int*int*int +end -- 2.17.1