X-Git-Url: http://git.nguyen.vg/gitweb/?p=tatoo.git;a=blobdiff_plain;f=src%2Fata.ml;h=df433926b41d263b3ec105f449d02b5fa043d59f;hp=6ec859c12c1c7bc624ad052da307b55b218900c0;hb=41dd1fed04cabad212f10fce3484545f6e9d9444;hpb=d9c0e4863807eaf472e875a4bad35cfefe985c95 diff --git a/src/ata.ml b/src/ata.ml index 6ec859c..df43392 100644 --- a/src/ata.ml +++ b/src/ata.ml @@ -13,125 +13,411 @@ (* *) (***********************************************************************) -(* - Time-stamp: -*) - +INCLUDE "utils.ml" open Format -type move = [ `Left | `Right | `Up1 | `Up2 | `Epsilon ] -type state_ctx = { left : StateSet.t; - right : StateSet.t; - up1 : StateSet.t; - up2 : StateSet.t; - epsilon : StateSet.t} -type ctx_ = { mutable positive : state_ctx; - mutable negative : state_ctx } -type pred_ = move * bool * State.t - -module Move : (Formula.PREDICATE with type data = pred_ and type ctx = ctx_ ) = + +type predicate = | First_child + | Next_sibling + | Parent + | Previous_sibling + | Stay + | Is_first_child + | Is_next_sibling + | Is of (Tree.NodeKind.t) + | Has_first_child + | Has_next_sibling + +let is_move p = match p with +| First_child | Next_sibling +| Parent | Previous_sibling | Stay -> true +| _ -> false + + +type atom = predicate * bool * State.t + +module Atom : (Formula.ATOM with type data = atom) = struct module Node = struct - type t = move * bool * State.t + type t = atom let equal n1 n2 = n1 = n2 let hash n = Hashtbl.hash n end - type ctx = ctx_ - let make_ctx a b c d e = - { left = a; right = b; up1 = c; up2 = d; epsilon = e } - include Hcons.Make(Node) let print ppf a = - let _ = flush_str_formatter() in - let fmt = str_formatter in - - let m, b, s = a.node in - let dir,num = - match m with - | `Left -> Pretty.down_arrow, Pretty.subscript 1 - | `Right -> Pretty.down_arrow, Pretty.subscript 2 - | `Epsilon -> Pretty.epsilon, "" - | `Up1 -> Pretty.up_arrow, Pretty.subscript 1 - | `Up2 -> Pretty.up_arrow, Pretty.subscript 2 - in - fprintf fmt "%s%s" dir num; - State.print fmt s; - let str = flush_str_formatter() in - if b then fprintf ppf "%s" str - else Pretty.pp_overline ppf str - - let neg p = - let l, b, s = p.node in - make (l, not b, s) - - let eval ctx p = - let l, b, s = p.node in - let nctx = if b then ctx.positive else ctx.negative in - StateSet.mem s begin - match l with - `Left -> nctx.left - | `Right -> nctx.right - | `Up1 -> nctx.up1 - | `Up2 -> nctx.up2 - | `Epsilon -> nctx.epsilon - end + let p, b, q = a.node in + if not b then fprintf ppf "%s" Pretty.lnot; + match p with + | First_child -> fprintf ppf "FC(%a)" State.print q + | Next_sibling -> fprintf ppf "NS(%a)" State.print q + | Parent -> fprintf ppf "FC%s(%a)" Pretty.inverse State.print q + | Previous_sibling -> fprintf ppf "NS%s(%a)" Pretty.inverse State.print q + | Stay -> fprintf ppf "%s(%a)" Pretty.epsilon State.print q + | Is_first_child -> fprintf ppf "FC%s?" Pretty.inverse + | Is_next_sibling -> fprintf ppf "NS%s?" Pretty.inverse + | Is k -> fprintf ppf "is-%a?" Tree.NodeKind.print k + | Has_first_child -> fprintf ppf "FC?" + | Has_next_sibling -> fprintf ppf "NS?" + + let neg a = + let p, b, q = a.node in + make (p, not b, q) + + +end + +module SFormula = +struct + include Formula.Make(Atom) + open Tree.NodeKind + let mk_atom a b c = atom_ (Atom.make (a,b,c)) + let mk_kind k = mk_atom (Is k) true State.dummy + let has_first_child = + (mk_atom Has_first_child true State.dummy) + + let has_next_sibling = + (mk_atom Has_next_sibling true State.dummy) + + let is_first_child = + (mk_atom Is_first_child true State.dummy) + + let is_next_sibling = + (mk_atom Is_next_sibling true State.dummy) + + let is_attribute = + (mk_atom (Is Attribute) true State.dummy) + + let is_element = + (mk_atom (Is Element) true State.dummy) + + let is_processing_instruction = + (mk_atom (Is ProcessingInstruction) true State.dummy) + + let is_comment = + (mk_atom (Is Comment) true State.dummy) + + let first_child q = + and_ + (mk_atom First_child true q) + has_first_child + + let next_sibling q = + and_ + (mk_atom Next_sibling true q) + has_next_sibling + + let parent q = + and_ + (mk_atom Parent true q) + is_first_child + + let previous_sibling q = + and_ + (mk_atom Previous_sibling true q) + is_next_sibling + + let stay q = + (mk_atom Stay true q) + + let get_states phi = + fold (fun phi acc -> + match expr phi with + | Formula.Atom a -> let _, _, q = Atom.node a in + if q != State.dummy then StateSet.add q acc else acc + | _ -> acc + ) phi StateSet.empty + end -module SFormula = Formula.Make(Move) + +module Transition = Hcons.Make (struct + type t = State.t * QNameSet.t * SFormula.t + let equal (a, b, c) (d, e, f) = + a == d && b == e && c == f + let hash (a, b, c) = + HASHINT4 (PRIME1, a, ((QNameSet.uid b) :> int), ((SFormula.uid c) :> int)) +end) + + +module TransList : sig + include Hlist.S with type elt = Transition.t + val print : Format.formatter -> ?sep:string -> t -> unit +end = + struct + include Hlist.Make(Transition) + let print ppf ?(sep="\n") l = + iter (fun t -> + let q, lab, f = Transition.node t in + fprintf ppf "%a, %a -> %a%s" State.print q QNameSet.print lab SFormula.print f sep) l + end + + + +type node_summary = int +let dummy_summary = -1 +(* +4444444444443210 +4 -> kind +3 -> is_left +2 -> is_right +1 -> has_left +0 -> has_right +*) + +let has_right (s : node_summary) : bool = + Obj.magic (s land 1) +let has_left (s : node_summary) : bool = + Obj.magic ((s lsr 1) land 1) + +let is_right (s : node_summary) : bool = + Obj.magic ((s lsr 2) land 1) + +let is_left (s : node_summary) : bool = + Obj.magic ((s lsr 3) land 1) + +let kind (s : node_summary ) : Tree.NodeKind.t = + Obj.magic (s lsr 4) + +let node_summary is_left is_right has_left has_right kind = + ((Obj.magic kind) lsl 4) lor + ((Obj.magic is_left) lsl 3) lor + ((Obj.magic is_right) lsl 2) lor + ((Obj.magic has_left) lsl 1) lor + (Obj.magic has_right) + + + +type config = { + sat : StateSet.t; + unsat : StateSet.t; + todo : TransList.t; + summary : node_summary; +} + +module Config = Hcons.Make(struct + type t = config + let equal c d = + c == d || + c.sat == d.sat && + c.unsat == d.unsat && + c.todo == d.todo && + c.summary == d.summary + + let hash c = + HASHINT4((c.sat.StateSet.id :> int), + (c.unsat.StateSet.id :> int), + (c.todo.TransList.id :> int), + c.summary) +end +) + type t = { id : Uid.t; mutable states : StateSet.t; - mutable top_states : StateSet.t; - mutable bottom_states: StateSet.t; mutable selection_states: StateSet.t; transitions: (State.t, (QNameSet.t*SFormula.t) list) Hashtbl.t; + mutable cache2 : TransList.t Cache.N2.t; + mutable cache4 : Config.t Cache.N4.t; } +let next = Uid.make_maker () +let dummy2 = TransList.cons + (Transition.make (State.dummy,QNameSet.empty, SFormula.false_)) + TransList.nil + + + +let dummy_config = + Config.make { sat = StateSet.empty; + unsat = StateSet.empty; + todo = TransList.nil; + summary = dummy_summary + } + + +let create s ss = + let auto = { id = next (); + states = s; + selection_states = ss; + transitions = Hashtbl.create 17; + cache2 = Cache.N2.create dummy2; + cache4 = Cache.N4.create dummy_config; + } + in + at_exit (fun () -> + let n4 = ref 0 in + let n2 = ref 0 in + Cache.N2.iteri (fun _ _ _ b -> if b then incr n2) auto.cache2; + Cache.N4.iteri (fun _ _ _ _ _ b -> if b then incr n4) auto.cache4; + Logger.msg `STATS "automaton %i, cache2: %i entries, cache6: %i entries" + (auto.id :> int) !n2 !n4; + let c2l, c2u = Cache.N2.stats auto.cache2 in + let c4l, c4u = Cache.N4.stats auto.cache4 in + Logger.msg `STATS + "cache2: length: %i, used: %i, occupation: %f" + c2l c2u (float c2u /. float c2l); + Logger.msg `STATS + "cache4: length: %i, used: %i, occupation: %f" + c4l c4u (float c4u /. float c4l) + + ); + auto + +let reset a = + a.cache4 <- Cache.N4.create (Cache.N4.dummy a.cache4) + +let full_reset a = + reset a; + a.cache2 <- Cache.N2.create (Cache.N2.dummy a.cache2) -let next = Uid.make_maker () -let create () = { id = next (); - states = StateSet.empty; - top_states = StateSet.empty; - bottom_states = StateSet.empty; - selection_states = StateSet.empty; - transitions = Hashtbl.create 17; - } +let get_trans_aux a tag states = + StateSet.fold (fun q acc0 -> + try + let trs = Hashtbl.find a.transitions q in + List.fold_left (fun acc1 (labs, phi) -> + if QNameSet.mem tag labs then TransList.cons (Transition.make (q, labs, phi)) acc1 else acc1) acc0 trs + with Not_found -> acc0 + ) states TransList.nil + + +let get_trans a tag states = + let trs = + Cache.N2.find a.cache2 + (tag.QName.id :> int) (states.StateSet.id :> int) + in + if trs == dummy2 then + let trs = get_trans_aux a tag states in + (Cache.N2.add + a.cache2 + (tag.QName.id :> int) + (states.StateSet.id :> int) trs; trs) + else trs + +let simplify_atom atom pos q { Config.node=config; _ } = + if (pos && StateSet.mem q config.sat) + || ((not pos) && StateSet.mem q config.unsat) then SFormula.true_ + else if (pos && StateSet.mem q config.unsat) + || ((not pos) && StateSet.mem q config.sat) then SFormula.false_ + else atom + +let eval_form phi fcs nss ps ss summary = + let rec loop phi = + begin match SFormula.expr phi with + Formula.True | Formula.False -> phi + | Formula.Atom a -> + let p, b, q = Atom.node a in begin + match p with + | First_child -> simplify_atom phi b q fcs + | Next_sibling -> simplify_atom phi b q nss + | Parent | Previous_sibling -> simplify_atom phi b q ps + | Stay -> simplify_atom phi b q ss + | Is_first_child -> SFormula.of_bool (b == (is_left summary)) + | Is_next_sibling -> SFormula.of_bool (b == (is_right summary)) + | Is k -> SFormula.of_bool (b == (k == (kind summary))) + | Has_first_child -> SFormula.of_bool (b == (has_left summary)) + | Has_next_sibling -> SFormula.of_bool (b == (has_right summary)) + end + | Formula.And(phi1, phi2) -> SFormula.and_ (loop phi1) (loop phi2) + | Formula.Or (phi1, phi2) -> SFormula.or_ (loop phi1) (loop phi2) + end + in + loop phi + + + +let eval_trans auto fcs nss ps ss = + let fcsid = (fcs.Config.id :> int) in + let nssid = (nss.Config.id :> int) in + let psid = (ps.Config.id :> int) in + let rec loop old_config = + let oid = (old_config.Config.id :> int) in + let res = + let res = Cache.N4.find auto.cache4 oid fcsid nssid psid in + if res != dummy_config then res + else + let { sat = old_sat; + unsat = old_unsat; + todo = old_todo; + summary = old_summary } = old_config.Config.node + in + let sat, unsat, removed, kept, todo = + TransList.fold + (fun trs acc -> + let q, lab, phi = Transition.node trs in + let a_sat, a_unsat, a_rem, a_kept, a_todo = acc in + if StateSet.mem q a_sat || StateSet.mem q a_unsat then acc else + let new_phi = + eval_form phi fcs nss ps old_config old_summary + in + if SFormula.is_true new_phi then + StateSet.add q a_sat, a_unsat, StateSet.add q a_rem, a_kept, a_todo + else if SFormula.is_false new_phi then + a_sat, StateSet.add q a_unsat, StateSet.add q a_rem, a_kept, a_todo + else + let new_tr = Transition.make (q, lab, new_phi) in + (a_sat, a_unsat, a_rem, StateSet.add q a_kept, (TransList.cons new_tr a_todo)) + ) old_todo (old_sat, old_unsat, StateSet.empty, StateSet.empty, TransList.nil) + in + (* States that have been removed from the todo list and not kept are now + unsatisfiable *) + let unsat = StateSet.union unsat (StateSet.diff removed kept) in + (* States that were found once to be satisfiable remain so *) + let unsat = StateSet.diff unsat sat in + let new_config = Config.make { old_config.Config.node with sat; unsat; todo; } in + Cache.N4.add auto.cache4 oid fcsid nssid psid new_config; + new_config + in + if res == old_config then res else loop res + in + loop ss + +(* + [add_trans a q labels f] adds a transition [(q,labels) -> f] to the + automaton [a] but ensures that transitions remains pairwise disjoint +*) let add_trans a q s f = let trs = try Hashtbl.find a.transitions q with Not_found -> [] in - let rem, ntrs = - List.fold_left (fun (rem, atrs) ((labs, phi) as tr) -> - let nlabs = QNameSet.inter labs rem in - if QNameSet.is_empty nlabs then - (rem, tr :: atrs) - else - let nrem = QNameSet.diff rem labs in - nrem, (nlabs, SFormula.or_ phi f)::atrs - ) (s, []) trs + let cup, ntrs = + List.fold_left (fun (acup, atrs) (labs, phi) -> + let lab1 = QNameSet.inter labs s in + let lab2 = QNameSet.diff labs s in + let tr1 = + if QNameSet.is_empty lab1 then [] + else [ (lab1, SFormula.or_ phi f) ] + in + let tr2 = + if QNameSet.is_empty lab2 then [] + else [ (lab2, SFormula.or_ phi f) ] + in + (QNameSet.union acup labs, tr1@ tr2 @ atrs) + ) (QNameSet.empty, []) trs in + let rem = QNameSet.diff s cup in let ntrs = if QNameSet.is_empty rem then ntrs else (rem, f) :: ntrs in Hashtbl.replace a.transitions q ntrs +let _pr_buff = Buffer.create 50 +let _str_fmt = formatter_of_buffer _pr_buff +let _flush_str_fmt () = pp_print_flush _str_fmt (); + let s = Buffer.contents _pr_buff in + Buffer.clear _pr_buff; s let print fmt a = fprintf fmt - "Unique ID: %i@\n\ - States %a@\n\ - Top states: %a@\n\ - Bottom states: %a@\n\ + "Internal UID: %i@\n\ + States: %a@\n\ Selection states: %a@\n\ Alternating transitions:@\n" (a.id :> int) StateSet.print a.states - StateSet.print a.top_states - StateSet.print a.bottom_states StateSet.print a.selection_states; let trs = Hashtbl.fold @@ -139,22 +425,133 @@ let print fmt a = a.transitions [] in - let sorted_trs = List.stable_sort (fun (q1, s1, phi1) (q2, s2, phi2) -> + let sorted_trs = List.stable_sort (fun (q1, s1, _) (q2, s2, _) -> let c = State.compare q1 q2 in - (if c == 0 then QNameSet.compare s1 s2 else c)) trs in - let sfmt = str_formatter in - let _ = flush_str_formatter () in - let strs_strings, maxs = List.fold_left (fun (accl, accm) (q, s, f) -> - let s1 = State.print sfmt q; flush_str_formatter () in - let s2 = QNameSet.print sfmt s; flush_str_formatter () in - let s3 = SFormula.print sfmt f; flush_str_formatter () in - ( (s1, s2, s3) :: accl, - max - accm (2 + String.length s1 + String.length s2)) - ) ([], 0) sorted_trs + let _ = _flush_str_fmt () in + let strs_strings, max_pre, max_all = List.fold_left (fun (accl, accp, acca) (q, s, f) -> + let s1 = State.print _str_fmt q; _flush_str_fmt () in + let s2 = QNameSet.print _str_fmt s; _flush_str_fmt () in + let s3 = SFormula.print _str_fmt f; _flush_str_fmt () in + let pre = Pretty.length s1 + Pretty.length s2 in + let all = Pretty.length s3 in + ( (q, s1, s2, s3) :: accl, max accp pre, max acca all) + ) ([], 0, 0) sorted_trs in - List.iter (fun (s1, s2, s3) -> + let line = Pretty.line (max_all + max_pre + 6) in + let prev_q = ref State.dummy in + fprintf fmt "%s@\n" line; + List.iter (fun (q, s1, s2, s3) -> + if !prev_q != q && !prev_q != State.dummy then fprintf fmt "%s@\n" line; + prev_q := q; fprintf fmt "%s, %s" s1 s2; - fprintf fmt "%s" (Pretty.padding (maxs - String.length s1 - String.length s2 - 2)); - fprintf fmt "%s %s@\n" Pretty.right_arrow s3) strs_strings + fprintf fmt "%s" (Pretty.padding (max_pre - Pretty.length s1 - Pretty.length s2)); + fprintf fmt " %s %s@\n" Pretty.right_arrow s3; + ) strs_strings; + fprintf fmt "%s@\n" line + +(* + [complete transitions a] ensures that for each state q + and each symbols s in the alphabet, a transition q, s exists. + (adding q, s -> F when necessary). +*) + +let complete_transitions a = + StateSet.iter (fun q -> + let qtrans = Hashtbl.find a.transitions q in + let rem = + List.fold_left (fun rem (labels, _) -> + QNameSet.diff rem labels) QNameSet.any qtrans + in + let nqtrans = + if QNameSet.is_empty rem then qtrans + else + (rem, SFormula.false_) :: qtrans + in + Hashtbl.replace a.transitions q nqtrans + ) a.states + +let cleanup_states a = + let memo = ref StateSet.empty in + let rec loop q = + if not (StateSet.mem q !memo) then begin + memo := StateSet.add q !memo; + let trs = try Hashtbl.find a.transitions q with Not_found -> [] in + List.iter (fun (_, phi) -> + StateSet.iter loop (SFormula.get_states phi)) trs + end + in + StateSet.iter loop a.selection_states; + let unused = StateSet.diff a.states !memo in + StateSet.iter (fun q -> Hashtbl.remove a.transitions q) unused; + a.states <- !memo + +(* [normalize_negations a] removes negative atoms in the formula + complementing the sub-automaton in the negative states. + [TODO check the meaning of negative upward arrows] +*) + +let normalize_negations auto = + let memo_state = Hashtbl.create 17 in + let todo = Queue.create () in + let rec flip b f = + match SFormula.expr f with + Formula.True | Formula.False -> if b then f else SFormula.not_ f + | Formula.Or(f1, f2) -> (if b then SFormula.or_ else SFormula.and_)(flip b f1) (flip b f2) + | Formula.And(f1, f2) -> (if b then SFormula.and_ else SFormula.or_)(flip b f1) (flip b f2) + | Formula.Atom(a) -> begin + let l, b', q = Atom.node a in + if q == State.dummy then if b then f else SFormula.not_ f + else + if b == b' then begin + (* a appears positively, either no negation or double negation *) + if not (Hashtbl.mem memo_state (q,b)) then Queue.add (q,true) todo; + SFormula.atom_ (Atom.make (l, true, q)) + end else begin + (* need to reverse the atom + either we have a positive state deep below a negation + or we have a negative state in a positive formula + b' = sign of the state + b = sign of the enclosing formula + *) + let not_q = + try + (* does the inverted state of q exist ? *) + Hashtbl.find memo_state (q, false) + with + Not_found -> + (* create a new state and add it to the todo queue *) + let nq = State.make () in + auto.states <- StateSet.add nq auto.states; + Hashtbl.add memo_state (q, false) nq; + Queue.add (q, false) todo; nq + in + SFormula.atom_ (Atom.make (l, true, not_q)) + end + end + in + (* states that are not reachable from a selection stat are not interesting *) + StateSet.iter (fun q -> Queue.add (q, true) todo) auto.selection_states; + + while not (Queue.is_empty todo) do + let (q, b) as key = Queue.pop todo in + let q' = + try + Hashtbl.find memo_state key + with + Not_found -> + let nq = if b then q else + let nq = State.make () in + auto.states <- StateSet.add nq auto.states; + nq + in + Hashtbl.add memo_state key nq; nq + in + let trans = Hashtbl.find auto.transitions q in + let trans' = List.map (fun (lab, f) -> lab, flip b f) trans in + Hashtbl.replace auto.transitions q' trans'; + done; + cleanup_states auto + +