X-Git-Url: http://git.nguyen.vg/gitweb/?p=tatoo.git;a=blobdiff_plain;f=src%2Fata.ml;h=a4ea306d87267dbd91713ca53e5e275effbf06c9;hp=681ce69d12f21c944609f424837c4cad7ec9c050;hb=da84ba92e452d4d2a3eaf89f7638db25a7f84909;hpb=d2f5764e9f0e89f485d6237d07cbe9b04b7426ae diff --git a/src/ata.ml b/src/ata.ml index 681ce69..a4ea306 100644 --- a/src/ata.ml +++ b/src/ata.ml @@ -1,77 +1,554 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2013 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +INCLUDE "utils.ml" open Format +open Misc +type move = [ `First_child + | `Next_sibling + | `Parent + | `Previous_sibling + | `Stay ] + +type predicate = Move of move * State.t + | Is_first_child + | Is_next_sibling + | Is of Tree.NodeKind.t + | Has_first_child + | Has_next_sibling + +module Atom = +struct + + module Node = + struct + type t = predicate + let equal n1 n2 = n1 = n2 + let hash n = Hashtbl.hash n + end + + include Hcons.Make(Node) + + let print ppf a = + match a.node with + | Move (m, q) -> begin + match m with + `First_child -> fprintf ppf "%s" Pretty.down_arrow + | `Next_sibling -> fprintf ppf "%s" Pretty.right_arrow + | `Parent -> fprintf ppf "%s" Pretty.up_arrow + | `Previous_sibling -> fprintf ppf "%s" Pretty.left_arrow + | `Stay -> fprintf ppf "%s" Pretty.bullet + end; + fprintf ppf "%a" State.print q + | Is_first_child -> fprintf ppf "%s?" Pretty.up_arrow + | Is_next_sibling -> fprintf ppf "%s?" Pretty.left_arrow + | Is k -> fprintf ppf "is-%a?" Tree.NodeKind.print k + | Has_first_child -> fprintf ppf "%s?" Pretty.down_arrow + | Has_next_sibling -> fprintf ppf "%s?" Pretty.right_arrow + +end + + +module Formula = +struct + include Boolean.Make(Atom) + open Tree.NodeKind + let mk_atom a = atom_ (Atom.make a) + let is k = mk_atom (Is k) + + let has_first_child = mk_atom Has_first_child + + let has_next_sibling = mk_atom Has_next_sibling + + let is_first_child = mk_atom Is_first_child + + let is_next_sibling = mk_atom Is_next_sibling + + let is_attribute = mk_atom (Is Attribute) + + let is_element = mk_atom (Is Element) + + let is_processing_instruction = mk_atom (Is ProcessingInstruction) + + let is_comment = mk_atom (Is Comment) + + let mk_move m q = mk_atom (Move(m,q)) + let first_child q = + and_ + (mk_move `First_child q) + has_first_child + + let next_sibling q = + and_ + (mk_move `Next_sibling q) + has_next_sibling + + let parent q = + and_ + (mk_move `Parent q) + is_first_child + + let previous_sibling q = + and_ + (mk_move `Previous_sibling q) + is_next_sibling + + let stay q = mk_move `Stay q + + let get_states phi = + fold (fun phi acc -> + match expr phi with + | Boolean.Atom ({ Atom.node = Move(_,q) ; _ }, _) -> StateSet.add q acc + | _ -> acc + ) phi StateSet.empty + +end + +module Transition = + struct + include Hcons.Make (struct + type t = State.t * QNameSet.t * Formula.t + let equal (a, b, c) (d, e, f) = + a == d && b == e && c == f + let hash (a, b, c) = + HASHINT4 (PRIME1, a, ((QNameSet.uid b) :> int), ((Formula.uid c) :> int)) +end) + let print ppf t = + let q, l, f = t.node in + fprintf ppf "%a, %a %s %a" + State.print q + QNameSet.print l + Pretty.double_right_arrow + Formula.print f + end + + +module TransList : sig + include Hlist.S with type elt = Transition.t + val print : Format.formatter -> ?sep:string -> t -> unit +end = + struct + include Hlist.Make(Transition) + let print ppf ?(sep="\n") l = + iter (fun t -> + let q, lab, f = Transition.node t in + fprintf ppf "%a, %a -> %a%s" State.print q QNameSet.print lab Formula.print f sep) l + end + + type t = { id : Uid.t; mutable states : StateSet.t; - mutable top_states : StateSet.t; - mutable bottom_states: StateSet.t; - mutable selection_states: StateSet.t; + mutable starting_states : StateSet.t; + mutable selecting_states: StateSet.t; transitions: (State.t, (QNameSet.t*Formula.t) list) Hashtbl.t; } -let next = Uid.make_maker () - -let create () = { id = next (); - states = StateSet.empty; - top_states = StateSet.empty; - bottom_states = StateSet.empty; - selection_states = StateSet.empty; - transitions = Hashtbl.create 17; - } - -let add_trans a q s f = - let trs = try Hashtbl.find a.transitions q with Not_found -> [] in - let rem, ntrs = - List.fold_left (fun (rem, atrs) ((labs, phi) as tr) -> - let nlabs = QNameSet.inter labs rem in - if QNameSet.is_empty nlabs then - (rem, tr :: atrs) - else - let nrem = QNameSet.diff rem labs in - nrem, (nlabs, Formula.or_ phi f)::atrs - ) (s, []) trs - in - let ntrs = if QNameSet.is_empty rem then ntrs - else (rem, f) :: ntrs - in - Hashtbl.replace a.transitions q ntrs +let uid t = t.id + +let get_states a = a.states +let get_starting_states a = a.starting_states +let get_selecting_states a = a.selecting_states + +let _pr_buff = Buffer.create 50 +let _str_fmt = formatter_of_buffer _pr_buff +let _flush_str_fmt () = pp_print_flush _str_fmt (); + let s = Buffer.contents _pr_buff in + Buffer.clear _pr_buff; s let print fmt a = + let _ = _flush_str_fmt() in fprintf fmt - "Unique ID: %i@\n\ - States %a@\n\ - Top states: %a@\n\ - Bottom states: %a@\n\ + "Internal UID: %i@\n\ + States: %a@\n\ + Number of states: %i@\n\ + Starting states: %a@\n\ Selection states: %a@\n\ Alternating transitions:@\n" (a.id :> int) StateSet.print a.states - StateSet.print a.top_states - StateSet.print a.bottom_states - StateSet.print a.selection_states; + (StateSet.cardinal a.states) + StateSet.print a.starting_states + StateSet.print a.selecting_states; let trs = Hashtbl.fold (fun q t acc -> List.fold_left (fun acc (s , f) -> (q,s,f)::acc) acc t) a.transitions [] in - let sorted_trs = List.stable_sort (fun (q1, s1, phi1) (q2, s2, phi2) -> + let sorted_trs = List.stable_sort (fun (q1, s1, _) (q2, s2, _) -> let c = State.compare q1 q2 in - (if c == 0 then QNameSet.compare s1 s2 else c)) trs in - let sfmt = str_formatter in - let _ = flush_str_formatter () in - let strs_strings, maxs = List.fold_left (fun (accl, accm) (q, s, f) -> - let s1 = State.print sfmt q; flush_str_formatter () in - let s2 = QNameSet.print sfmt s; flush_str_formatter () in - let s3 = Formula.print sfmt f; flush_str_formatter () in - ( (s1, s2, s3) :: accl, - max - accm (2 + String.length s1 + String.length s2)) - ) ([], 0) sorted_trs + let _ = _flush_str_fmt () in + let strs_strings, max_pre, max_all = List.fold_left (fun (accl, accp, acca) (q, s, f) -> + let s1 = State.print _str_fmt q; _flush_str_fmt () in + let s2 = QNameSet.print _str_fmt s; _flush_str_fmt () in + let s3 = Formula.print _str_fmt f; _flush_str_fmt () in + let pre = Pretty.length s1 + Pretty.length s2 in + let all = Pretty.length s3 in + ( (q, s1, s2, s3) :: accl, max accp pre, max acca all) + ) ([], 0, 0) sorted_trs in - List.iter (fun (s1, s2, s3) -> + let line = Pretty.line (max_all + max_pre + 6) in + let prev_q = ref State.dummy in + fprintf fmt "%s@\n" line; + List.iter (fun (q, s1, s2, s3) -> + if !prev_q != q && !prev_q != State.dummy then fprintf fmt "%s@\n" line; + prev_q := q; fprintf fmt "%s, %s" s1 s2; - fprintf fmt "%s" (Pretty.padding (maxs - String.length s1 - String.length s2 - 2)); - fprintf fmt "%s %s@\n" Pretty.right_arrow s3) strs_strings + fprintf fmt "%s" (Pretty.padding (max_pre - Pretty.length s1 - Pretty.length s2)); + fprintf fmt " %s %s@\n" Pretty.right_arrow s3; + ) strs_strings; + fprintf fmt "%s@\n" line + + +let get_trans a tag states = + StateSet.fold (fun q acc0 -> + try + let trs = Hashtbl.find a.transitions q in + List.fold_left (fun acc1 (labs, phi) -> + if QNameSet.mem tag labs then + TransList.cons (Transition.make (q, labs, phi)) acc1 + else acc1) acc0 trs + with Not_found -> acc0 + ) states TransList.nil + + +let get_form a tag q = + try + let trs = Hashtbl.find a.transitions q in + List.fold_left (fun aphi (labs, phi) -> + if QNameSet.mem tag labs then Formula.or_ aphi phi else aphi + ) Formula.false_ trs + with + Not_found -> Formula.false_ + +(* + [complete transitions a] ensures that for each state q + and each symbols s in the alphabet, a transition q, s exists. + (adding q, s -> F when necessary). +*) + +let complete_transitions a = + StateSet.iter (fun q -> + if StateSet.mem q a.starting_states then () + else + let qtrans = Hashtbl.find a.transitions q in + let rem = + List.fold_left (fun rem (labels, _) -> + QNameSet.diff rem labels) QNameSet.any qtrans + in + let nqtrans = + if QNameSet.is_empty rem then qtrans + else + (rem, Formula.false_) :: qtrans + in + Hashtbl.replace a.transitions q nqtrans + ) a.states + +(* [cleanup_states] remove states that do not lead to a + selecting states *) + +let cleanup_states a = + let memo = ref StateSet.empty in + let rec loop q = + if not (StateSet.mem q !memo) then begin + memo := StateSet.add q !memo; + let trs = try Hashtbl.find a.transitions q with Not_found -> [] in + List.iter (fun (_, phi) -> + StateSet.iter loop (Formula.get_states phi)) trs + end + in + StateSet.iter loop a.selecting_states; + let unused = StateSet.diff a.states !memo in + StateSet.iter (fun q -> Hashtbl.remove a.transitions q) unused; + a.states <- !memo + +(* [normalize_negations a] removes negative atoms in the formula + complementing the sub-automaton in the negative states. + [TODO check the meaning of negative upward arrows] +*) + +let normalize_negations auto = + let memo_state = Hashtbl.create 17 in + let todo = Queue.create () in + let rec flip b f = + match Formula.expr f with + Boolean.True | Boolean.False -> if b then f else Formula.not_ f + | Boolean.Or(f1, f2) -> (if b then Formula.or_ else Formula.and_)(flip b f1) (flip b f2) + | Boolean.And(f1, f2) -> (if b then Formula.and_ else Formula.or_)(flip b f1) (flip b f2) + | Boolean.Atom(a, b') -> begin + match a.Atom.node with + | Move (m, q) -> + if b == b' then begin + (* a appears positively, either no negation or double negation *) + if not (Hashtbl.mem memo_state (q,b)) then Queue.add (q,true) todo; + Formula.mk_atom (Move(m, q)) + end else begin + (* need to reverse the atom + either we have a positive state deep below a negation + or we have a negative state in a positive formula + b' = sign of the state + b = sign of the enclosing formula + *) + let not_q = + try + (* does the inverted state of q exist ? *) + Hashtbl.find memo_state (q, false) + with + Not_found -> + (* create a new state and add it to the todo queue *) + let nq = State.make () in + auto.states <- StateSet.add nq auto.states; + Hashtbl.add memo_state (q, false) nq; + Queue.add (q, false) todo; nq + in + Formula.mk_atom (Move (m,not_q)) + end + | _ -> if b then f else Formula.not_ f + end + in + (* states that are not reachable from a selection stat are not interesting *) + StateSet.iter (fun q -> Queue.add (q, true) todo) auto.selecting_states; + + while not (Queue.is_empty todo) do + let (q, b) as key = Queue.pop todo in + let q' = + try + Hashtbl.find memo_state key + with + Not_found -> + let nq = if b then q else + let nq = State.make () in + auto.states <- StateSet.add nq auto.states; + nq + in + Hashtbl.add memo_state key nq; nq + in + let trans = try Hashtbl.find auto.transitions q with Not_found -> [] in + let trans' = List.map (fun (lab, f) -> lab, flip b f) trans in + Hashtbl.replace auto.transitions q' trans'; + done; + cleanup_states auto + + + + + +module Builder = + struct + type auto = t + type t = auto + let next = Uid.make_maker () + + let make () = + let auto = + { + id = next (); + states = StateSet.empty; + starting_states = StateSet.empty; + selecting_states = StateSet.empty; + transitions = Hashtbl.create MED_H_SIZE; + } + in + (* + at_exit (fun () -> + let n4 = ref 0 in + let n2 = ref 0 in + Cache.N2.iteri (fun _ _ _ b -> if b then incr n2) auto.cache2; + Cache.N4.iteri (fun _ _ _ _ _ b -> if b then incr n4) auto.cache4; + Logger.msg `STATS "automaton %i, cache2: %i entries, cache6: %i entries" + (auto.id :> int) !n2 !n4; + let c2l, c2u = Cache.N2.stats auto.cache2 in + let c4l, c4u = Cache.N4.stats auto.cache4 in + Logger.msg `STATS + "cache2: length: %i, used: %i, occupation: %f" + c2l c2u (float c2u /. float c2l); + Logger.msg `STATS + "cache4: length: %i, used: %i, occupation: %f" + c4l c4u (float c4u /. float c4l) + + ); *) + auto + + let add_state a ?(starting=false) ?(selecting=false) q = + a.states <- StateSet.add q a.states; + if starting then a.starting_states <- StateSet.add q a.starting_states; + if selecting then a.selecting_states <- StateSet.add q a.selecting_states + + let add_trans a q s f = + if not (StateSet.mem q a.states) then add_state a q; + let trs = try Hashtbl.find a.transitions q with Not_found -> [] in + let cup, ntrs = + List.fold_left (fun (acup, atrs) (labs, phi) -> + let lab1 = QNameSet.inter labs s in + let lab2 = QNameSet.diff labs s in + let tr1 = + if QNameSet.is_empty lab1 then [] + else [ (lab1, Formula.or_ phi f) ] + in + let tr2 = + if QNameSet.is_empty lab2 then [] + else [ (lab2, Formula.or_ phi f) ] + in + (QNameSet.union acup labs, tr1@ tr2 @ atrs) + ) (QNameSet.empty, []) trs + in + let rem = QNameSet.diff s cup in + let ntrs = if QNameSet.is_empty rem then ntrs + else (rem, f) :: ntrs + in + Hashtbl.replace a.transitions q ntrs + + let finalize a = + complete_transitions a; + normalize_negations a; + a + end + + +let map_set f s = + StateSet.fold (fun q a -> StateSet.add (f q) a) s StateSet.empty + +let map_hash fk fv h = + let h' = Hashtbl.create (Hashtbl.length h) in + let () = Hashtbl.iter (fun k v -> Hashtbl.add h' (fk k) (fv v)) h in + h' + +let rec map_form f phi = + match Formula.expr phi with + | Boolean.Or(phi1, phi2) -> Formula.or_ (map_form f phi1) (map_form f phi2) + | Boolean.And(phi1, phi2) -> Formula.and_ (map_form f phi1) (map_form f phi2) + | Boolean.Atom({ Atom.node = Move(m,q); _}, b) -> + let a = Formula.mk_atom (Move (m,f q)) in + if b then a else Formula.not_ a + | _ -> phi + +let rename_states mapper a = + let rename q = try Hashtbl.find mapper q with Not_found -> q in + { Builder.make () with + states = map_set rename a.states; + starting_states = map_set rename a.starting_states; + selecting_states = map_set rename a.selecting_states; + transitions = + map_hash + rename + (fun l -> + (List.map (fun (labels, form) -> (labels, map_form rename form)) l)) + a.transitions; + } + +let copy a = + let mapper = Hashtbl.create MED_H_SIZE in + let () = + StateSet.iter (fun q -> Hashtbl.add mapper q (State.make())) a.states + in + rename_states mapper a + + +let concat a1 a2 = + let a1 = copy a1 in + let a2 = copy a2 in + let link_phi = + StateSet.fold + (fun q phi -> Formula.(or_ (stay q) phi)) + a1.selecting_states Formula.false_ + in + Hashtbl.iter (fun q trs -> Hashtbl.add a1.transitions q trs) + a2.transitions; + StateSet.iter + (fun q -> + Hashtbl.replace a1.transitions q [(QNameSet.any, link_phi)]) + a2.starting_states; + { a1 with + states = StateSet.union a1.states a2.states; + selecting_states = a2.selecting_states; + transitions = a1.transitions; + } + +let merge a1 a2 = + let a1 = copy a1 in + let a2 = copy a2 in + { a1 with + states = StateSet.union a1.states a2.states; + selecting_states = StateSet.union a1.selecting_states a2.selecting_states; + starting_states = StateSet.union a1.starting_states a2.starting_states; + transitions = + let () = + Hashtbl.iter (fun k v -> Hashtbl.add a1.transitions k v) a2.transitions + in + a1.transitions + } + + +let link a1 a2 q link_phi = + { a1 with + states = StateSet.union a1.states a2.states; + selecting_states = StateSet.singleton q; + starting_states = StateSet.union a1.starting_states a2.starting_states; + transitions = + let () = + Hashtbl.iter (fun k v -> Hashtbl.add a1.transitions k v) a2.transitions + in + Hashtbl.add a1.transitions q [(QNameSet.any, link_phi)]; + a1.transitions + } + +let union a1 a2 = + let a1 = copy a1 in + let a2 = copy a2 in + let q = State.make () in + let link_phi = + StateSet.fold + (fun q phi -> Formula.(or_ (stay q) phi)) + (StateSet.union a1.selecting_states a2.selecting_states) + Formula.false_ + in + link a1 a2 q link_phi + +let inter a1 a2 = + let a1 = copy a1 in + let a2 = copy a2 in + let q = State.make () in + let link_phi = + StateSet.fold + (fun q phi -> Formula.(and_ (stay q) phi)) + (StateSet.union a1.selecting_states a2.selecting_states) + Formula.true_ + in + link a1 a2 q link_phi + +let neg a = + let a = copy a in + let q = State.make () in + let link_phi = + StateSet.fold + (fun q phi -> Formula.(and_ (not_(stay q)) phi)) + a.selecting_states + Formula.true_ + in + let () = Hashtbl.add a.transitions q [(QNameSet.any, link_phi)] in + let a = + { a with + selecting_states = StateSet.singleton q; + } + in + normalize_negations a; a + +let diff a1 a2 = inter a1 (neg a2) +