X-Git-Url: http://git.nguyen.vg/gitweb/?p=tatoo.git;a=blobdiff_plain;f=src%2Fata.ml;h=80843e6deae7aafb8860db5a3b8bce7125968ed5;hp=cad28e16f8774accaacb703969ba330ed62db316;hb=78d247dc5e6d5e64a4ab848702c23ce81b6fc615;hpb=6b66008811639324be623a42037b60e02056772c diff --git a/src/ata.ml b/src/ata.ml index cad28e1..80843e6 100644 --- a/src/ata.ml +++ b/src/ata.ml @@ -13,84 +13,670 @@ (* *) (***********************************************************************) -(* - Time-stamp: -*) - +INCLUDE "utils.ml" open Format +open Misc +type move = [ `First_child + | `Next_sibling + | `Parent + | `Previous_sibling + | `Stay ] + +module Move = + struct + type t = move + type 'a table = 'a array + let idx = function + | `First_child -> 0 + | `Next_sibling -> 1 + | `Parent -> 2 + | `Previous_sibling -> 3 + | `Stay -> 4 + let ridx = function + | 0 -> `First_child + | 1 -> `Next_sibling + | 2 -> `Parent + | 3 -> `Previous_sibling + | 4 -> `Stay + | _ -> assert false + + let create_table a = Array.make 5 a + let get m k = m.(idx k) + let set m k v = m.(idx k) <- v + let iter f m = Array.iteri (fun i v -> f (ridx i) v) m + let fold f m acc = + let acc = ref acc in + iter (fun i v -> acc := f i v !acc) m; + !acc + let for_all p m = + try + iter (fun i v -> if not (p i v) then raise Exit) m; + true + with + Exit -> false + let for_all2 p m1 m2 = + try + for i = 0 to 4 do + let v1 = m1.(i) + and v2 = m2.(i) in + if not (p (ridx i) v1 v2) then raise Exit + done; + true + with + Exit -> false + + let exists p m = + try + iter (fun i v -> if p i v then raise Exit) m; + false + with + Exit -> true + let print ppf m = + match m with + `First_child -> fprintf ppf "%s" Pretty.down_arrow + | `Next_sibling -> fprintf ppf "%s" Pretty.right_arrow + | `Parent -> fprintf ppf "%s" Pretty.up_arrow + | `Previous_sibling -> fprintf ppf "%s" Pretty.left_arrow + | `Stay -> fprintf ppf "%s" Pretty.bullet + + let print_table pr_e ppf m = + iter (fun i v -> fprintf ppf "%a: %a" print i pr_e v; + if (idx i) < 4 then fprintf ppf ", ") m + end + +type predicate = Move of move * State.t + | Is_first_child + | Is_next_sibling + | Is of Tree.NodeKind.t + | Has_first_child + | Has_next_sibling + +module Atom = +struct + + module Node = + struct + type t = predicate + let equal n1 n2 = n1 = n2 + let hash n = Hashtbl.hash n + end + + include Hcons.Make(Node) + + let print ppf a = + match a.node with + | Move (m, q) -> + fprintf ppf "%a%a" Move.print m State.print q + | Is_first_child -> fprintf ppf "%s?" Pretty.up_arrow + | Is_next_sibling -> fprintf ppf "%s?" Pretty.left_arrow + | Is k -> fprintf ppf "is-%a?" Tree.NodeKind.print k + | Has_first_child -> fprintf ppf "%s?" Pretty.down_arrow + | Has_next_sibling -> fprintf ppf "%s?" Pretty.right_arrow + +end + + +module Formula = +struct + include Boolean.Make(Atom) + open Tree.NodeKind + let mk_atom a = atom_ (Atom.make a) + let is k = mk_atom (Is k) + + let has_first_child = mk_atom Has_first_child + + let has_next_sibling = mk_atom Has_next_sibling + + let is_first_child = mk_atom Is_first_child + + let is_next_sibling = mk_atom Is_next_sibling + + let is_attribute = mk_atom (Is Attribute) + + let is_element = mk_atom (Is Element) + + let is_processing_instruction = mk_atom (Is ProcessingInstruction) + + let is_comment = mk_atom (Is Comment) + + let mk_move m q = mk_atom (Move(m,q)) + let first_child q = + and_ + (mk_move `First_child q) + has_first_child + + let next_sibling q = + and_ + (mk_move `Next_sibling q) + has_next_sibling + + let parent q = + and_ + (mk_move `Parent q) + is_first_child + + let previous_sibling q = + and_ + (mk_move `Previous_sibling q) + is_next_sibling + + let stay q = mk_move `Stay q + + let get_states_by_move phi = + let table = Move.create_table StateSet.empty in + iter (fun phi -> + match expr phi with + | Boolean.Atom ({ Atom.node = Move(v,q) ; _ }, _) -> + let s = Move.get table v in + Move.set table v (StateSet.add q s) + | _ -> () + ) phi; + table + let get_states phi = + let table = get_states_by_move phi in + Move.fold (fun _ s acc -> StateSet.union s acc) table StateSet.empty + +end + +module Transition = + struct + include Hcons.Make (struct + type t = State.t * QNameSet.t * Formula.t + let equal (a, b, c) (d, e, f) = + a == d && b == e && c == f + let hash (a, b, c) = + HASHINT4 (PRIME1, a, ((QNameSet.uid b) :> int), ((Formula.uid c) :> int)) +end) + let print ppf t = + let q, l, f = t.node in + fprintf ppf "%a, %a %s %a" + State.print q + QNameSet.print l + Pretty.double_right_arrow + Formula.print f + end + + +module TransList : sig + include Hlist.S with type elt = Transition.t + val print : Format.formatter -> ?sep:string -> t -> unit +end = + struct + include Hlist.Make(Transition) + let print ppf ?(sep="\n") l = + iter (fun t -> + let q, lab, f = Transition.node t in + fprintf ppf "%a, %a -> %a%s" State.print q QNameSet.print lab Formula.print f sep) l + end + + type t = { id : Uid.t; mutable states : StateSet.t; - mutable top_states : StateSet.t; - mutable bottom_states: StateSet.t; - mutable selection_states: StateSet.t; + mutable starting_states : StateSet.t; + mutable selecting_states: StateSet.t; transitions: (State.t, (QNameSet.t*Formula.t) list) Hashtbl.t; + mutable ranked_states : StateSet.t array } -let next = Uid.make_maker () - -let create () = { id = next (); - states = StateSet.empty; - top_states = StateSet.empty; - bottom_states = StateSet.empty; - selection_states = StateSet.empty; - transitions = Hashtbl.create 17; - } - -let add_trans a q s f = - let trs = try Hashtbl.find a.transitions q with Not_found -> [] in - let rem, ntrs = - List.fold_left (fun (rem, atrs) ((labs, phi) as tr) -> - let nlabs = QNameSet.inter labs rem in - if QNameSet.is_empty nlabs then - (rem, tr :: atrs) - else - let nrem = QNameSet.diff rem labs in - nrem, (nlabs, Formula.or_ phi f)::atrs - ) (s, []) trs - in - let ntrs = if QNameSet.is_empty rem then ntrs - else (rem, f) :: ntrs - in - Hashtbl.replace a.transitions q ntrs +let uid t = t.id +let get_states a = a.states +let get_starting_states a = a.starting_states +let get_selecting_states a = a.selecting_states +let get_states_by_rank a = a.ranked_states +let get_max_rank a = Array.length a.ranked_states - 1 + +let _pr_buff = Buffer.create 50 +let _str_fmt = formatter_of_buffer _pr_buff +let _flush_str_fmt () = pp_print_flush _str_fmt (); + let s = Buffer.contents _pr_buff in + Buffer.clear _pr_buff; s let print fmt a = + let _ = _flush_str_fmt() in fprintf fmt - "Unique ID: %i@\n\ - States %a@\n\ - Top states: %a@\n\ - Bottom states: %a@\n\ + "Internal UID: %i@\n\ + States: %a@\n\ + Number of states: %i@\n\ + Starting states: %a@\n\ Selection states: %a@\n\ + Ranked states: %a@\n\ Alternating transitions:@\n" (a.id :> int) StateSet.print a.states - StateSet.print a.top_states - StateSet.print a.bottom_states - StateSet.print a.selection_states; + (StateSet.cardinal a.states) + StateSet.print a.starting_states + StateSet.print a.selecting_states + (let r = ref 0 in Pretty.print_array ~sep:", " (fun ppf s -> + fprintf ppf "%i:%a" !r StateSet.print s; incr r)) a.ranked_states; let trs = Hashtbl.fold (fun q t acc -> List.fold_left (fun acc (s , f) -> (q,s,f)::acc) acc t) a.transitions [] in - let sorted_trs = List.stable_sort (fun (q1, s1, phi1) (q2, s2, phi2) -> + let sorted_trs = List.stable_sort (fun (q1, s1, _) (q2, s2, _) -> let c = State.compare q1 q2 in - (if c == 0 then QNameSet.compare s1 s2 else c)) trs in - let sfmt = str_formatter in - let _ = flush_str_formatter () in - let strs_strings, maxs = List.fold_left (fun (accl, accm) (q, s, f) -> - let s1 = State.print sfmt q; flush_str_formatter () in - let s2 = QNameSet.print sfmt s; flush_str_formatter () in - let s3 = Formula.print sfmt f; flush_str_formatter () in - ( (s1, s2, s3) :: accl, - max - accm (2 + String.length s1 + String.length s2)) - ) ([], 0) sorted_trs + let _ = _flush_str_fmt () in + let strs_strings, max_pre, max_all = List.fold_left (fun (accl, accp, acca) (q, s, f) -> + let s1 = State.print _str_fmt q; _flush_str_fmt () in + let s2 = QNameSet.print _str_fmt s; _flush_str_fmt () in + let s3 = Formula.print _str_fmt f; _flush_str_fmt () in + let pre = Pretty.length s1 + Pretty.length s2 in + let all = Pretty.length s3 in + ( (q, s1, s2, s3) :: accl, max accp pre, max acca all) + ) ([], 0, 0) sorted_trs in - List.iter (fun (s1, s2, s3) -> + let line = Pretty.line (max_all + max_pre + 6) in + let prev_q = ref State.dummy in + fprintf fmt "%s@\n" line; + List.iter (fun (q, s1, s2, s3) -> + if !prev_q != q && !prev_q != State.dummy then fprintf fmt "%s@\n" line; + prev_q := q; fprintf fmt "%s, %s" s1 s2; - fprintf fmt "%s" (Pretty.padding (maxs - String.length s1 - String.length s2 - 2)); - fprintf fmt "%s %s@\n" Pretty.right_arrow s3) strs_strings + fprintf fmt "%s" (Pretty.padding (max_pre - Pretty.length s1 - Pretty.length s2)); + fprintf fmt " %s %s@\n" Pretty.right_arrow s3; + ) strs_strings; + fprintf fmt "%s@\n" line + + +let get_trans a tag states = + StateSet.fold (fun q acc0 -> + try + let trs = Hashtbl.find a.transitions q in + List.fold_left (fun acc1 (labs, phi) -> + if QNameSet.mem tag labs then + TransList.cons (Transition.make (q, labs, phi)) acc1 + else acc1) acc0 trs + with Not_found -> acc0 + ) states TransList.nil + + +let get_form a tag q = + try + let trs = Hashtbl.find a.transitions q in + List.fold_left (fun aphi (labs, phi) -> + if QNameSet.mem tag labs then Formula.or_ aphi phi else aphi + ) Formula.false_ trs + with + Not_found -> Formula.false_ + +(* + [complete transitions a] ensures that for each state q + and each symbols s in the alphabet, a transition q, s exists. + (adding q, s -> F when necessary). +*) + +let complete_transitions a = + StateSet.iter (fun q -> + if StateSet.mem q a.starting_states then () + else + let qtrans = Hashtbl.find a.transitions q in + let rem = + List.fold_left (fun rem (labels, _) -> + QNameSet.diff rem labels) QNameSet.any qtrans + in + let nqtrans = + if QNameSet.is_empty rem then qtrans + else + (rem, Formula.false_) :: qtrans + in + Hashtbl.replace a.transitions q nqtrans + ) a.states + +(* [cleanup_states] remove states that do not lead to a + selecting states *) + +let cleanup_states a = + let memo = ref StateSet.empty in + let rec loop q = + if not (StateSet.mem q !memo) then begin + memo := StateSet.add q !memo; + let trs = try Hashtbl.find a.transitions q with Not_found -> [] in + List.iter (fun (_, phi) -> + StateSet.iter loop (Formula.get_states phi)) trs + end + in + StateSet.iter loop a.selecting_states; + let unused = StateSet.diff a.states !memo in + StateSet.iter (fun q -> Hashtbl.remove a.transitions q) unused; + a.states <- !memo + +(* [normalize_negations a] removes negative atoms in the formula + complementing the sub-automaton in the negative states. + [TODO check the meaning of negative upward arrows] +*) + +let normalize_negations auto = + let memo_state = Hashtbl.create 17 in + let todo = Queue.create () in + let rec flip b f = + match Formula.expr f with + Boolean.True | Boolean.False -> if b then f else Formula.not_ f + | Boolean.Or(f1, f2) -> (if b then Formula.or_ else Formula.and_)(flip b f1) (flip b f2) + | Boolean.And(f1, f2) -> (if b then Formula.and_ else Formula.or_)(flip b f1) (flip b f2) + | Boolean.Atom(a, b') -> begin + match a.Atom.node with + | Move (m, q) -> + if b == b' then begin + (* a appears positively, either no negation or double negation *) + if not (Hashtbl.mem memo_state (q,b)) then Queue.add (q,true) todo; + Formula.mk_atom (Move(m, q)) + end else begin + (* need to reverse the atom + either we have a positive state deep below a negation + or we have a negative state in a positive formula + b' = sign of the state + b = sign of the enclosing formula + *) + let not_q = + try + (* does the inverted state of q exist ? *) + Hashtbl.find memo_state (q, false) + with + Not_found -> + (* create a new state and add it to the todo queue *) + let nq = State.make () in + auto.states <- StateSet.add nq auto.states; + Hashtbl.add memo_state (q, false) nq; + Queue.add (q, false) todo; nq + in + Formula.mk_atom (Move (m,not_q)) + end + | _ -> if b then f else Formula.not_ f + end + in + (* states that are not reachable from a selection stat are not interesting *) + StateSet.iter (fun q -> Queue.add (q, true) todo) auto.selecting_states; + + while not (Queue.is_empty todo) do + let (q, b) as key = Queue.pop todo in + if not (StateSet.mem q auto.starting_states) then + let q' = + try + Hashtbl.find memo_state key + with + Not_found -> + let nq = if b then q else + let nq = State.make () in + auto.states <- StateSet.add nq auto.states; + nq + in + Hashtbl.add memo_state key nq; nq + in + let trans = try Hashtbl.find auto.transitions q with Not_found -> [] in + let trans' = List.map (fun (lab, f) -> lab, flip b f) trans in + Hashtbl.replace auto.transitions q' trans'; + done; + cleanup_states auto + +(* [compute_dependencies auto] returns a hash table storing for each + states [q] a Move.table containing the set of states on which [q] + depends (loosely). [q] depends on [q'] if there is a transition + [q, {...} -> phi], where [q'] occurs in [phi]. +*) +let compute_dependencies auto = + let edges = Hashtbl.create 17 in + StateSet.iter + (fun q -> Hashtbl.add edges q (Move.create_table StateSet.empty)) + auto.starting_states; + Hashtbl.iter (fun q trans -> + let moves = try Hashtbl.find edges q with Not_found -> + let m = Move.create_table StateSet.empty in + Hashtbl.add edges q m; + m + in + List.iter (fun (_, phi) -> + let m_phi = Formula.get_states_by_move phi in + Move.iter (fun m set -> + Move.set moves m (StateSet.union set (Move.get moves m))) + m_phi) trans) auto.transitions; + + edges + + +let compute_rank auto = + let dependencies = compute_dependencies auto in + let upward = [ `Stay ; `Parent ; `Previous_sibling ] in + let downward = [ `Stay; `First_child; `Next_sibling ] in + let swap dir = if dir == upward then downward else upward in + let is_satisfied q t = + Move.for_all (fun _ set -> StateSet.(is_empty (remove q set))) t + in + let update_dependencies dir initacc = + let rec loop acc = + let new_acc = + Hashtbl.fold (fun q deps acc -> + let to_remove = StateSet.union acc initacc in + List.iter + (fun m -> + Move.set deps m (StateSet.diff (Move.get deps m) to_remove) + ) + dir; + if is_satisfied q deps then StateSet.add q acc else acc + ) dependencies acc + in + if acc == new_acc then new_acc else loop new_acc + in + let satisfied = loop StateSet.empty in + StateSet.iter (fun q -> + Hashtbl.remove dependencies q) satisfied; + satisfied + in + let current_states = ref StateSet.empty in + let rank_list = ref [] in + let rank = ref 0 in + let current_dir = ref upward in + let detect_cycle = ref 0 in + while Hashtbl.length dependencies != 0 do + let new_sat = update_dependencies !current_dir !current_states in + if StateSet.is_empty new_sat then incr detect_cycle; + if !detect_cycle > 2 then assert false; + rank_list := (!rank, new_sat) :: !rank_list; + rank := !rank + 1; + current_dir := swap !current_dir; + current_states := StateSet.union new_sat !current_states; + done; + let by_rank = Hashtbl.create 17 in + List.iter (fun (r,s) -> + let r = r/2 in + let set = try Hashtbl.find by_rank r with Not_found -> StateSet.empty in + Hashtbl.replace by_rank r (StateSet.union s set)) !rank_list; + auto.ranked_states <- + Array.init (Hashtbl.length by_rank) (fun i -> Hashtbl.find by_rank i) + + +module Builder = + struct + type auto = t + type t = auto + let next = Uid.make_maker () + + let make () = + let auto = + { + id = next (); + states = StateSet.empty; + starting_states = StateSet.empty; + selecting_states = StateSet.empty; + transitions = Hashtbl.create MED_H_SIZE; + ranked_states = [| |] + } + in + auto + + let add_state a ?(starting=false) ?(selecting=false) q = + a.states <- StateSet.add q a.states; + if starting then a.starting_states <- StateSet.add q a.starting_states; + if selecting then a.selecting_states <- StateSet.add q a.selecting_states + + let add_trans a q s f = + if not (StateSet.mem q a.states) then add_state a q; + let trs = try Hashtbl.find a.transitions q with Not_found -> [] in + let cup, ntrs = + List.fold_left (fun (acup, atrs) (labs, phi) -> + let lab1 = QNameSet.inter labs s in + let lab2 = QNameSet.diff labs s in + let tr1 = + if QNameSet.is_empty lab1 then [] + else [ (lab1, Formula.or_ phi f) ] + in + let tr2 = + if QNameSet.is_empty lab2 then [] + else [ (lab2, Formula.or_ phi f) ] + in + (QNameSet.union acup labs, tr1@ tr2 @ atrs) + ) (QNameSet.empty, []) trs + in + let rem = QNameSet.diff s cup in + let ntrs = if QNameSet.is_empty rem then ntrs + else (rem, f) :: ntrs + in + Hashtbl.replace a.transitions q ntrs + + let finalize a = + complete_transitions a; + normalize_negations a; + compute_rank a; + a + end + + +let map_set f s = + StateSet.fold (fun q a -> StateSet.add (f q) a) s StateSet.empty + +let map_hash fk fv h = + let h' = Hashtbl.create (Hashtbl.length h) in + let () = Hashtbl.iter (fun k v -> Hashtbl.add h' (fk k) (fv v)) h in + h' + +let rec map_form f phi = + match Formula.expr phi with + | Boolean.Or(phi1, phi2) -> Formula.or_ (map_form f phi1) (map_form f phi2) + | Boolean.And(phi1, phi2) -> Formula.and_ (map_form f phi1) (map_form f phi2) + | Boolean.Atom({ Atom.node = Move(m,q); _}, b) -> + let a = Formula.mk_atom (Move (m,f q)) in + if b then a else Formula.not_ a + | _ -> phi + +let rename_states mapper a = + let rename q = try Hashtbl.find mapper q with Not_found -> q in + { Builder.make () with + states = map_set rename a.states; + starting_states = map_set rename a.starting_states; + selecting_states = map_set rename a.selecting_states; + transitions = + map_hash + rename + (fun l -> + (List.map (fun (labels, form) -> (labels, map_form rename form)) l)) + a.transitions; + ranked_states = Array.map (map_set rename) a.ranked_states + } + +let copy a = + let mapper = Hashtbl.create MED_H_SIZE in + let () = + StateSet.iter (fun q -> Hashtbl.add mapper q (State.make())) a.states + in + rename_states mapper a + + +let concat a1 a2 = + let a1 = copy a1 in + let a2 = copy a2 in + let link_phi = + StateSet.fold + (fun q phi -> Formula.(or_ (stay q) phi)) + a1.selecting_states Formula.false_ + in + Hashtbl.iter (fun q trs -> Hashtbl.add a1.transitions q trs) + a2.transitions; + StateSet.iter + (fun q -> + Hashtbl.replace a1.transitions q [(QNameSet.any, link_phi)]) + a2.starting_states; + let a = { a1 with + states = StateSet.union a1.states a2.states; + selecting_states = a2.selecting_states; + transitions = a1.transitions; + } + in compute_rank a; a + +let merge a1 a2 = + let a1 = copy a1 in + let a2 = copy a2 in + let a = { a1 with + states = StateSet.union a1.states a2.states; + selecting_states = StateSet.union a1.selecting_states a2.selecting_states; + starting_states = StateSet.union a1.starting_states a2.starting_states; + transitions = + let () = + Hashtbl.iter (fun k v -> Hashtbl.add a1.transitions k v) a2.transitions + in + a1.transitions + } in + compute_rank a ; a + + +let link a1 a2 q link_phi = + let a = { a1 with + states = StateSet.union a1.states a2.states; + selecting_states = StateSet.singleton q; + starting_states = StateSet.union a1.starting_states a2.starting_states; + transitions = + let () = + Hashtbl.iter (fun k v -> Hashtbl.add a1.transitions k v) a2.transitions + in + Hashtbl.add a1.transitions q [(QNameSet.any, link_phi)]; + a1.transitions + } + in + compute_rank a; a + +let union a1 a2 = + let a1 = copy a1 in + let a2 = copy a2 in + let q = State.make () in + let link_phi = + StateSet.fold + (fun q phi -> Formula.(or_ (stay q) phi)) + (StateSet.union a1.selecting_states a2.selecting_states) + Formula.false_ + in + link a1 a2 q link_phi + +let inter a1 a2 = + let a1 = copy a1 in + let a2 = copy a2 in + let q = State.make () in + let link_phi = + StateSet.fold + (fun q phi -> Formula.(and_ (stay q) phi)) + (StateSet.union a1.selecting_states a2.selecting_states) + Formula.true_ + in + link a1 a2 q link_phi + +let neg a = + let a = copy a in + let q = State.make () in + let link_phi = + StateSet.fold + (fun q phi -> Formula.(and_ (not_(stay q)) phi)) + a.selecting_states + Formula.true_ + in + let () = Hashtbl.add a.transitions q [(QNameSet.any, link_phi)] in + let a = + { a with + selecting_states = StateSet.singleton q; + } + in + normalize_negations a; compute_rank a; a + +let diff a1 a2 = inter a1 (neg a2)