From: Kim Nguyễn Date: Sat, 9 Mar 2013 10:14:37 +0000 (+0100) Subject: Rework the formula predicates: X-Git-Tag: v0.1~157 X-Git-Url: http://git.nguyen.vg/gitweb/?p=tatoo.git;a=commitdiff_plain;h=35c32fbd2543a399cc6939f21317bebf37172646 Rework the formula predicates: - adds testing for first child, next sibling etc and their converse - make the predicate complete w.r.t negation - move the evaluation of formula to the Eval module (where the type of context is known) --- diff --git a/src/auto/ata.ml b/src/auto/ata.ml index 13c4854..90168b2 100644 --- a/src/auto/ata.ml +++ b/src/auto/ata.ml @@ -14,105 +14,111 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) INCLUDE "utils.ml" open Format open Utils -type move = [ `Left | `Right | `Up1 | `Up2 | `Epsilon |`Is1 |`Is2 ] -type state_ctx = { mutable left : StateSet.t; - mutable right : StateSet.t; - mutable up1 : StateSet.t; - mutable up2 : StateSet.t; - mutable epsilon : StateSet.t; - mutable is_left : bool; - mutable is_root : bool} - -type pred_ = move * bool * State.t -let make_ctx a b c d e f g = - { left = a; right = b; up1 = c; up2 = d; epsilon = e; is_left = f; is_root = g } - -let print_ctx fmt c = fprintf fmt "{ left : %a; right : %a; up1: %a ; up2 : %a; epsilon : %a ; is_left : %b; is_root : %b }" - StateSet.print c.left StateSet.print c.right StateSet.print c.up1 StateSet.print c.up2 - StateSet.print c.epsilon - c.is_left c.is_root - -module Move : (Formula.PREDICATE with type data = pred_ and type ctx = state_ctx ) = +type predicate = | First_child + | Next_sibling + | Parent + | Previous_sibling + | Stay + | Is_first_child + | Is_next_sibling + | Is_attribute + | Has_first_child + | Has_next_sibling + +let is_move p = match p with +| First_child | Next_sibling +| Parent | Previous_sibling | Stay -> true +| _ -> false + + +type atom = predicate * bool * State.t + +module Atom : (Formula.ATOM with type data = atom) = struct module Node = struct - type t = move * bool * State.t + type t = atom let equal n1 n2 = n1 = n2 let hash n = Hashtbl.hash n end - type ctx = state_ctx - - include Hcons.Make(Node) - let _pr_buff = Buffer.create 10 - let _str_fmt = formatter_of_buffer _pr_buff - let _flush_str_fmt () = pp_print_flush _str_fmt (); - let s = Buffer.contents _pr_buff in - Buffer.clear _pr_buff; s let print ppf a = - let _ = _flush_str_fmt () in - - let m, b, s = a.node in - let dir,num = - match m with - | `Left -> Pretty.down_arrow, Pretty.subscript 1 - | `Right -> Pretty.down_arrow, Pretty.subscript 2 - | `Epsilon -> Pretty.epsilon, "" - | `Up1 -> Pretty.up_arrow, Pretty.subscript 1 - | `Up2 -> Pretty.up_arrow, Pretty.subscript 2 - | `Is1 -> "?", Pretty.subscript 1 - | `Is2 -> "?", Pretty.subscript 2 - in - fprintf _str_fmt "%s%s" dir num; - if s != State.dummy then State.print _str_fmt s; - let str = _flush_str_fmt () in - fprintf ppf "%s%s" (if b then "" else Pretty.lnot) str - let neg p = - let l, b, s = p.node in - make (l, not b, s) - - exception NegativeAtom of (move*State.t) - - let eval ctx p = - let l, b, s = p.node in - if s == State.dummy then - let dir = - match l with - | `Is1 -> ctx.is_left - | _ -> not ctx.is_left - in - let res = dir && not ctx.is_root in - res && b || (not (b || res)) - else begin - if not b then raise (NegativeAtom(l,s)); - StateSet.mem s begin - match l with - `Left -> ctx.left - | `Right -> ctx.right - | `Up1 -> ctx.up1 - | `Up2 -> ctx.up2 - | `Epsilon -> ctx.epsilon - | _ -> StateSet.empty - end - end + let p, b, q = a.node in + if not b then fprintf ppf "%s" Pretty.lnot; + match p with + | First_child -> fprintf ppf "FC(%a)" State.print q + | Next_sibling -> fprintf ppf "NS(%a)" State.print q + | Parent -> fprintf ppf "FC%s(%a)" Pretty.inverse State.print q + | Previous_sibling -> fprintf ppf "NS%s(%a)" Pretty.inverse State.print q + | Stay -> fprintf ppf "%s(%a)" Pretty.epsilon State.print q + | Is_first_child -> fprintf ppf "FC%s?" Pretty.inverse + | Is_next_sibling -> fprintf ppf "NS%s?" Pretty.inverse + | Is_attribute -> fprintf ppf "@?" + | Has_first_child -> fprintf ppf "FC?" + | Has_next_sibling -> fprintf ppf "NS?" + + let neg a = + let p, b, q = a.node in + make (p, not b, q) + + +end + +module SFormula = +struct + include Formula.Make(Atom) + let mk_atom a b c = atom_ (Atom.make (a,b,c)) + let has_first_child = + (mk_atom Has_first_child true State.dummy) + + let has_next_sibling = + (mk_atom Has_next_sibling true State.dummy) + + let is_first_child = + (mk_atom Is_first_child true State.dummy) + + let is_next_sibling = + (mk_atom Is_next_sibling true State.dummy) + + let is_attribute = + (mk_atom Is_attribute true State.dummy) + + let first_child q = + and_ + (mk_atom First_child true q) + has_first_child + + let next_sibling q = + and_ + (mk_atom Next_sibling true q) + has_next_sibling + + let parent q = + and_ + (mk_atom Parent true q) + is_first_child + + let previous_sibling q = + and_ + (mk_atom Previous_sibling true q) + is_next_sibling + let stay q = + (mk_atom Stay true q) end -module SFormula = Formula.Make(Move) type t = { id : Uid.t; mutable states : StateSet.t; - mutable top_states : StateSet.t; - mutable bottom_states: StateSet.t; mutable selection_states: StateSet.t; transitions: (State.t, (QNameSet.t*SFormula.t) list) Hashtbl.t; } @@ -121,8 +127,6 @@ let next = Uid.make_maker () let create () = { id = next (); states = StateSet.empty; - top_states = StateSet.empty; - bottom_states = StateSet.empty; selection_states = StateSet.empty; transitions = Hashtbl.create 17; } @@ -175,14 +179,10 @@ let print fmt a = fprintf fmt "\nInternal UID: %i@\n\ States: %a@\n\ - Top states: %a@\n\ - Bottom states: %a@\n\ Selection states: %a@\n\ Alternating transitions:@\n" (a.id :> int) StateSet.print a.states - StateSet.print a.top_states - StateSet.print a.bottom_states StateSet.print a.selection_states; let trs = Hashtbl.fold @@ -236,11 +236,17 @@ let complete_transitions a = Hashtbl.replace a.transitions q nqtrans ) a.states + (* [normalize_negations a] removes negative atoms in the formula complementing the sub-automaton in the negative states. [TODO check the meaning of negative upward arrows] *) + let normalize_negations auto = + eprintf "Automaton before normalize_trans:\n"; + print err_formatter auto; + eprintf "--------------------\n%!"; + let memo_state = Hashtbl.create 17 in let todo = Queue.create () in let rec flip b f = @@ -249,13 +255,13 @@ let normalize_negations auto = | Formula.Or(f1, f2) -> (if b then SFormula.or_ else SFormula.and_)(flip b f1) (flip b f2) | Formula.And(f1, f2) -> (if b then SFormula.and_ else SFormula.or_)(flip b f1) (flip b f2) | Formula.Atom(a) -> begin - let l, b', q = Move.node a in + let l, b', q = Atom.node a in if q == State.dummy then if b then f else SFormula.not_ f else if b == b' then begin (* a appears positively, either no negation or double negation *) if not (Hashtbl.mem memo_state (q,b)) then Queue.add (q,true) todo; - SFormula.atom_ (Move.make (l, true, q)) + SFormula.atom_ (Atom.make (l, true, q)) end else begin (* need to reverse the atom either we have a positive state deep below a negation @@ -272,14 +278,10 @@ let normalize_negations auto = (* create a new state and add it to the todo queue *) let nq = State.make () in auto.states <- StateSet.add nq auto.states; - if not (StateSet.mem q auto.bottom_states) then - auto.bottom_states <- StateSet.add nq auto.bottom_states; - if not (StateSet.mem q auto.top_states) then - auto.top_states <- StateSet.add nq auto.top_states; Hashtbl.add memo_state (q, false) nq; Queue.add (q, false) todo; nq in - SFormula.atom_ (Move.make (l, true, not_q)) + SFormula.atom_ (Atom.make (l, true, not_q)) end end in @@ -296,15 +298,13 @@ let normalize_negations auto = let nq = if b then q else let nq = State.make () in auto.states <- StateSet.add nq auto.states; - if not (StateSet.mem q auto.bottom_states) then - auto.bottom_states <- StateSet.add nq auto.bottom_states; - if not (StateSet.mem q auto.top_states) then - auto.top_states <- StateSet.add nq auto.top_states; nq in Hashtbl.add memo_state key nq; nq in let trans = Hashtbl.find auto.transitions q in let trans' = List.map (fun (lab, f) -> lab, flip b f) trans in - Hashtbl.replace auto.transitions q' trans' + Hashtbl.replace auto.transitions q' trans'; done + + diff --git a/src/auto/eval.ml b/src/auto/eval.ml index 80df253..4921b7f 100644 --- a/src/auto/eval.ml +++ b/src/auto/eval.ml @@ -14,7 +14,7 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) INCLUDE "utils.ml" @@ -30,11 +30,43 @@ module Make (T : Tree.Sig.S) = struct with Not_found -> StateSet.empty let set c t n v = Hashtbl.replace c (T.preorder t n) v + let eval_form phi tree node fcs nss ps ss = + let rec loop phi = + match Ata.SFormula.expr phi with + Formula.True -> true + | Formula.False -> false + | Formula.Atom a -> + let p, b, q = Ata.Atom.node a in + let pos = + let open Ata in + match p with + | First_child -> StateSet.mem q fcs + | Next_sibling -> StateSet.mem q nss + | Parent | Previous_sibling -> StateSet.mem q ps + | Stay -> StateSet.mem q ss + | Is_first_child -> + node == (T.first_child tree (T.parent tree node)) + | Is_next_sibling -> + node == (T.next_sibling tree (T.parent tree node)) + | Is_attribute -> + QName.has_attribute_prefix (T.tag tree node) + | Has_first_child -> + T.nil != T.first_child tree node + | Has_next_sibling -> + T.nil != T.next_sibling tree node + in + if Ata.is_move p && (not b) then + eprintf "Warning: Invalid negative atom %a" Ata.Atom.print a; + b == pos + | Formula.And(phi1, phi2) -> loop phi1 && loop phi2 + | Formula.Or (phi1, phi2) -> loop phi1 || loop phi2 + in + loop phi - let eval_trans l ctx acc = + let eval_trans l tree node fcs nss ps ss acc = List.fold_left (fun (acct, accs) ((q, phi) as trs) -> if StateSet.mem q accs then (acct, accs) else - if Ata.SFormula.eval ctx phi then + if eval_form phi tree node fcs nss ps ss then (acct, StateSet.add q accs) else (trs::acct, accs) @@ -42,7 +74,7 @@ module Make (T : Tree.Sig.S) = struct let top_down_run auto tree node cache i = let redo = ref false in - let rec loop node is_left = + let rec loop node = if node != T.nil then begin let parent = T.parent tree node in let fc = T.first_child tree node in @@ -50,34 +82,32 @@ module Make (T : Tree.Sig.S) = struct let states0 = get cache tree node in let tag = T.tag tree node in let trans0 = Ata.get_trans auto auto.Ata.states tag in - let parent_states = if parent == T.nil then auto.Ata.top_states else get cache tree parent in - let fc_states = if fc == T.nil then auto.Ata.bottom_states else get cache tree fc in - let ns_states = if ns == T.nil then auto.Ata.bottom_states else get cache tree ns in - let is_root = parent == T.nil in - let ctx0 = - if is_left then - Ata.make_ctx fc_states ns_states parent_states StateSet.empty states0 is_left is_root - else - Ata.make_ctx fc_states ns_states StateSet.empty parent_states states0 is_left is_root - in - eprintf "-- [Iteration % 4d] --\n node: %a\n context: %a\n%!" - i T.print_node node Ata.print_ctx ctx0; - List.iter (fun (q, phi) -> eprintf " %a -> %a\n" State.print q Ata.SFormula.print phi) trans0; + let ps = get cache tree parent in + let fcs = get cache tree fc in + let nss = get cache tree ns in + eprintf "-- [Iteration % 4d] --\n node: %a\n%!" i T.print_node node; + List.iter (fun (q, phi) -> eprintf " %a -> %a\n" + State.print q Ata.SFormula.print phi) trans0; eprintf "----------------------\n%!"; - let trans1, states1 = eval_trans trans0 ctx0 StateSet.empty in + let trans1, states1 = + eval_trans trans0 tree node fcs nss ps states0 states0 + in if states1 != states0 then set cache tree node states1; - let () = loop fc true in - let ctx1 = {ctx0 with Ata.left = (get cache tree fc) ; Ata.epsilon = states1 } in - let trans2, states2 = eval_trans trans1 ctx1 states1 in + let () = loop fc in + let fcs1 = get cache tree fc in + let trans2, states2 = + eval_trans trans1 tree node fcs1 nss ps states1 states1 + in if states2 != states1 then set cache tree node states2; - let () = loop ns false in - let ctx2 = { ctx1 with Ata.right = (get cache tree ns); Ata.epsilon = states2 } in - let _, states3 = eval_trans trans2 ctx2 states2 in + let () = loop ns in + let _, states3 = + eval_trans trans2 tree node fcs1 (get cache tree ns) ps states2 states2 + in if states3 != states2 then set cache tree node states3; if states0 != states3 && (not !redo) then redo := true end in - loop node true; + loop node; !redo let get_results auto tree node cache = diff --git a/src/auto/formula.ml b/src/auto/formula.ml index a27ffc6..7128ba4 100644 --- a/src/auto/formula.ml +++ b/src/auto/formula.ml @@ -14,7 +14,7 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) INCLUDE "utils.ml" @@ -27,24 +27,22 @@ open Utils (** Implementation of hashconsed Boolean formulae *) *) -module type PREDICATE = +module type ATOM = sig type t - type ctx - val eval : ctx -> t -> bool val neg : t -> t include Hcons.Abstract with type t := t include Common_sig.Printable with type t := t end -type ('formula,'pred) expr = +type ('formula,'atom) expr = | False | True | Or of 'formula * 'formula | And of 'formula * 'formula - | Atom of 'pred + | Atom of 'atom -module Make (P: PREDICATE) = +module Make (P: ATOM) = struct @@ -172,12 +170,4 @@ let and_ f1 f2 = let of_bool = function true -> true_ | false -> false_ -let rec eval ctx f = - match f.Node.node.pos with - True -> true - | False -> false - | Atom p -> P.eval ctx p - | And(f1, f2) -> eval ctx f1 && eval ctx f2 - | Or(f1, f2) -> eval ctx f1 || eval ctx f2 - end diff --git a/src/auto/formula.mli b/src/auto/formula.mli index 78df01c..2421d51 100644 --- a/src/auto/formula.mli +++ b/src/auto/formula.mli @@ -14,30 +14,28 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) -module type PREDICATE = +module type ATOM = sig type t - type ctx - val eval : ctx -> t -> bool val neg : t -> t include Utils.Hcons.Abstract with type t := t include Utils.Common_sig.Printable with type t := t end -type ('formula,'pred) expr = +type ('formula,'atom) expr = | False | True | Or of 'formula * 'formula | And of 'formula * 'formula - | Atom of 'pred + | Atom of 'atom (** View of the internal representation of a formula, used for pattern matching *) -module Make(P : PREDICATE) : +module Make(P : ATOM) : sig type t @@ -86,6 +84,4 @@ sig val of_bool : bool -> t (** Convert an ocaml Boolean value to a formula *) - val eval : P.ctx -> t -> bool - (** Evaluate a formula given a context for atomic predicates *) end diff --git a/src/xpath/compile.ml b/src/xpath/compile.ml index 15a5c60..123583f 100644 --- a/src/xpath/compile.ml +++ b/src/xpath/compile.ml @@ -14,24 +14,22 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) open Ast open Auto open Utils -let mk_atom l b q = - Ata.SFormula.atom_ (Ata.Move.make (l,b,q)) let ( => ) a b = (a, b) -let ( ** ) l q = mk_atom l true q -let is_left = mk_atom `Is1 true State.dummy -let is_right = mk_atom `Is2 true State.dummy let ( ++ ) a b = Ata.SFormula.or_ a b let ( %% ) a b = Ata.SFormula.and_ a b let ( @: ) a b = StateSet.add a b +module F = Ata.SFormula + + let node_set = QNameSet.remove QName.document QNameSet.any let star_set = QNameSet.diff QNameSet.any ( List.fold_right (QNameSet.add) @@ -53,64 +51,62 @@ let compile_axis_test axis test phi trans states = let phi', trans', states' = match axis with | Self -> - (`Epsilon ** q), - (q, [ test => phi ]) :: trans, - states + (F.stay q, + (q, [ test => phi ]) :: trans, + states) | Child -> - (`Left ** q), - (q, [ test => phi; - QNameSet.any => (`Right ** q) ]) :: trans, - states + (F.first_child q, + (q, [ test => phi; + QNameSet.any => F.next_sibling q ]) :: trans, + states) | Descendant self -> - (if self then (`Epsilon ** q) else (`Left ** q)), - (q, [ test => phi; - QNameSet.any => (`Left ** q); - QNameSet.any => (`Right ** q) ]) :: trans, - states + ((if self then F.stay q else F.first_child q), + (q, [ test => phi; + QNameSet.any => F.first_child q ++ F.next_sibling q; + ]) :: trans, + states) | Parent -> let q' = State.make () in - let move = (`Up1 ** q %% is_left) ++ (`Up2 ** q' %% is_right) in - move, - (q, [ test => phi ]) - :: (q', [ QNameSet.any => move ]) :: trans, - (q' @: states) + let move = F.parent q ++ F.previous_sibling q' in + (move, + (q, [ test => phi ]) + :: (q', [ QNameSet.any => move ]) :: trans, + (q' @: states)) | Ancestor self -> let q' = State.make () in - let move = (`Up1 ** q %% is_left) ++ (`Up2 ** q' %% is_right) in - (if self then (`Epsilon ** q) else move), + let move = F.parent q ++ F.previous_sibling q' in + (if self then F.stay q else move), (q, [ test => phi; - star_set => move ]) + QNameSet.any => move ]) :: (q', [ QNameSet.any => move ]) :: trans, (q' @: states) | FollowingSibling | PrecedingSibling -> let move = if axis = PrecedingSibling then - (`Up2 ** q) - else (`Right ** q %% is_right) + F.previous_sibling q + else F.next_sibling q in move, (q, [ test => phi; - star_set => move ]) :: trans, + QNameSet.any => move ]) :: trans, states | Attribute -> - let q' = State.make () in let test = if QNameSet.is_finite test then QNameSet.fold (fun tag acc -> QNameSet.add (QName.add_attribute_prefix tag) acc) test QNameSet.empty else test in - (`Left ** q), - (q, [ QNameSet.singleton QName.attribute_map => (`Left ** q') ]) - :: (q', [ test => phi; - QNameSet.any => (`Right ** q') ]) :: trans, - (q' @:states) + (F.first_child q, + (q, [ test => phi %% F.is_attribute; + QNameSet.any => F.next_sibling q]) :: trans, + states) | _ -> assert false in @@ -121,9 +117,6 @@ let compile_rev_axis_test axis test phi trans states = match axis with | Attribute -> assert false | _ -> compile_axis_test (invert_axis axis) test phi trans states -;; - - let rec compile_expr e trans states = match e with