From af9d790ca62e678e8e70ab8d8fa7f804985a75e0 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Kim=20Nguy=E1=BB=85n?= Date: Fri, 19 Jul 2013 15:33:37 +0200 Subject: [PATCH] Refactor the Ata module: - add a builder class to ensure we only manipulate well-formed automata - move all the caching infrastructure to the computation of the run - rename the Eval module to Run --- src/ata.ml | 325 ++++++++---------------------- src/ata.mli | 128 +++++++----- src/boolean.mli | 11 +- src/eval.ml | 183 ----------------- src/hcons.mli | 1 + src/run.ml | 413 ++++++++++++++++++++++++++++++++++++++ src/{eval.mli => run.mli} | 0 src/tatoo.ml | 2 +- src/tree.ml | 5 +- src/xpath/compile.ml | 14 +- 10 files changed, 590 insertions(+), 492 deletions(-) delete mode 100644 src/eval.ml create mode 100644 src/run.ml rename src/{eval.mli => run.mli} (100%) diff --git a/src/ata.ml b/src/ata.ml index 3fa2698..7310839 100644 --- a/src/ata.ml +++ b/src/ata.ml @@ -15,6 +15,7 @@ INCLUDE "utils.ml" open Format +open Misc type move = [ `First_child | `Next_sibling | `Parent @@ -28,9 +29,7 @@ type predicate = Move of move * State.t | Has_first_child | Has_next_sibling -let is_move = function Move _ -> true | _ -> false - -module Atom : (Boolean.ATOM with type data = predicate) = +module Atom = struct module Node = @@ -61,12 +60,13 @@ struct end + module Formula = struct include Boolean.Make(Atom) open Tree.NodeKind let mk_atom a = atom_ (Atom.make a) - let mk_kind k = mk_atom (Is k) + let is k = mk_atom (Is k) let has_first_child = mk_atom Has_first_child @@ -116,7 +116,6 @@ struct end - module Transition = Hcons.Make (struct type t = State.t * QNameSet.t * Formula.t let equal (a, b, c) (d, e, f) = @@ -140,256 +139,30 @@ end = -type node_summary = int -let dummy_summary = -1 -(* -4444444444443210 -4 -> kind -3 -> is_left -2 -> is_right -1 -> has_left -0 -> has_right -*) - -let has_right (s : node_summary) : bool = - Obj.magic (s land 1) -let has_left (s : node_summary) : bool = - Obj.magic ((s lsr 1) land 1) - -let is_right (s : node_summary) : bool = - Obj.magic ((s lsr 2) land 1) - -let is_left (s : node_summary) : bool = - Obj.magic ((s lsr 3) land 1) - -let kind (s : node_summary ) : Tree.NodeKind.t = - Obj.magic (s lsr 4) - -let node_summary is_left is_right has_left has_right kind = - ((Obj.magic kind) lsl 4) lor - ((Obj.magic is_left) lsl 3) lor - ((Obj.magic is_right) lsl 2) lor - ((Obj.magic has_left) lsl 1) lor - (Obj.magic has_right) - - - -type config = { - sat : StateSet.t; - unsat : StateSet.t; - todo : TransList.t; - summary : node_summary; -} - -module Config = Hcons.Make(struct - type t = config - let equal c d = - c == d || - c.sat == d.sat && - c.unsat == d.unsat && - c.todo == d.todo && - c.summary == d.summary - - let hash c = - HASHINT4((c.sat.StateSet.id :> int), - (c.unsat.StateSet.id :> int), - (c.todo.TransList.id :> int), - c.summary) -end -) - type t = { id : Uid.t; mutable states : StateSet.t; - mutable selection_states: StateSet.t; + mutable selecting_states: StateSet.t; transitions: (State.t, (QNameSet.t*Formula.t) list) Hashtbl.t; - mutable cache2 : TransList.t Cache.N2.t; - mutable cache4 : Config.t Cache.N4.t; } -let next = Uid.make_maker () - -let dummy2 = TransList.cons - (Transition.make (State.dummy,QNameSet.empty, Formula.false_)) - TransList.nil - - -let dummy_config = - Config.make { sat = StateSet.empty; - unsat = StateSet.empty; - todo = TransList.nil; - summary = dummy_summary - } +let get_states a = a.states +let get_selecting_states a = a.selecting_states -let create s ss = - let auto = { id = next (); - states = s; - selection_states = ss; - transitions = Hashtbl.create 17; - cache2 = Cache.N2.create dummy2; - cache4 = Cache.N4.create dummy_config; - } - in - at_exit (fun () -> - let n4 = ref 0 in - let n2 = ref 0 in - Cache.N2.iteri (fun _ _ _ b -> if b then incr n2) auto.cache2; - Cache.N4.iteri (fun _ _ _ _ _ b -> if b then incr n4) auto.cache4; - Logger.msg `STATS "automaton %i, cache2: %i entries, cache6: %i entries" - (auto.id :> int) !n2 !n4; - let c2l, c2u = Cache.N2.stats auto.cache2 in - let c4l, c4u = Cache.N4.stats auto.cache4 in - Logger.msg `STATS - "cache2: length: %i, used: %i, occupation: %f" - c2l c2u (float c2u /. float c2l); - Logger.msg `STATS - "cache4: length: %i, used: %i, occupation: %f" - c4l c4u (float c4u /. float c4l) - - ); - auto - -let reset a = - a.cache4 <- Cache.N4.create (Cache.N4.dummy a.cache4) - -let full_reset a = - reset a; - a.cache2 <- Cache.N2.create (Cache.N2.dummy a.cache2) - - -let get_trans_aux a tag states = +let get_trans a tag states = StateSet.fold (fun q acc0 -> try let trs = Hashtbl.find a.transitions q in List.fold_left (fun acc1 (labs, phi) -> - if QNameSet.mem tag labs then TransList.cons (Transition.make (q, labs, phi)) acc1 else acc1) acc0 trs + if QNameSet.mem tag labs then + TransList.cons (Transition.make (q, labs, phi)) acc1 + else acc1) acc0 trs with Not_found -> acc0 ) states TransList.nil -let get_trans a tag states = - let trs = - Cache.N2.find a.cache2 - (tag.QName.id :> int) (states.StateSet.id :> int) - in - if trs == dummy2 then - let trs = get_trans_aux a tag states in - (Cache.N2.add - a.cache2 - (tag.QName.id :> int) - (states.StateSet.id :> int) trs; trs) - else trs - -let simplify_atom atom pos q { Config.node=config; _ } = - if (pos && StateSet.mem q config.sat) - || ((not pos) && StateSet.mem q config.unsat) then Formula.true_ - else if (pos && StateSet.mem q config.unsat) - || ((not pos) && StateSet.mem q config.sat) then Formula.false_ - else atom - -let eval_form phi fcs nss ps ss summary = - let rec loop phi = - begin match Formula.expr phi with - Boolean.True | Boolean.False -> phi - | Boolean.Atom (a, b) -> - begin - match a.Atom.node with - | Move (m, q) -> - let states = match m with - `First_child -> fcs - | `Next_sibling -> nss - | `Parent | `Previous_sibling -> ps - | `Stay -> ss - in simplify_atom phi b q states - | Is_first_child -> Formula.of_bool (b == (is_left summary)) - | Is_next_sibling -> Formula.of_bool (b == (is_right summary)) - | Is k -> Formula.of_bool (b == (k == (kind summary))) - | Has_first_child -> Formula.of_bool (b == (has_left summary)) - | Has_next_sibling -> Formula.of_bool (b == (has_right summary)) - end - | Boolean.And(phi1, phi2) -> Formula.and_ (loop phi1) (loop phi2) - | Boolean.Or (phi1, phi2) -> Formula.or_ (loop phi1) (loop phi2) - end - in - loop phi - - - -let eval_trans auto fcs nss ps ss = - let fcsid = (fcs.Config.id :> int) in - let nssid = (nss.Config.id :> int) in - let psid = (ps.Config.id :> int) in - let rec loop old_config = - let oid = (old_config.Config.id :> int) in - let res = - let res = Cache.N4.find auto.cache4 oid fcsid nssid psid in - if res != dummy_config then res - else - let { sat = old_sat; - unsat = old_unsat; - todo = old_todo; - summary = old_summary } = old_config.Config.node - in - let sat, unsat, removed, kept, todo = - TransList.fold - (fun trs acc -> - let q, lab, phi = Transition.node trs in - let a_sat, a_unsat, a_rem, a_kept, a_todo = acc in - if StateSet.mem q a_sat || StateSet.mem q a_unsat then acc else - let new_phi = - eval_form phi fcs nss ps old_config old_summary - in - if Formula.is_true new_phi then - StateSet.add q a_sat, a_unsat, StateSet.add q a_rem, a_kept, a_todo - else if Formula.is_false new_phi then - a_sat, StateSet.add q a_unsat, StateSet.add q a_rem, a_kept, a_todo - else - let new_tr = Transition.make (q, lab, new_phi) in - (a_sat, a_unsat, a_rem, StateSet.add q a_kept, (TransList.cons new_tr a_todo)) - ) old_todo (old_sat, old_unsat, StateSet.empty, StateSet.empty, TransList.nil) - in - (* States that have been removed from the todo list and not kept are now - unsatisfiable *) - let unsat = StateSet.union unsat (StateSet.diff removed kept) in - (* States that were found once to be satisfiable remain so *) - let unsat = StateSet.diff unsat sat in - let new_config = Config.make { old_config.Config.node with sat; unsat; todo; } in - Cache.N4.add auto.cache4 oid fcsid nssid psid new_config; - new_config - in - if res == old_config then res else loop res - in - loop ss - -(* - [add_trans a q labels f] adds a transition [(q,labels) -> f] to the - automaton [a] but ensures that transitions remains pairwise disjoint -*) - -let add_trans a q s f = - let trs = try Hashtbl.find a.transitions q with Not_found -> [] in - let cup, ntrs = - List.fold_left (fun (acup, atrs) (labs, phi) -> - let lab1 = QNameSet.inter labs s in - let lab2 = QNameSet.diff labs s in - let tr1 = - if QNameSet.is_empty lab1 then [] - else [ (lab1, Formula.or_ phi f) ] - in - let tr2 = - if QNameSet.is_empty lab2 then [] - else [ (lab2, Formula.or_ phi f) ] - in - (QNameSet.union acup labs, tr1@ tr2 @ atrs) - ) (QNameSet.empty, []) trs - in - let rem = QNameSet.diff s cup in - let ntrs = if QNameSet.is_empty rem then ntrs - else (rem, f) :: ntrs - in - Hashtbl.replace a.transitions q ntrs let _pr_buff = Buffer.create 50 let _str_fmt = formatter_of_buffer _pr_buff @@ -405,7 +178,7 @@ let print fmt a = Alternating transitions:@\n" (a.id :> int) StateSet.print a.states - StateSet.print a.selection_states; + StateSet.print a.selecting_states; let trs = Hashtbl.fold (fun q t acc -> List.fold_left (fun acc (s , f) -> (q,s,f)::acc) acc t) @@ -469,7 +242,7 @@ let cleanup_states a = StateSet.iter loop (Formula.get_states phi)) trs end in - StateSet.iter loop a.selection_states; + StateSet.iter loop a.selecting_states; let unused = StateSet.diff a.states !memo in StateSet.iter (fun q -> Hashtbl.remove a.transitions q) unused; a.states <- !memo @@ -519,7 +292,7 @@ let normalize_negations auto = end in (* states that are not reachable from a selection stat are not interesting *) - StateSet.iter (fun q -> Queue.add (q, true) todo) auto.selection_states; + StateSet.iter (fun q -> Queue.add (q, true) todo) auto.selecting_states; while not (Queue.is_empty todo) do let (q, b) as key = Queue.pop todo in @@ -540,3 +313,73 @@ let normalize_negations auto = Hashtbl.replace auto.transitions q' trans'; done; cleanup_states auto + + +module Builder = + struct + type auto = t + type t = auto + let next = Uid.make_maker () + + let make () = + let auto = + { + id = next (); + states = StateSet.empty; + selecting_states = StateSet.empty; + transitions = Hashtbl.create MED_H_SIZE; + } + in + (* + at_exit (fun () -> + let n4 = ref 0 in + let n2 = ref 0 in + Cache.N2.iteri (fun _ _ _ b -> if b then incr n2) auto.cache2; + Cache.N4.iteri (fun _ _ _ _ _ b -> if b then incr n4) auto.cache4; + Logger.msg `STATS "automaton %i, cache2: %i entries, cache6: %i entries" + (auto.id :> int) !n2 !n4; + let c2l, c2u = Cache.N2.stats auto.cache2 in + let c4l, c4u = Cache.N4.stats auto.cache4 in + Logger.msg `STATS + "cache2: length: %i, used: %i, occupation: %f" + c2l c2u (float c2u /. float c2l); + Logger.msg `STATS + "cache4: length: %i, used: %i, occupation: %f" + c4l c4u (float c4u /. float c4l) + + ); *) + auto + + let add_state a ?(selecting=false) q = + a.states <- StateSet.add q a.states; + if selecting then a.selecting_states <- StateSet.add q a.selecting_states + + let add_trans a q s f = + if not (StateSet.mem q a.states) then add_state a q; + let trs = try Hashtbl.find a.transitions q with Not_found -> [] in + let cup, ntrs = + List.fold_left (fun (acup, atrs) (labs, phi) -> + let lab1 = QNameSet.inter labs s in + let lab2 = QNameSet.diff labs s in + let tr1 = + if QNameSet.is_empty lab1 then [] + else [ (lab1, Formula.or_ phi f) ] + in + let tr2 = + if QNameSet.is_empty lab2 then [] + else [ (lab2, Formula.or_ phi f) ] + in + (QNameSet.union acup labs, tr1@ tr2 @ atrs) + ) (QNameSet.empty, []) trs + in + let rem = QNameSet.diff s cup in + let ntrs = if QNameSet.is_empty rem then ntrs + else (rem, f) :: ntrs + in + Hashtbl.replace a.transitions q ntrs + + let finalize a = + complete_transitions a; + normalize_negations a; + a + end diff --git a/src/ata.mli b/src/ata.mli index e4f0ffd..8b7851b 100644 --- a/src/ata.mli +++ b/src/ata.mli @@ -13,88 +13,114 @@ (* *) (***********************************************************************) +(** Implementation of 2-way Selecting Alternating Tree Automata *) + + type move = [ `First_child | `Next_sibling | `Parent | `Previous_sibling | `Stay ] - -type predicate = Move of move * State.t - | Is_first_child - | Is_next_sibling - | Is of Tree.NodeKind.t - | Has_first_child - | Has_next_sibling - -val is_move : predicate -> bool - -module Atom : Boolean.ATOM with type data = predicate +(** Type of moves an automaton can perform *) + +type predicate = + Move of move * State.t (** In the [move] direction, the automaton must be in the given state *) + | Is_first_child (** True iff the node is the first child of its parent *) + | Is_next_sibling (** True iff the node is the next sibling of its parent *) + | Is of Tree.NodeKind.t (** True iff the node is of the given kind *) + | Has_first_child (** True iff the node has a first child *) + | Has_next_sibling (** True iff the node has a next sibling *) +(** Type of the predicates that can occur in the Boolean formulae that are in the transitions of the automaton *) + +module Atom : sig + include Hcons.S with type data = predicate + include Common_sig.Printable with type t:= t +end +(** Module representing atoms of Boolean formulae, which are simply hashconsed [predicate]s *) module Formula : sig include module type of Boolean.Make(Atom) - val mk_atom : predicate -> t - val mk_kind : Tree.NodeKind.t -> t - val has_first_child : t - val has_next_sibling : t + val first_child : State.t -> t + val next_sibling : State.t -> t + val parent : State.t -> t + val previous_sibling : State.t -> t + val stay : State.t -> t + (** [first_child], [next_sibling], [parent], [previous_sibling], [stay] create a formula which consists only + of the corresponding [move] atom. *) val is_first_child : t val is_next_sibling : t + val has_first_child : t + val has_next_sibling : t + (** [is_first_child], [is_next_sibling], [has_first_child], [has_next_sibling] are constant formulae which consist + only of the corresponding atom + *) + val is : Tree.NodeKind.t -> t + (** [is k] creates a formula that tests the kind of the current node *) val is_attribute : t val is_element : t val is_processing_instruction : t val is_comment : t - val first_child : State.t -> t - val next_sibling : State.t -> t - val parent : State.t -> t - val previous_sibling : State.t -> t - val stay : State.t -> t + (** [is_attribute], [is_element], [is_processing_instruction], [is_comment] are constant formulae that tests a + particular kind *) val get_states : t -> StateSet.t + (** [get_state f] retrieves all the states occuring in [move] predicates in [f] *) end +(** Modules representing the Boolean formulae used in transitions *) +module Transition : Hcons.S with type data = State.t * QNameSet.t * Formula.t +(** A [Transition.t] is a hashconsed triple of the state, the set of labels and the formula *) -module Transition : Hcons.S with - type data = State.t * QNameSet.t * Formula.t module TransList : sig include Hlist.S with type elt = Transition.t val print : Format.formatter -> ?sep:string -> t -> unit end +(** Hashconsed lists of transitions, with a printing facility *) + +type t +(** 2-way Selecting Alternating Tree Automata *) -type node_summary = private int -val node_summary : bool -> bool -> bool -> bool -> Tree.NodeKind.t -> node_summary -val dummy_summary : node_summary -type config = { - sat : StateSet.t; - unsat : StateSet.t; - todo : TransList.t; - summary : node_summary; -} +val get_states : t -> StateSet.t +(** return the set of states of the automaton *) -module Config : Hcons.S with type data = config -val dummy_config : Config.t +val get_selecting_states : t -> StateSet.t +(** return the set of selecting states of the automaton *) -type t = private { - id : Uid.t; - mutable states : StateSet.t; - mutable selection_states: StateSet.t; - transitions: (State.t, (QNameSet.t*Formula.t) list) Hashtbl.t; - mutable cache2 : TransList.t Cache.N2.t; - mutable cache4 : Config.t Cache.N4.t; -} +val get_trans : t -> QNameSet.elt -> StateSet.t -> TransList.t +(** [get_trans auto l q] return the list of transitions taken by [auto] + for label [l] in state [q]. Takes time proportional to the number of + transitions in the automaton. + *) +val print : Format.formatter -> t -> unit +(** Pretty printing of the automaton *) -val create : StateSet.t -> StateSet.t -> t -val reset : t -> unit -val full_reset : t -> unit -val get_trans : t -> QNameSet.elt -> StateSet.t -> TransList.t +module Builder : +sig + type auto = t + (** Alias type for the automata type *) + type t + (** Abstract type for a builder *) -val eval_trans : t -> Config.t -> Config.t -> Config.t -> Config.t -> Config.t + val make : unit -> t + (** Create a fresh builder *) -val add_trans : t -> State.t -> QNameSet.t -> Formula.t -> unit -val print : Format.formatter -> t -> unit -val complete_transitions : t -> unit -val cleanup_states : t -> unit -val normalize_negations : t -> unit + val add_state : t -> ?selecting:bool -> State.t -> unit + (** Add a state to the set of states of the automaton. The optional argument + [?selecting] (defaulting to [false]) allows to specify whether the state is + selecting. *) + + val add_trans : t -> State.t -> QNameSet.t -> Formula.t -> unit + (** Add a transition to the automaton *) + + val finalize : t -> auto + (** Finalize the automaton and return it. Clean-up unused states (states that + do not occur in any transitions and remove instantes of negative [move] atoms + by creating fresh states that accept the complement of the negated state. + *) +end + (** Builder facility for the automaton *) diff --git a/src/boolean.mli b/src/boolean.mli index 73af3c1..2c1607f 100644 --- a/src/boolean.mli +++ b/src/boolean.mli @@ -13,12 +13,6 @@ (* *) (***********************************************************************) -module type ATOM = -sig - include Hcons.S - include Common_sig.Printable with type t := t -end - type ('formula,'atom) expr = | False | True @@ -29,7 +23,10 @@ type ('formula,'atom) expr = (** View of the internal representation of a formula, used for pattern matching *) -module Make(A : ATOM) : +module Make(A : sig + include Hcons.S + include Common_sig.Printable with type t := t +end) : sig type t diff --git a/src/eval.ml b/src/eval.ml deleted file mode 100644 index 0ef0d5d..0000000 --- a/src/eval.ml +++ /dev/null @@ -1,183 +0,0 @@ -(***********************************************************************) -(* *) -(* TAToo *) -(* *) -(* Kim Nguyen, LRI UMR8623 *) -(* Université Paris-Sud & CNRS *) -(* *) -(* Copyright 2010-2013 Université Paris-Sud and Centre National de la *) -(* Recherche Scientifique. All rights reserved. This file is *) -(* distributed under the terms of the GNU Lesser General Public *) -(* License, with the special exception on linking described in file *) -(* ../LICENSE. *) -(* *) -(***********************************************************************) - -INCLUDE "utils.ml" -open Format - -module Make (T : Tree.S) : - sig - val eval : Ata.t -> T.t -> T.node -> T.node list - end - = struct - - -IFDEF HTMLTRACE - THEN -DEFINE TRACE(e) = (e) - ELSE -DEFINE TRACE(e) = () -END - - let html tree node i config msg = - let config = config.Ata.Config.node in - Html.trace (T.preorder tree node) i - "node: %i
%s
sat: %a
unsat: %a
todo: %around: %i
" - (T.preorder tree node) - msg - StateSet.print config.Ata.sat - StateSet.print config.Ata.unsat - (Ata.TransList.print ~sep:"
") config.Ata.todo i - - - - type run = { config : Ata.Config.t array; - unstable : Bitvector.t; - mutable redo : bool; - mutable pass : int; - } - - - - let top_down_run auto tree node run = - let module Array = - struct - include Array - let get a i = - if i < 0 then Ata.dummy_config else get a i - let unsafe_get a i = - if i < 0 then Ata.dummy_config else unsafe_get a i - end - in - let cache = run.config in - let unstable = run.unstable in - let _i = run.pass in - let rec loop node = - let node_id = T.preorder tree node in - if node == T.nil || not (Bitvector.get unstable node_id) then false else begin - let parent = T.parent tree node in - let fc = T.first_child tree node in - let fc_id = T.preorder tree fc in - let ns = T.next_sibling tree node in - let ns_id = T.preorder tree ns in - let tag = T.tag tree node in - let config0 = - let c = cache.(node_id) in - if c == Ata.dummy_config then - Ata.Config.make - { c.Ata.Config.node with - Ata.todo = Ata.get_trans auto tag auto.Ata.states; - summary = Ata.node_summary - (node == T.first_child tree parent) (* is_left *) - (node == T.next_sibling tree parent) (* is_right *) - (fc != T.nil) (* has_left *) - (ns != T.nil) (* has_right *) - (T.kind tree node) (* kind *) - } - else c - in - - TRACE(html tree node _i config0 "Entering node"); - - let ps = cache.(T.preorder tree parent) in - let fcs = cache.(fc_id) in - let nss = cache.(ns_id) in - let config1 = Ata.eval_trans auto fcs nss ps config0 in - - TRACE(html tree node _i config1 "Updating transitions"); - - if config0 != config1 then cache.(node_id) <- config1; - let unstable_left = loop fc in - let fcs1 = cache.(fc_id) in - let config2 = Ata.eval_trans auto fcs1 nss ps config1 in - - TRACE(html tree node _i config2 "Updating transitions (after first-child)"); - - if config1 != config2 then cache.(node_id) <- config2; - let unstable_right = loop ns in - let nss1 = cache.(ns_id) in - let config3 = Ata.eval_trans auto fcs1 nss1 ps config2 in - - TRACE(html tree node _i config3 "Updating transitions (after next-sibling)"); - - if config2 != config3 then cache.(node_id) <- config3; - let unstable_self = - unstable_left - || unstable_right - || Ata.(TransList.nil != config3.Config.node.todo) - in - Bitvector.unsafe_set unstable node_id unstable_self; - TRACE((if not unstable_self then - Html.finalize_node - node_id - _i - Ata.(StateSet.intersect config3.Config.node.sat auto.selection_states))); - unstable_self - end - in - loop node - - let get_results auto tree node cache = - let rec loop node acc = - if node == T.nil then acc - else - let acc0 = loop (T.next_sibling tree node) acc in - let acc1 = loop (T.first_child tree node) acc0 in - - if Ata.( - StateSet.intersect - cache.(T.preorder tree node).Config.node.sat - auto.selection_states) then node::acc1 - else acc1 - in - loop node [] - - - let stats run = - let count = ref 0 in - let len = Bitvector.length run.unstable in - for i = 0 to len - 1 do - if not (Bitvector.unsafe_get run.unstable i) then - incr count - done; - Logger.msg `STATS - "%i nodes over %i were skipped in iteration %i (%.2f %%), redo is: %b" - !count len run.pass (100. *. (float !count /. float len)) - run.redo - - - let eval auto tree node = - let len = T.size tree in - let run = { config = Array.create len Ata.dummy_config; - unstable = Bitvector.create ~init:true len; - redo = true; - pass = 0 - } - in - while run.redo do - run.redo <- false; - Ata.reset auto; (* prevents the .cache2 and .cache4 memoization tables from growing too much *) - run.redo <- top_down_run auto tree node run; - stats run; - run.pass <- run.pass + 1; - done; - at_exit (fun () -> Logger.msg `STATS "%i iterations" run.pass); - at_exit (fun () -> stats run); - let r = get_results auto tree node run.config in - - TRACE(Html.gen_trace (module T : Tree.S with type t = T.t) (tree)); - - r - -end diff --git a/src/hcons.mli b/src/hcons.mli index 3e80a03..05def66 100644 --- a/src/hcons.mli +++ b/src/hcons.mli @@ -14,6 +14,7 @@ (***********************************************************************) (** Implementation of generic hashconsing. *) +(** {directinclude true} *) include module type of Hcons_sig diff --git a/src/run.ml b/src/run.ml new file mode 100644 index 0000000..38b7e45 --- /dev/null +++ b/src/run.ml @@ -0,0 +1,413 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2013 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +INCLUDE "utils.ml" +open Format +open Misc + +module Make (T : Tree.S) = + struct + + module NodeSummary = + struct + (* Pack into an integer the result of the is_* and has_ predicates + for a given node *) + type t = int + let dummy = -1 + (* + 4444444444443210 + 4 -> kind + 3 -> is_left + 2 -> is_right + 1 -> has_left + 0 -> has_right + *) + + let has_right (s : t) : bool = + Obj.magic (s land 1) + + let has_left (s : t) : bool = + Obj.magic ((s lsr 1) land 1) + + let is_right (s : t) : bool = + Obj.magic ((s lsr 2) land 1) + + let is_left (s : t) : bool = + Obj.magic ((s lsr 3) land 1) + + let kind (s : t) : Tree.NodeKind.t = + Obj.magic (s lsr 4) + + let make is_left is_right has_left has_right kind = + ((Obj.magic kind) lsl 4) lor + ((int_of_bool is_left) lsl 3) lor + ((int_of_bool is_right) lsl 2) lor + ((int_of_bool has_left) lsl 1) lor + (int_of_bool has_right) + + end + + type node_status = { + sat : StateSet.t; + unsat : StateSet.t; + todo : Ata.TransList.t; + summary : NodeSummary.t; + } +(* Describe what is kept at each node for a run *) + + module NodeStatus = Hcons.Make(struct + type t = node_status + let equal c d = + c == d || + c.sat == d.sat && + c.unsat == d.unsat && + c.todo == d.todo && + c.summary == d.summary + + let hash c = + HASHINT4((c.sat.StateSet.id :> int), + (c.unsat.StateSet.id :> int), + (c.todo.Ata.TransList.id :> int), + c.summary) + end + ) + + let dummy_status = + NodeStatus.make { sat = StateSet.empty; + unsat = StateSet.empty; + todo = Ata.TransList.nil; + summary = NodeSummary.dummy; + } + + + type run = { + tree : T.t ; + (* The argument of the run *) + auto : Ata.t; + (* The automaton to be run *) + status : NodeStatus.t array; + (* A mapping from node preorders to NodeStatus *) + unstable : Bitvector.t; + (* A bitvector remembering whether a subtree is stable *) + mutable redo : bool; + (* A boolean indicating whether the run is incomplete *) + mutable pass : int; + (* The number of times this run was updated *) + mutable cache2 : Ata.TransList.t Cache.N2.t; + (* A cache from states * label to list of transitions *) + mutable cache4 : NodeStatus.t Cache.N4.t; + } + + let pass r = r.pass + let stable r = not r.redo + let auto r = r.auto + let tree r = r.tree + + + let dummy_trl = + Ata.(TransList.cons + (Transition.make + (State.dummy,QNameSet.empty, Formula.false_)) + TransList.nil) + + let make auto tree = + let len = T.size tree in + { + tree = tree; + auto = auto; + status = Array.create len dummy_status; + unstable = Bitvector.create ~init:true len; + redo = true; + pass = 0; + cache2 = Cache.N2.create dummy_trl; + cache4 = Cache.N4.create dummy_status; + } + + let get_status a i = + if i < 0 then dummy_status else Array.get a i + + let unsafe_get_status a i = + if i < 0 then dummy_status else Array.unsafe_get a i + +IFDEF HTMLTRACE + THEN +DEFINE TRACE(e) = (e) + ELSE +DEFINE TRACE(e) = () +END + + let html tree node i config msg = + let config = config.NodeStatus.node in + Html.trace (T.preorder tree node) i + "node: %i
%s
sat: %a
unsat: %a
todo: %around: %i
" + (T.preorder tree node) + msg + StateSet.print config.sat + StateSet.print config.unsat + (Ata.TransList.print ~sep:"
") config.todo i + + + + let get_trans cache2 auto tag states = + let trs = + Cache.N2.find cache2 + (tag.QName.id :> int) (states.StateSet.id :> int) + in + if trs == dummy_trl then + let trs = Ata.get_trans auto tag states in + (Cache.N2.add + cache2 + (tag.QName.id :> int) + (states.StateSet.id :> int) trs; trs) + else trs + + + + let simplify_atom atom pos q { NodeStatus.node = status; _ } = + if (pos && StateSet.mem q status.sat) + || ((not pos) && StateSet.mem q status.unsat) then Ata.Formula.true_ + else if (pos && StateSet.mem q status.unsat) + || ((not pos) && StateSet.mem q status.sat) then Ata.Formula.false_ + else atom + + + let eval_form phi fcs nss ps ss summary = + let open Ata in + let rec loop phi = + begin match Formula.expr phi with + Boolean.True | Boolean.False -> phi + | Boolean.Atom (a, b) -> + begin + let open NodeSummary in + match a.Atom.node with + | Move (m, q) -> + let states = match m with + `First_child -> fcs + | `Next_sibling -> nss + | `Parent | `Previous_sibling -> ps + | `Stay -> ss + in simplify_atom phi b q states + | Is_first_child -> Formula.of_bool (b == is_left summary) + | Is_next_sibling -> Formula.of_bool (b == is_right summary) + | Is k -> Formula.of_bool (b == (k == kind summary)) + | Has_first_child -> Formula.of_bool (b == has_left summary) + | Has_next_sibling -> Formula.of_bool (b == has_right summary) + end + | Boolean.And(phi1, phi2) -> Formula.and_ (loop phi1) (loop phi2) + | Boolean.Or (phi1, phi2) -> Formula.or_ (loop phi1) (loop phi2) + end + in + loop phi + + + + let eval_trans cache4 fcs nss ps ss = + let fcsid = (fcs.NodeStatus.id :> int) in + let nssid = (nss.NodeStatus.id :> int) in + let psid = (ps.NodeStatus.id :> int) in + let rec loop old_config = + let oid = (old_config.NodeStatus.id :> int) in + let res = + let res = Cache.N4.find cache4 oid fcsid nssid psid in + if res != dummy_status then res + else + let { sat = old_sat; + unsat = old_unsat; + todo = old_todo; + summary = old_summary } = old_config.NodeStatus.node + in + let sat, unsat, removed, kept, todo = + Ata.TransList.fold + (fun trs acc -> + let q, lab, phi = Ata.Transition.node trs in + let a_sat, a_unsat, a_rem, a_kept, a_todo = acc in + if StateSet.mem q a_sat || StateSet.mem q a_unsat then acc else + let new_phi = + eval_form phi fcs nss ps old_config old_summary + in + if Ata.Formula.is_true new_phi then + StateSet.add q a_sat, a_unsat, StateSet.add q a_rem, a_kept, a_todo + else if Ata.Formula.is_false new_phi then + a_sat, StateSet.add q a_unsat, StateSet.add q a_rem, a_kept, a_todo + else + let new_tr = Ata.Transition.make (q, lab, new_phi) in + (a_sat, a_unsat, a_rem, StateSet.add q a_kept, (Ata.TransList.cons new_tr a_todo)) + ) old_todo (old_sat, old_unsat, StateSet.empty, StateSet.empty, Ata.TransList.nil) + in + (* States that have been removed from the todo list and not kept are now + unsatisfiable *) + let unsat = StateSet.union unsat (StateSet.diff removed kept) in + (* States that were found once to be satisfiable remain so *) + let unsat = StateSet.diff unsat sat in + let new_config = NodeStatus.make { old_config.NodeStatus.node with sat; unsat; todo; } in + Cache.N4.add cache4 oid fcsid nssid psid new_config; + new_config + in + if res == old_config then res else loop res + in + loop ss + + + + + let top_down node run = + let tree = run.tree in + let auto = run.auto in + let status = run.status in + let cache2 = run.cache2 in + let cache4 = run.cache4 in + let unstable = run.unstable in + let rec loop node = + let node_id = T.preorder tree node in + if node == T.nil || not (Bitvector.get unstable node_id) then false else begin + let parent = T.parent tree node in + let fc = T.first_child tree node in + let fc_id = T.preorder tree fc in + let ns = T.next_sibling tree node in + let ns_id = T.preorder tree ns in + let tag = T.tag tree node in + (* We enter the node from its parent *) + + let status0 = + let c = unsafe_get_status status node_id in + if c == dummy_status then + (* first time we visit the node *) + NodeStatus.make + { c.NodeStatus.node with + todo = get_trans cache2 auto tag (Ata.get_states auto); + summary = NodeSummary.make + (node == T.first_child tree parent) (* is_left *) + (node == T.next_sibling tree parent) (* is_right *) + (fc != T.nil) (* has_left *) + (ns != T.nil) (* has_right *) + (T.kind tree node) (* kind *) + } + else c + in + + TRACE(html tree node _i config0 "Entering node"); + + (* get the node_statuses for the first child, next sibling and parent *) + let ps = unsafe_get_status status (T.preorder tree parent) in + let fcs = unsafe_get_status status fc_id in + let nss = unsafe_get_status status ns_id in + (* evaluate the transitions with all this statuses *) + let status1 = eval_trans cache4 fcs nss ps status0 in + + TRACE(html tree node _i config1 "Updating transitions"); + + (* update the cache if the status of the node changed *) + + if status1 != status0 then status.(node_id) <- status1; + (* recursively traverse the first child *) + let unstable_left = loop fc in + (* here we re-enter the node from its first child, + get the new status of the first child *) + let fcs1 = unsafe_get_status status fc_id in + (* update the status *) + let status2 = eval_trans cache4 fcs1 nss ps status1 in + + TRACE(html tree node _i config2 "Updating transitions (after first-child)"); + + if status2 != status1 then status.(node_id) <- status2; + let unstable_right = loop ns in + let nss1 = unsafe_get_status status ns_id in + let status3 = eval_trans cache4 fcs1 nss1 ps status2 in + + TRACE(html tree node _i config3 "Updating transitions (after next-sibling)"); + + if status3 != status2 then status.(node_id) <- status3; + + let unstable_self = + (* if either our left or right child is unstable or if we still have transitions + pending, the current node is unstable *) + unstable_left + || unstable_right + || Ata.TransList.nil != status3.NodeStatus.node.todo + in + Bitvector.unsafe_set unstable node_id unstable_self; + TRACE((if not unstable_self then + Html.finalize_node + node_id + _i + Ata.(StateSet.intersect config3.Config.node.sat auto.selection_states))); + unstable_self + end + in + run.redo <- loop node; + run.pass <- run.pass + 1 + +(* + let stats run = + let count = ref 0 in + let len = Bitvector.length run.unstable in + for i = 0 to len - 1 do + if not (Bitvector.unsafe_get run.unstable i) then + incr count + done; + Logger.msg `STATS + "%i nodes over %i were skipped in iteration %i (%.2f %%), redo is: %b" + !count len run.pass (100. *. (float !count /. float len)) + run.redo + + + let eval auto tree node = + let len = T.size tree in + let run = { config = Array.create len Ata.dummy_config; + unstable = Bitvector.create ~init:true len; + redo = true; + pass = 0 + } + in + while run.redo do + run.redo <- false; + Ata.reset auto; (* prevents the .cache2 and .cache4 memoization tables from growing too much *) + run.redo <- top_down_run auto tree node run; + stats run; + run.pass <- run.pass + 1; + done; + at_exit (fun () -> Logger.msg `STATS "%i iterations" run.pass); + at_exit (fun () -> stats run); + let r = get_results auto tree node run.config in + + TRACE(Html.gen_trace (module T : Tree.S with type t = T.t) (tree)); + + r +*) + + let get_results run = + let cache = run.status in + let auto = run.auto in + let tree = run.tree in + let rec loop node acc = + if node == T.nil then acc + else + let acc0 = loop (T.next_sibling tree node) acc in + let acc1 = loop (T.first_child tree node) acc0 in + + if Ata.( + StateSet.intersect + cache.(T.preorder tree node).NodeStatus.node.sat + (get_selecting_states auto)) then node::acc1 + else acc1 + in + loop (T.root tree) [] + + + let eval auto tree node = + let run = make auto tree in + while run.redo do top_down node run done; + get_results run +end diff --git a/src/eval.mli b/src/run.mli similarity index 100% rename from src/eval.mli rename to src/run.mli diff --git a/src/tatoo.ml b/src/tatoo.ml index 8b50581..c3cddfe 100644 --- a/src/tatoo.ml +++ b/src/tatoo.ml @@ -50,7 +50,7 @@ let main () = Logger.msg `STATS "@[Automaton: @\n%a@]" Ata.print auto; end; - let module Naive = Eval.Make(Naive_tree) in + let module Naive = Run.Make(Naive_tree) in let results = time (Naive.eval auto doc) (Naive_tree.root doc) "evaluating query" in diff --git a/src/tree.ml b/src/tree.ml index 8f37be5..d44755e 100644 --- a/src/tree.ml +++ b/src/tree.ml @@ -104,9 +104,8 @@ sig (** Returns the kind of the given node *) val preorder : t -> node -> int - (** Returns the position of a node in pre-order in the tree. The - root has preorder 0. [nil] has pre-order [-1]. + (** [preorder t n] returns the pre-order position of [n] in [t]. + [preodrder t (root t) == 0] and [preorder t nil < 0]. *) - val print_node : Format.formatter -> node -> unit end diff --git a/src/xpath/compile.ml b/src/xpath/compile.ml index 8ab26fb..c53057e 100644 --- a/src/xpath/compile.ml +++ b/src/xpath/compile.ml @@ -43,7 +43,7 @@ let compile_axis_test axis (test,kind) phi trans states = let q = State.make () in let phi = match kind with Tree.NodeKind.Node -> phi - | _ -> phi %% F.mk_kind kind + | _ -> phi %% F.is kind in let phi', trans', states' = match axis with @@ -229,11 +229,13 @@ let path p = in (StateSet.add ms ams), natrs, nasts) (StateSet.empty, [], StateSet.empty) p in - let a = Ata.create states mstates in + let builder = Ata.Builder.make () in + StateSet.iter + (Ata.Builder.add_state builder ~selecting:true) mstates; + StateSet.iter + (Ata.Builder.add_state builder) states; List.iter (fun (q, l) -> List.iter (fun (lab, phi) -> - Ata.add_trans a q lab phi + Ata.Builder.add_trans builder q lab phi ) l) trans; - Ata.complete_transitions a; - Ata.normalize_negations a; - a + Ata.Builder.finalize builder -- 2.17.1