From: Kim Nguyễn Date: Mon, 4 Mar 2013 15:49:36 +0000 (+0100) Subject: First implementation of compilation from XPath to automata using X-Git-Tag: v0.1~174 X-Git-Url: http://git.nguyen.vg/gitweb/?p=tatoo.git;a=commitdiff_plain;h=f49a93deba13602e16a3923695281e9a20215ac8 First implementation of compilation from XPath to automata using the reverse axes technique. --- diff --git a/src/auto/ata.ml b/src/auto/ata.ml index ca641b4..aa4fb38 100644 --- a/src/auto/ata.ml +++ b/src/auto/ata.ml @@ -14,7 +14,7 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) INCLUDE "utils.ml" @@ -84,11 +84,11 @@ struct end module SFormula = Formula.Make(Move) -type 'a t = { +type t = { id : Uid.t; mutable states : StateSet.t; - mutable top_states : StateSet.t; - mutable bottom_states: StateSet.t; +(* mutable top_states : StateSet.t; + mutable bottom_states: StateSet.t; *) mutable selection_states: StateSet.t; transitions: (State.t, (QNameSet.t*SFormula.t) list) Hashtbl.t; } @@ -97,8 +97,8 @@ let next = Uid.make_maker () let create () = { id = next (); states = StateSet.empty; - top_states = StateSet.empty; - bottom_states = StateSet.empty; +(* top_states = StateSet.empty; + bottom_states = StateSet.empty; *) selection_states = StateSet.empty; transitions = Hashtbl.create 17; } @@ -137,14 +137,12 @@ let print fmt a = fprintf fmt "Unique ID: %i@\n\ States %a@\n\ - Top states: %a@\n\ - Bottom states: %a@\n\ Selection states: %a@\n\ Alternating transitions:@\n" (a.id :> int) StateSet.print a.states - StateSet.print a.top_states - StateSet.print a.bottom_states +(* StateSet.print a.top_states + StateSet.print a.bottom_states *) StateSet.print a.selection_states; let trs = Hashtbl.fold @@ -233,7 +231,7 @@ let normalize_negations a = end end in - StateSet.iter (fun q -> Queue.add (q, true) todo) a.top_states; + StateSet.iter (fun q -> Queue.add (q, true) todo) a.selection_states; while not (Queue.is_empty todo) do let (q, b) as key = Queue.pop todo in let q' = @@ -247,9 +245,4 @@ let normalize_negations a = let trans = Hashtbl.find a.transitions q in let trans' = List.map (fun (lab, f) -> lab, flip b f) trans in Hashtbl.replace a.transitions q' trans' - done; - Hashtbl.iter (fun (q, b) q' -> - if not (b || StateSet.mem q a.bottom_states) then - a.bottom_states <- StateSet.add q' a.bottom_states - ) memo_state - + done diff --git a/src/test.ml b/src/test.ml index f62e68d..2dd1cae 100644 --- a/src/test.ml +++ b/src/test.ml @@ -14,7 +14,7 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) (** use: xml_file "XPath querie" @@ -39,10 +39,14 @@ let query = let arg2 = Sys.argv.(2) in Xpath.Parser.parse (Ulexing.from_latin1_string arg2) +let auto = + Xpath.Compile.path query + open Format let () = fprintf err_formatter "Query: %a\n%!" Xpath.Ast.print_path query; + fprintf err_formatter "Automata: %a\n%!" Auto.Ata.print auto; fprintf err_formatter "Document:\n%!"; Tree.Naive.print_xml stderr doc (Tree.Naive.root doc); exit 0 diff --git a/src/xpath/ast.ml b/src/xpath/ast.ml index 39777eb..d70227b 100644 --- a/src/xpath/ast.ml +++ b/src/xpath/ast.ml @@ -14,7 +14,7 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) open Utils @@ -168,3 +168,17 @@ and print_expr fmt = function print_expr fmt e0; if need_par0 then pp fmt ")" + + +let invert_axis = function +| Self | Attribute as a -> a +| Child -> Parent +| Descendant (b) -> Ancestor (b) +| FollowingSibling -> PrecedingSibling +| Parent -> Child +| Ancestor (b) -> Descendant (b) +| PrecedingSibling -> FollowingSibling +| Preceding -> Following +| Following -> Preceding +;; + diff --git a/src/xpath/ast.mli b/src/xpath/ast.mli index 8040bb1..7cc91e4 100644 --- a/src/xpath/ast.mli +++ b/src/xpath/ast.mli @@ -14,7 +14,7 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) type path = single_path list @@ -52,3 +52,4 @@ val print_axis : Format.formatter -> axis -> unit val print_test : Format.formatter -> test -> unit val print_expr : Format.formatter -> expr -> unit +val invert_axis : axis -> axis diff --git a/src/xpath/compile.ml b/src/xpath/compile.ml index 783d7f1..b22a74b 100644 --- a/src/xpath/compile.ml +++ b/src/xpath/compile.ml @@ -14,7 +14,7 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) open Ast @@ -40,7 +40,7 @@ let ( @: ) a b = StateSet.add a b let compile_axis_test axis test phi trans states = let q = State.make () in - let phi, trans, states = + let phi', trans', states' = match axis with | Self -> (`Epsilon ** q), @@ -90,7 +90,8 @@ let compile_axis_test axis test phi trans states = | Attribute -> let q' = State.make () in let test = if QNameSet.is_finite test then - QNameSet.fold (fun tag acc -> QNameSet.add (QName.add_attribute_prefix tag) acc) + QNameSet.fold (fun tag acc -> + QNameSet.add (QName.add_attribute_prefix tag) acc) test QNameSet.empty else test in @@ -102,8 +103,17 @@ let compile_axis_test axis test phi trans states = | _ -> assert false in - phi, trans, q @: states + phi', trans', q @: states' + + +let compile_rev_axis_test axis test phi trans states = + match axis with + | Attribute -> assert false + | _ -> compile_axis_test (invert_axis axis) test phi trans states ;; + + + let rec compile_expr e trans states = match e with | Binop (e1, (And|Or as op), e2) -> @@ -135,9 +145,10 @@ and compile_single_path p trans states = | Relative steps -> steps in compile_step_list steps trans states + and compile_step_list l trans states = match l with - [] -> Ata.SFormula.true_, trans, states + | [] -> Ata.SFormula.true_, trans, states | (axis, test, elist) :: ll -> let phi0, trans0, states0 = compile_step_list ll trans states in let phi1, trans1, states1 = @@ -146,3 +157,68 @@ and compile_step_list l trans states = List.fold_left (fun (aphi, atrans, astates) e -> let ephi, etrans, estates = compile_expr e atrans astates in aphi %% ephi, etrans, estates) (phi1, trans1, states1) elist + +let compile_top_level_step_list l trans states = + let rec loop l trans states phi_above = + match l with + | (axis, test, elist) :: [] -> + let phi0, trans0, states0 = + compile_rev_axis_test axis QNameSet.any phi_above trans states + in + let phi1, trans1, states1 = + List.fold_left (fun (aphi, atrans, astates) e -> + let ephi, etrans, estates = compile_expr e atrans astates in + aphi %% ephi, etrans, estates) (phi0, trans0, states0) elist + in + let _, trans2, states2 = + compile_axis_test Self test phi1 trans1 states1 + in + let marking_state = + StateSet.choose (StateSet.diff states2 states1) + in + marking_state, trans2, states2 + | (axis, test, elist) :: ll -> + let phi0, trans0, states0 = + compile_rev_axis_test axis QNameSet.any phi_above trans states + in + let phi1, trans1, states1 = + compile_axis_test Self test phi0 trans0 states0 + in + let phi2, trans2, states2 = + List.fold_left (fun (aphi, atrans, astates) e -> + let ephi, etrans, estates = compile_expr e atrans astates in + aphi %% ephi, etrans, estates) (phi1, trans1, states1) elist + in + loop ll trans2 states2 phi2 + | _ -> assert false + in + let phi0, trans0, states0 = + compile_axis_test + Self + (QNameSet.singleton QName.document) + Ata.SFormula.true_ + trans + states + in + loop l trans0 states0 phi0 +;; + + +let path p = + let mstates, trans, states = List.fold_left (fun (ams, atrs, asts) p -> + let ms, natrs, nasts = + match p with + | Absolute l | Relative l -> compile_top_level_step_list l atrs asts + in + (StateSet.add ms ams), natrs, nasts) (StateSet.empty, [], StateSet.empty) p + in + let a = Ata.create () in + a.Ata.states <- states; + a.Ata.selection_states <- mstates; + List.iter (fun (q, l) -> + List.iter (fun (lab, phi) -> + Ata.add_trans a q lab phi + ) l) trans; + Ata.complete_transitions a; + Ata.normalize_negations a; + a diff --git a/src/xpath/compile.mli b/src/xpath/compile.mli new file mode 100644 index 0000000..6f50f11 --- /dev/null +++ b/src/xpath/compile.mli @@ -0,0 +1,20 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2013 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +(* + Time-stamp: +*) + +val path : Ast.path -> Auto.Ata.t