From e13f5deae217f945b44fa345ef4f0008e1780787 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Kim=20Nguy=E1=BB=85n?= Date: Tue, 12 Feb 2013 21:52:54 +0100 Subject: [PATCH] Add the compilation from existential XPath to automata. This patch is missing the book-keeping code needed to build the automaton object, as well as the compilation of the top-level part of the query. --- src/xpath/ast.ml | 19 +-- src/xpath/ast.mli | 11 +- src/xpath/compile.ml | 172 ++++++++++++++++++---------- src/xpath/ulexer.ml | 10 +- src/xpath/xpath_internal_parser.mly | 6 +- 5 files changed, 137 insertions(+), 81 deletions(-) diff --git a/src/xpath/ast.ml b/src/xpath/ast.ml index c3c00d9..39777eb 100644 --- a/src/xpath/ast.ml +++ b/src/xpath/ast.ml @@ -14,7 +14,7 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) open Utils @@ -22,8 +22,13 @@ open Utils type path = single_path list and single_path = Absolute of step list | Relative of step list and step = axis * test * expr list -and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling - | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following +and axis = Self | Attribute | Child + | Descendant of bool (* true = descendant-or-self, false = descendant *) + | FollowingSibling + | Parent + | Ancestor of bool (* true = ancestor-or-self, false = ancestor *) + | PrecedingSibling + | Preceding | Following and test = QNameSet.t @@ -111,12 +116,12 @@ and print_axis fmt a = pp fmt "%s" begin match a with Self -> "self" | Child -> "child" - | Descendant -> "descendant" - | DescendantOrSelf -> "descendant-or-self" + | Descendant false -> "descendant" + | Descendant true -> "descendant-or-self" | FollowingSibling -> "following-sibling" | Attribute -> "attribute" - | Ancestor -> "ancestor" - | AncestorOrSelf -> "ancestor-or-self" + | Ancestor false -> "ancestor" + | Ancestor true -> "ancestor-or-self" | PrecedingSibling -> "preceding-sibling" | Parent -> 
"parent" | Preceding -> "preceding" diff --git a/src/xpath/ast.mli b/src/xpath/ast.mli index f2a4df7..8040bb1 100644 --- a/src/xpath/ast.mli +++ b/src/xpath/ast.mli @@ -14,14 +14,19 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) type path = single_path list and single_path = Absolute of step list | Relative of step list and step = axis * test * expr list -and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling - | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following +and axis = Self | Attribute | Child + | Descendant of bool + | FollowingSibling + | Parent + | Ancestor of bool + | PrecedingSibling + | Preceding | Following and test = Utils.QNameSet.t diff --git a/src/xpath/compile.ml b/src/xpath/compile.ml index a84a50f..783d7f1 100644 --- a/src/xpath/compile.ml +++ b/src/xpath/compile.ml @@ -14,7 +14,7 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) open Ast @@ -30,73 +30,119 @@ let ( ++ ) a b = Ata.SFormula.or_ a b let ( %% ) a b = Ata.SFormula.and_ a b let ( @: ) a b = StateSet.add a b +(* [compile_axis_test axis test q phi trans states] Takes an xpath + [axis] and node [test], a formula [phi], a list of [trans]itions + and a set of [states] and returns a formula [phi'], a new set of + transitions, and a new set of states such that [phi'] holds iff + there exists a node reachable through [axis]::[test] where [phi] + holds. 
+*) -let compile_axis_test ax tst inq trs sts = - match ax with - | Self -> - let outq = State.make () in - outq, - (inq, [ tst => (`Epsilon ** outq ) ]) :: trs, - outq @: sts +let compile_axis_test axis test phi trans states = + let q = State.make () in + let phi, trans, states = + match axis with + | Self -> + (`Epsilon ** q), + (q, [ test => phi ]) :: trans, + states - | Child -> - let outq = State.make () in - let outq' = State.make () in - outq', - (inq, [ QNameSet.any => (`Left ** outq)]) - :: (outq, [ tst => (`Epsilon ** outq'); - QNameSet.any => (`Right ** outq) ]) - :: trs, - outq @: (outq' @: sts) + | Child -> + (`Left ** q), + (q, [ test => phi; + QNameSet.any => (`Right ** q) ]) :: trans, + states - | Descendant | DescendantOrSelf -> - let dir = if ax = Descendant then `Left else `Epsilon in - let outq = State.make () in - let outq' = State.make () in - outq', - (inq, [ QNameSet.any => (dir ** outq)]) - :: (outq, [ tst => (`Epsilon ** outq'); - QNameSet.any => ((`Left ** outq) ++ (`Right ** outq)) - ]) - :: trs, - outq @: (outq' @: sts) + | Descendant self -> + (if self then (`Epsilon ** q) else (`Left ** q)), + (q, [ test => phi; + QNameSet.any => (`Left ** q) %% (`Right ** q) ]) :: trans, + states - | Parent -> - let outq = State.make () in - let outq' = State.make () in - let outq'' = State.make () in - let move = (`Up1 ** outq') ++ (`Up2 ** outq) in - outq'', - (inq, [QNameSet.any => move ]) - :: (outq, [ QNameSet.any => move ]) - :: (outq', [ tst => (`Epsilon ** outq'') ]) - :: trs, - outq @: (outq' @: (outq'' @: sts)) + | Parent -> + let q' = State.make () in + let move = (`Up1 ** q) ++ (`Up2 ** q') in + move, + (q, [ test => phi ]) + :: (q', [ QNameSet.any => move ]) :: trans, + (q' @: states) - | Ancestor | AncestorOrSelf -> - let outq = State.make () in - let outq' = State.make () in - let outq'' = State.make () in - let move = - (if ax = Ancestor then (`Up1 ** outq') - else (`Epsilon ** outq')) ++ (`Up1 ** outq) ++ (`Up2 ** outq) - in - 
outq'', - (inq, [QNameSet.any => move ]) - :: (outq, [ QNameSet.any => move ]) - :: (outq', [ tst => (`Epsilon ** outq'') ]) - :: trs, - outq @: (outq' @: (outq'' @: sts)) + | Ancestor self -> + let q' = State.make () in + let move = (`Up1 ** q) ++ (`Up2 ** q') in + (if self then (`Epsilon ** q) else move), + (q, [ test => phi; + QNameSet.any => move ]) + :: (q', [ QNameSet.any => move ]) :: trans, + (q' @: states) + + | FollowingSibling | PrecedingSibling -> + let move = + if axis = PrecedingSibling then + (`Up2 ** q) + else (`Right ** q) + in + move, + (q, [ test => phi; + QNameSet.any => move ]) :: trans, + states - | FollowingSibling | PrecedingSibling -> - let outq = State.make () in - let outq' = State.make () in - let dir = if ax = FollowingSibling then `Right else `Up2 in - outq', - (inq, [ QNameSet.any => (dir ** outq) ]) - :: (outq, [ tst => (`Epsilon ** outq'); - QNameSet.any => (dir ** outq) ]) - :: trs, - outq @: (outq' @: sts) + | Attribute -> + let q' = State.make () in + let test = if QNameSet.is_finite test then + QNameSet.fold (fun tag acc -> QNameSet.add (QName.add_attribute_prefix tag) acc) + test QNameSet.empty + else test + in + (`Left ** q), + (q, [ QNameSet.singleton QName.attribute_map => (`Left ** q') ]) + :: (q', [ test => phi; + QNameSet.any => (`Right ** q') ]) :: trans, + (q' @:states) + | _ -> assert false + + in + phi, trans, q @: states +;; +let rec compile_expr e trans states = + match e with + | Binop (e1, (And|Or as op), e2) -> + let phi1, trans1, states1 = compile_expr e1 trans states in + let phi2, trans2, states2 = compile_expr e2 trans1 states1 in + (if op = Or then phi1 ++ phi2 else phi1 %% phi2), + trans2, + states2 + | Fun_call (f, [ e0 ]) when (QName.to_string f) = "not" -> + let phi, trans0, states0 = compile_expr e0 trans states in + (Ata.SFormula.not_ phi), + trans0, + states0 + | Path p -> compile_path p trans states | _ -> assert false +and compile_path paths trans states = + List.fold_left (fun (aphi, atrans, 
astates) p -> + let phi, ntrans, nstates = compile_single_path p atrans astates in + (Ata.SFormula.or_ phi aphi), + ntrans, + nstates) (Ata.SFormula.false_,trans,states) paths + +and compile_single_path p trans states = + let steps = + match p with + | Absolute steps -> + (Ancestor false, QNameSet.singleton QName.document, [])::steps + | Relative steps -> steps + in + compile_step_list steps trans states +and compile_step_list l trans states = + match l with + [] -> Ata.SFormula.true_, trans, states + | (axis, test, elist) :: ll -> + let phi0, trans0, states0 = compile_step_list ll trans states in + let phi1, trans1, states1 = + compile_axis_test axis test phi0 trans0 states0 + in + List.fold_left (fun (aphi, atrans, astates) e -> + let ephi, etrans, estates = compile_expr e atrans astates in + aphi %% ephi, etrans, estates) (phi1, trans1, states1) elist diff --git a/src/xpath/ulexer.ml b/src/xpath/ulexer.ml index aa66179..2153e18 100644 --- a/src/xpath/ulexer.ml +++ b/src/xpath/ulexer.ml @@ -14,7 +14,7 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) open Xpath_internal_parser @@ -81,15 +81,15 @@ let keyword_or_tag s = try List.assoc s [ "self", AXIS Ast.Self; - "descendant", AXIS Ast.Descendant; + "descendant", AXIS (Ast.Descendant false); "child", AXIS Ast.Child; - "descendant-or-self", AXIS Ast.DescendantOrSelf; + "descendant-or-self", AXIS (Ast.Descendant true); "attribute", AXIS Ast.Attribute; "following-sibling", AXIS Ast.FollowingSibling; "preceding-sibling", AXIS Ast.PrecedingSibling; "parent", AXIS Ast.Parent; - "ancestor", AXIS Ast.Ancestor; - "ancestor-or-self", AXIS Ast.AncestorOrSelf; + "ancestor", AXIS (Ast.Ancestor false); + "ancestor-or-self", AXIS (Ast.Ancestor true); "preceding", AXIS Ast.Preceding; "following", AXIS Ast.Following; "and", AND; diff --git a/src/xpath/xpath_internal_parser.mly b/src/xpath/xpath_internal_parser.mly index 04ecf36..a05beb2 100644 --- 
a/src/xpath/xpath_internal_parser.mly +++ b/src/xpath/xpath_internal_parser.mly @@ -15,7 +15,7 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) open Ast @@ -68,14 +68,14 @@ simple_path: absolute_path: SLASH relative_path { $2 } -| SLASHSLASH relative_path { (DescendantOrSelf, node, []) :: $2 } +| SLASHSLASH relative_path { (Descendant true, node, []) :: $2 } ; relative_path: step { [ $1 ] } | relative_path SLASH step { $3 :: $1 } | relative_path SLASHSLASH step { $3 - :: (DescendantOrSelf, node, []) + :: (Descendant true, node, []) :: $1 } ; -- 2.17.1