From c951f1d4b8f4264acb0b5910dc544ad3a6ceebab Mon Sep 17 00:00:00 2001 From: =?utf8?q?Kim=20Nguy=E1=BB=85n?= Date: Mon, 28 Jan 2013 18:44:40 +0100 Subject: [PATCH] Rewrite the AST to conform to the W3C grammar --- src/xPath.ml | 385 ++++++++++++-------------------------------------- src/xPath.mli | 44 +++--- 2 files changed, 108 insertions(+), 321 deletions(-) diff --git a/src/xPath.ml b/src/xPath.ml index 5258b67..0e71caf 100644 --- a/src/xPath.ml +++ b/src/xPath.ml @@ -12,28 +12,28 @@ (* ../LICENSE. *) (* *) (***********************************************************************) - -#load "pa_extend.cmo";; module Ast = struct - type path = Absolute of step list | Relative of step list - and step = axis * test *predicate + type path = single_path list + and single_path = Absolute of step list | Relative of step list + and step = axis * test * expr list and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following and test = Simple of QNameSet.t + and binop = Eq | Neq | Lt | Gt | Lte | Gte | Or | And | Add | Sub | Mult | Div | Mod + and unop = Neg + and expr = + | Number of [ `Int of int | `Float of float ] + | String of string + | Fun_call of QName.t * expr list + | Path of path + | Binop of expr * binop * expr + | Unop of unop * expr + - and predicate = Or of predicate*predicate - | And of predicate*predicate - | Not of predicate - | Expr of expression - and expression = Path of path - | Function of string*expression list - | Int of int - | String of string - | True | False type t = path @@ -51,25 +51,60 @@ struct - let pp fmt = Format.fprintf fmt - let print_list printer fmt sep l = - match l with - [] -> () - | [e] -> printer fmt e - | e::es -> printer fmt e; List.iter (fun x -> pp fmt sep;printer fmt x) es - - - let rec print fmt p = + let pp fmt e = Format.fprintf fmt e + + let prio e = match e with + | Unop (Neg, _) -> 11 + | Path _ -> 10 + | Number _ | String _ | Fun_call _ -> 9 + | Binop (_,op,_) -> begin match op with + | Lt | Lte | Gt | Gte -> 7 + | Neq | Eq -> 6 + | And -> 5 + | Or -> 4 + | Mult | Div | Mod -> 3 + | Add | Sub -> 2 + end + + let print_binop fmt o = + pp fmt "%s" begin match o with + | Eq -> "=" + | Neq -> "!=" + | Lt -> "<" + | Gt -> ">" + | Lte -> "<=" + | Gte -> ">=" + | Or -> "or" + | And -> "and" + | Add -> "+" + | Sub -> "-" + | Mult -> "*" + | Div -> "div" + | Mod -> "mod" + end + let print_unop fmt o = + pp fmt "%s" begin match o with + | Neg -> "-" + end + + let rec print_path fmt p = + Pretty.print_list ~sep:" | " print_single_path fmt p + + and print_single_path fmt p = let l = match p with | Absolute l -> pp fmt "/"; l | Relative l -> l in Pretty.print_list ~sep:"/" print_step fmt l - and print_step fmt (axis, test, predicate) = - print_axis fmt axis;pp fmt "::";print_test fmt test; - match predicate with - Expr True -> () - | _ -> pp fmt "["; print_predicate fmt predicate; pp fmt "]" + + and print_step fmt (axis, test, expr) = + pp fmt "%a::%a" print_axis axis print_test test; + match expr with + [] -> () + | l -> pp fmt "[ "; + Pretty.print_list ~sep:" ][ " print_expr fmt l; + pp fmt " ]" + and print_axis fmt a = pp fmt "%s" (match a with Self -> "self" | Child -> "child" @@ -81,7 +116,8 @@ struct | AncestorOrSelf -> "ancestor-or-self" | PrecedingSibling -> "preceding-sibling" | Parent -> "parent" - | _ -> assert false + | Preceding -> "preceding" + | Following -> "following" ) and print_test fmt ts = try @@ -97,272 +133,31 @@ struct else "" ) - and print_predicate fmt = function - | Or(p,q) -> print_predicate fmt p; pp fmt " or "; print_predicate fmt q - | And(p,q) -> print_predicate fmt p; pp fmt " and "; print_predicate fmt q - | Not p -> pp fmt "not "; print_predicate fmt p - | Expr e -> print_expression fmt e - - and print_expression fmt = function - | Path p -> print fmt p - | Function (f,l) -> - pp fmt "%s(" f; - Pretty.print_list ~sep:"," print_expression fmt l; - pp fmt ")" - | Int i -> pp fmt "%i" i - | String s -> pp fmt "\"%s\"" s - | t -> pp fmt "%b" (t== True) - -end -module Parser = -struct - open Ast - open Ulexer - let predopt = function None -> Expr True | Some p -> p - - module Gram = Camlp4.Struct.Grammar.Static.Make(Ulexer) - let query = Gram.Entry.mk "query" - - exception Error of Gram.Loc.t*string - let test_of_keyword t loc = - match t with - | "text()" -> text - | "node()" -> node - | "*" -> star - | "and" | "not" | "or" -> QNameSet.singleton (QName.of_string t) - | _ -> raise (Error(loc,"Invalid test name "^t )) - - let axis_to_string a = let r = Format.str_formatter in - print_axis r a; Format.flush_str_formatter() - - -EXTEND Gram - -GLOBAL: query; - - query : [ [ p = path; `EOI -> p ]] -; - - path : [ - [ "/" ; l = slist -> Absolute (List.rev l) ] - | [ l = slist -> Relative (List.rev l) ] - ] -; - -slist: [ - [ l = slist ;"/"; s = step -> s @ l ] -| [ l = slist ; "//"; s = step -> s@[(DescendantOrSelf, t_node ,Expr True)]@l] -| [ s = step -> s ] -]; - -step : [ - (* yurk, this is done to parse stuff like - a/b/descendant/a where descendant is actually a tag name :( - if OPT is None then this is a child::descendant if not, this is a real axis name - *) - - -[ axis = axis ; o = OPT ["::" ; t = test -> t ] ; p = top_pred -> - let a,t,p = - match o with - | Some(t) -> (axis,t,p) - | None -> (Child,Simple (QNameSet.singleton (QName.of_string (axis_to_string axis))),p) - in match a with - | Following -> [ (DescendantOrSelf,t,p); - (FollowingSibling, t_star,Expr(True)); - (Ancestor, t_star ,Expr(True)) ] - - | Preceding -> [ (DescendantOrSelf,t,p); - (PrecedingSibling,t_star,Expr(True)); - (Ancestor,t_star,Expr(True)) ] - | _ -> [ a,t,p ] - -] - -| [ "." ; p = top_pred -> [(Self, t_node,p)] ] -| [ ".." ; p = top_pred -> [(Parent,t_star,p)] ] -| [ test = test; p = top_pred -> [(Child,test, p)] ] -| [ att = ATT ; p = top_pred -> - match att with - | "*" -> [(Attribute,t_star,p)] - | _ -> [(Attribute, Simple (QNameSet.singleton (QName.of_string att)) ,p )]] -] -; -top_pred : [ - [ p = OPT [ "["; p=predicate ;"]" -> p ] -> predopt p ] -] -; -axis : [ - [ "self" -> Self | "child" -> Child | "descendant" -> Descendant - | "descendant-or-self" -> DescendantOrSelf - | "ancestor-or-self" -> AncestorOrSelf - | "following-sibling" -> FollowingSibling - | "attribute" -> Attribute - | "parent" -> Parent - | "ancestor" -> Ancestor - | "preceding-sibling" -> PrecedingSibling - | "preceding" -> Preceding - | "following" -> Following - ] - - -]; -test : [ - [ s = KWD -> Simple (test_of_keyword s _loc) ] -| [ t = TAG -> Simple (QNameSet.singleton (QName.of_string t)) ] -]; - - -predicate: [ - - [ p = predicate; "or"; q = predicate -> Or(p,q) ] -| [ p = predicate; "and"; q = predicate -> And(p,q) ] -| [ "not" ; p = predicate -> Not p ] -| [ "("; p = predicate ;")" -> p ] -| [ e = expression -> Expr e ] -]; - -expression: [ - [ f = TAG; "("; args = LIST0 expression SEP "," ; ")" -> Function(f,args)] -| [ `INT(i) -> Int (i) ] -| [ s = STRING -> String s ] -| [ p = path -> Path p ] -| [ "("; e = expression ; ")" -> e ] -] -; -END -;; -(* - -GLOBAL: query; - - query : [ [ p = location_path; `EOI -> p ]] -; - - - location_path : [ - [ "/" ; l = OPT relative_location_path -> - let l = match l with None -> [] | Some l' -> l' in Absolute l ] - | [ l = relative_location_path -> Relative l ] - | [ l = abbrev_absolute_location_path -> l ] - - ] -; - - relative_location_path : [ - [ s = step -> [ s ] ] - | [ l = relative_location_path ; "/"; s = step -> l @ [ s ] ] - | [ l = abbrev_relative_location_path -> l ] - ] -; - - - step : [ - [ a = axis_specifier ; n = node_test ; p = OPT predicate -> - let p = match p with Some p' -> p' | None -> Expr(True) in - a, n, p - ] - | [ a = abbrev_step -> a ] - ] -; - axis_specifier : [ - [ a = axis_name ; "::" -> a ] - | [ a = abbrev_axis_specifier -> a ] - ]; - - axis_name : [ - [ "self" -> Self | "child" -> Child | "descendant" -> Descendant - | "descendant-or-self" -> DescendantOrSelf - | "ancestor-or-self" -> AncestorOrSelf - | "following-sibling" -> FollowingSibling - | "attribute" -> Attribute - | "parent" -> Parent - | "ancestor" -> Ancestor - | "preceding-sibling" -> PrecedingSibling - | "preceding" -> Preceding - | "following" -> Following - ] - ] -; - node_test : [ - [ n = name_test -> n ] - | [ n = node_type ; "("; ")" -> n ] - (* | [ "processing-instruction" ; "(" ... ")" ] *) - ] -; - name_test : [ - [ "*" -> Simple(TagSet.star) ] - | [ t = axis_name -> Simple(TagSet.singleton (Tag.tag (axis_to_string t))) ] - | [ t = TAG -> Simple(TagSet.singleton (Tag.tag t)) ] - ] -; - node_type : [ - [ "text" -> Simple(TagSet.pcdata) ] - | [ "node" -> Simple(TagSet.node) ] - ] -; - predicate : [ - [ "["; e = expr ; "]" -> e ] - ] -; - abbrev_absolute_location_path : [ - [ "//"; l = relative_location_path -> AbsoluteDoS l ] - ]; - - abbrev_relative_location_path : [ - [ l = relative_location_path; "//"; s = step -> - l @ [ (DescendantOrSelf,Simple(TagSet.node),Expr(True)); s ] - ] - ]; - - abbrev_step : [ - [ "." -> (Self, Simple(TagSet.node), Expr(True)) ] - | [ ".." -> (Parent, Simple(TagSet.node), Expr(True)) ] - ]; - - abbrev_axis_specifier: [ - [ a = OPT "@" -> match a with None -> Attribute | _ -> Child ] - ]; - - expr : [ - [ o = or_expr -> o ] - ]; - - primary_expr : [ - [ "("; e = expr ; ")" -> e ] - | [ s = STRING -> Expr (String s) ] - | [ `INT(i) -> Expr (Int (i)) ] - | [ f = TAG; "("; args = LIST0 expr SEP "," ; ")" -> - Expr(Function(f, List.map (function Expr e -> e | _ -> assert false) args))] - ] -; - - or_expr : [ - [ o1 = or_expr ; "or" ; o2 = and_expr -> Or(o1, o2) ] - | [ a = and_expr -> a ] - ] - ; - - and_expr : [ - [ a1 = and_expr; "and"; a2 = unary_expr -> And(a1, a2) ] - | [ p = unary_expr -> p ] - ] -; - unary_expr : [ - [ l = location_path -> Expr(Path l) ] - | [ "not"; "("; e = expr ; ")" -> Not e ] - | [ p = primary_expr -> p ] - - ]; - -END -;; - -*) - - let parse_string = Gram.parse_string query (Ulexer.Loc.mk "") - let parse_file fd = parse_string (input_line fd) + and print_expr fmt = function + | Number (`Int(i)) -> pp fmt "%i" i + | Number (`Float(f)) -> pp fmt "%f" f + | String s -> pp fmt "'%S'" s + | Fun_call (n, args) -> + pp fmt "%a(" QName.print n; + Pretty.print_list ~sep:", " print_expr fmt args; + pp fmt ")" + | Path p -> print_path fmt p + | Binop (e1, op, e2) as e -> + let pe = prio e in + let need_par1 = prio e1 < pe in + if need_par1 then pp fmt "("; + pp fmt "%a" print_expr e1; + if need_par1 then pp fmt ")"; + pp fmt " %a " print_binop op; + let need_par2 = prio e2 < pe in + if need_par2 then pp fmt "("; + pp fmt "%a" print_expr e2; + if need_par2 then pp fmt ")" + | Unop (op, e0) as e -> + let need_par0 = prio e0 < prio e in + print_unop fmt op; + if need_par0 then pp fmt "("; + print_expr fmt e0; + if need_par0 then pp fmt ")" end -let parse_string = Parser.parse_string -let parse_file = Parser.parse_file diff --git a/src/xPath.mli b/src/xPath.mli index de89955..e80a88a 100644 --- a/src/xPath.mli +++ b/src/xPath.mli @@ -18,32 +18,24 @@ module Ast : sig - type path = Absolute of step list | Relative of step list + type path = single_path list + and single_path = Absolute of step list | Relative of step list + and step = axis * test * expr list + and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling + | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following - and step = axis * test * predicate - and axis = Self | Attribute | Child | Descendant | DescendantOrSelf - | FollowingSibling - | Parent | Ancestor | AncestorOrSelf |PrecedingSibling - | Preceding | Following - and test = Simple of QNameSet.t - and predicate = Or of predicate*predicate - | And of predicate*predicate - | Not of predicate - | Expr of expression - and expression = Path of path - | Function of string*expression list - | Int of int - | String of string - | True | False - type t = path - val print : Format.formatter -> path -> unit - val print_step : Format.formatter -> step -> unit - val print_axis : Format.formatter -> axis -> unit - val print_test : Format.formatter -> test -> unit - val print_predicate : Format.formatter -> predicate -> unit - val print_expression : Format.formatter -> expression -> unit -end + and test = Simple of QNameSet.t + + and binop = Eq | Neq | Lt | Gt | Lte | Gte | Or | And | Add | Sub | Mult | Div | Mod + and unop = Neg + and expr = + | Number of [ `Int of int | `Float of float ] + | String of string + | Fun_call of QName.t * expr list + | Path of path + | Binop of expr * binop * expr + | Unop of unop * expr -val parse_string : string -> Ast.path -val parse_file : in_channel -> Ast.path + type t = path +end -- 2.17.1