(***********************************************************************) (* *) (* TAToo *) (* *) (* Kim Nguyen, LRI UMR8623 *) (* Université Paris-Sud & CNRS *) (* *) (* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) (* Recherche Scientifique. All rights reserved. This file is *) (* distributed under the terms of the GNU Lesser General Public *) (* License, with the special exception on linking described in file *) (* ../LICENSE. *) (* *) (***********************************************************************) #load "pa_extend.cmo";; module Ast = struct type path = Absolute of step list | AbsoluteDoS of step list| Relative of step list and step = axis * test *predicate and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following and test = Simple of QNameSet.t and predicate = Or of predicate*predicate | And of predicate*predicate | Not of predicate | Expr of expression and expression = Path of path | Function of string*expression list | Int of int | String of string | True | False type t = path let text = QNameSet.singleton QName.text let node = QNameSet.any let star = QNameSet.complement ( QNameSet.from_list [ QName.text; QName.document; QName.cdata_section; QName.comment]) let t_text = Simple text let t_node = Simple node let t_star = Simple star let pp fmt = Format.fprintf fmt let print_list printer fmt sep l = match l with [] -> () | [e] -> printer fmt e | e::es -> printer fmt e; List.iter (fun x -> pp fmt sep;printer fmt x) es let rec print fmt p = let l = match p with | Absolute l -> pp fmt "/"; l | AbsoluteDoS l -> pp fmt "/"; print_step fmt (DescendantOrSelf,Simple QNameSet.any,Expr True); pp fmt "/"; l | Relative l -> l in Pretty.print_list ~sep:"/" print_step fmt l and print_step fmt (axis, test, predicate) = print_axis fmt axis;pp fmt "::";print_test fmt test; match predicate with Expr True -> () | _ -> pp fmt "["; print_predicate fmt predicate; pp fmt "]" and print_axis fmt a = pp fmt "%s" (match a with Self -> "self" | Child -> "child" | Descendant -> "descendant" | DescendantOrSelf -> "descendant-or-self" | FollowingSibling -> "following-sibling" | Attribute -> "attribute" | Ancestor -> "ancestor" | AncestorOrSelf -> "ancestor-or-self" | PrecedingSibling -> "preceding-sibling" | Parent -> "parent" | _ -> assert false ) and print_test fmt ts = try pp fmt "%s" (List.assoc ts [ t_text,"text()"; t_node,"node()"; t_star, "*" ] ) with Not_found -> pp fmt "%s" (match ts with Simple t -> if QNameSet.is_finite t then QName.to_string (QNameSet.choose t) else "" ) and print_predicate fmt = function | Or(p,q) -> print_predicate fmt p; pp fmt " or "; print_predicate fmt q | And(p,q) -> print_predicate fmt p; pp fmt " and "; print_predicate fmt q | Not p -> pp fmt "not "; print_predicate fmt p | Expr e -> print_expression fmt e and print_expression fmt = function | Path p -> print fmt p | Function (f,l) -> pp fmt "%s(" f; Pretty.print_list ~sep:"," print_expression fmt l; pp fmt ")" | Int i -> pp fmt "%i" i | String s -> pp fmt "\"%s\"" s | t -> pp fmt "%b" (t== True) end module Parser = struct open Ast open Ulexer let predopt = function None -> Expr True | Some p -> p module Gram = Camlp4.Struct.Grammar.Static.Make(Ulexer) let query = Gram.Entry.mk "query" exception Error of Gram.Loc.t*string let test_of_keyword t loc = match t with | "text()" -> text | "node()" -> node | "*" -> star | "and" | "not" | "or" -> QNameSet.singleton (QName.of_string t) | _ -> raise (Error(loc,"Invalid test name "^t )) let axis_to_string a = let r = Format.str_formatter in print_axis r a; Format.flush_str_formatter() EXTEND Gram GLOBAL: query; query : [ [ p = path; `EOI -> p ]] ; path : [ [ "//" ; l = slist -> AbsoluteDoS (List.rev l) ] | [ "/" ; l = slist -> Absolute (List.rev l) ] | [ l = slist -> Relative (List.rev l) ] ] ; slist: [ [ l = slist ;"/"; s = step -> s @ l ] | [ l = slist ; "//"; s = step -> s@[(DescendantOrSelf, t_node ,Expr True)]@l] | [ s = step -> s ] ]; step : [ (* yurk, this is done to parse stuff like a/b/descendant/a where descendant is actually a tag name :( if OPT is None then this is a child::descendant if not, this is a real axis name *) [ axis = axis ; o = OPT ["::" ; t = test -> t ] ; p = top_pred -> let a,t,p = match o with | Some(t) -> (axis,t,p) | None -> (Child,Simple (QNameSet.singleton (QName.of_string (axis_to_string axis))),p) in match a with | Following -> [ (DescendantOrSelf,t,p); (FollowingSibling, t_star,Expr(True)); (Ancestor, t_star ,Expr(True)) ] | Preceding -> [ (DescendantOrSelf,t,p); (PrecedingSibling,t_star,Expr(True)); (Ancestor,t_star,Expr(True)) ] | _ -> [ a,t,p ] ] | [ "." ; p = top_pred -> [(Self, t_node,p)] ] | [ ".." ; p = top_pred -> [(Parent,t_star,p)] ] | [ test = test; p = top_pred -> [(Child,test, p)] ] | [ att = ATT ; p = top_pred -> match att with | "*" -> [(Attribute,t_star,p)] | _ -> [(Attribute, Simple (QNameSet.singleton (QName.of_string att)) ,p )]] ] ; top_pred : [ [ p = OPT [ "["; p=predicate ;"]" -> p ] -> predopt p ] ] ; axis : [ [ "self" -> Self | "child" -> Child | "descendant" -> Descendant | "descendant-or-self" -> DescendantOrSelf | "ancestor-or-self" -> AncestorOrSelf | "following-sibling" -> FollowingSibling | "attribute" -> Attribute | "parent" -> Parent | "ancestor" -> Ancestor | "preceding-sibling" -> PrecedingSibling | "preceding" -> Preceding | "following" -> Following ] ]; test : [ [ s = KWD -> Simple (test_of_keyword s _loc) ] | [ t = TAG -> Simple (QNameSet.singleton (QName.of_string t)) ] ]; predicate: [ [ p = predicate; "or"; q = predicate -> Or(p,q) ] | [ p = predicate; "and"; q = predicate -> And(p,q) ] | [ "not" ; p = predicate -> Not p ] | [ "("; p = predicate ;")" -> p ] | [ e = expression -> Expr e ] ]; expression: [ [ f = TAG; "("; args = LIST0 expression SEP "," ; ")" -> Function(f,args)] | [ `INT(i) -> Int (i) ] | [ s = STRING -> String s ] | [ p = path -> Path p ] | [ "("; e = expression ; ")" -> e ] ] ; END ;; (* GLOBAL: query; query : [ [ p = location_path; `EOI -> p ]] ; location_path : [ [ "/" ; l = OPT relative_location_path -> let l = match l with None -> [] | Some l' -> l' in Absolute l ] | [ l = relative_location_path -> Relative l ] | [ l = abbrev_absolute_location_path -> l ] ] ; relative_location_path : [ [ s = step -> [ s ] ] | [ l = relative_location_path ; "/"; s = step -> l @ [ s ] ] | [ l = abbrev_relative_location_path -> l ] ] ; step : [ [ a = axis_specifier ; n = node_test ; p = OPT predicate -> let p = match p with Some p' -> p' | None -> Expr(True) in a, n, p ] | [ a = abbrev_step -> a ] ] ; axis_specifier : [ [ a = axis_name ; "::" -> a ] | [ a = abbrev_axis_specifier -> a ] ]; axis_name : [ [ "self" -> Self | "child" -> Child | "descendant" -> Descendant | "descendant-or-self" -> DescendantOrSelf | "ancestor-or-self" -> AncestorOrSelf | "following-sibling" -> FollowingSibling | "attribute" -> Attribute | "parent" -> Parent | "ancestor" -> Ancestor | "preceding-sibling" -> PrecedingSibling | "preceding" -> Preceding | "following" -> Following ] ] ; node_test : [ [ n = name_test -> n ] | [ n = node_type ; "("; ")" -> n ] (* | [ "processing-instruction" ; "(" ... ")" ] *) ] ; name_test : [ [ "*" -> Simple(TagSet.star) ] | [ t = axis_name -> Simple(TagSet.singleton (Tag.tag (axis_to_string t))) ] | [ t = TAG -> Simple(TagSet.singleton (Tag.tag t)) ] ] ; node_type : [ [ "text" -> Simple(TagSet.pcdata) ] | [ "node" -> Simple(TagSet.node) ] ] ; predicate : [ [ "["; e = expr ; "]" -> e ] ] ; abbrev_absolute_location_path : [ [ "//"; l = relative_location_path -> AbsoluteDoS l ] ]; abbrev_relative_location_path : [ [ l = relative_location_path; "//"; s = step -> l @ [ (DescendantOrSelf,Simple(TagSet.node),Expr(True)); s ] ] ]; abbrev_step : [ [ "." -> (Self, Simple(TagSet.node), Expr(True)) ] | [ ".." -> (Parent, Simple(TagSet.node), Expr(True)) ] ]; abbrev_axis_specifier: [ [ a = OPT "@" -> match a with None -> Attribute | _ -> Child ] ]; expr : [ [ o = or_expr -> o ] ]; primary_expr : [ [ "("; e = expr ; ")" -> e ] | [ s = STRING -> Expr (String s) ] | [ `INT(i) -> Expr (Int (i)) ] | [ f = TAG; "("; args = LIST0 expr SEP "," ; ")" -> Expr(Function(f, List.map (function Expr e -> e | _ -> assert false) args))] ] ; or_expr : [ [ o1 = or_expr ; "or" ; o2 = and_expr -> Or(o1, o2) ] | [ a = and_expr -> a ] ] ; and_expr : [ [ a1 = and_expr; "and"; a2 = unary_expr -> And(a1, a2) ] | [ p = unary_expr -> p ] ] ; unary_expr : [ [ l = location_path -> Expr(Path l) ] | [ "not"; "("; e = expr ; ")" -> Not e ] | [ p = primary_expr -> p ] ]; END ;; *) let parse = Gram.parse_string query (Ulexer.Loc.mk "") end let parse = Parser.parse