--- /dev/null
+(***********************************************************************)
+(* *)
+(* TAToo *)
+(* *)
+(* Kim Nguyen, LRI UMR8623 *)
+(* Université Paris-Sud & CNRS *)
+(* *)
+(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *)
+(* Recherche Scientifique. All rights reserved. This file is *)
+(* distributed under the terms of the GNU Lesser General Public *)
+(* License, with the special exception on linking described in file *)
+(* ../LICENSE. *)
+(* *)
+(***********************************************************************)
+
+#load "pa_extend.cmo";;
+module Ast =
+struct
+
+ type path = Absolute of step list | AbsoluteDoS of step list| Relative of step list
+ and step = axis * test *predicate
+ and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling
+ | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following
+
+ and test = Simple of QNameSet.t
+
+
+ and predicate = Or of predicate*predicate
+ | And of predicate*predicate
+ | Not of predicate
+ | Expr of expression
+ and expression = Path of path
+ | Function of string*expression list
+ | Int of int
+ | String of string
+ | True | False
+ type t = path
+
+
+ let text = QNameSet.singleton QName.text
+ let node = QNameSet.any
+ let star =
+ QNameSet.complement (
+ QNameSet.from_list [ QName.text;
+ QName.document;
+ QName.cdata_section;
+ QName.comment])
+ let t_text = Simple text
+ let t_node = Simple node
+ let t_star = Simple star
+
+
+
+ let pp fmt = Format.fprintf fmt
+ let print_list printer fmt sep l =
+ match l with
+ [] -> ()
+ | [e] -> printer fmt e
+ | e::es -> printer fmt e; List.iter (fun x -> pp fmt sep;printer fmt x) es
+
+
+ let rec print fmt p =
+ let l = match p with
+ | Absolute l -> pp fmt "/"; l
+ | AbsoluteDoS l -> pp fmt "/";
+ print_step fmt (DescendantOrSelf,Simple QNameSet.any,Expr True);
+ pp fmt "/"; l
+ | Relative l -> l
+ in
+ Pretty.print_list ~sep:"/" print_step fmt l
+ and print_step fmt (axis, test, predicate) =
+ print_axis fmt axis;pp fmt "::";print_test fmt test;
+ match predicate with
+ Expr True -> ()
+ | _ -> pp fmt "["; print_predicate fmt predicate; pp fmt "]"
+ and print_axis fmt a = pp fmt "%s" (match a with
+ Self -> "self"
+ | Child -> "child"
+ | Descendant -> "descendant"
+ | DescendantOrSelf -> "descendant-or-self"
+ | FollowingSibling -> "following-sibling"
+ | Attribute -> "attribute"
+ | Ancestor -> "ancestor"
+ | AncestorOrSelf -> "ancestor-or-self"
+ | PrecedingSibling -> "preceding-sibling"
+ | Parent -> "parent"
+ | _ -> assert false
+ )
+ and print_test fmt ts =
+ try
+ pp fmt "%s" (List.assoc ts
+ [ t_text,"text()";
+ t_node,"node()";
+ t_star, "*" ] )
+ with
+ Not_found -> pp fmt "%s"
+ (match ts with
+ Simple t -> if QNameSet.is_finite t
+ then QName.to_string (QNameSet.choose t)
+ else "<INFINITE>"
+ )
+
+ and print_predicate fmt = function
+ | Or(p,q) -> print_predicate fmt p; pp fmt " or "; print_predicate fmt q
+ | And(p,q) -> print_predicate fmt p; pp fmt " and "; print_predicate fmt q
+ | Not p -> pp fmt "not "; print_predicate fmt p
+ | Expr e -> print_expression fmt e
+
+ and print_expression fmt = function
+ | Path p -> print fmt p
+ | Function (f,l) ->
+ pp fmt "%s(" f;
+ Pretty.print_list ~sep:"," print_expression fmt l;
+ pp fmt ")"
+ | Int i -> pp fmt "%i" i
+ | String s -> pp fmt "\"%s\"" s
+ | t -> pp fmt "%b" (t== True)
+
+end
+module Parser =
+struct
+ open Ast
+ open Ulexer
+ let predopt = function None -> Expr True | Some p -> p
+
+ module Gram = Camlp4.Struct.Grammar.Static.Make(Ulexer)
+ let query = Gram.Entry.mk "query"
+
+ exception Error of Gram.Loc.t*string
+ let test_of_keyword t loc =
+ match t with
+ | "text()" -> text
+ | "node()" -> node
+ | "*" -> star
+ | "and" | "not" | "or" -> QNameSet.singleton (QName.of_string t)
+ | _ -> raise (Error(loc,"Invalid test name "^t ))
+
+ let axis_to_string a = let r = Format.str_formatter in
+ print_axis r a; Format.flush_str_formatter()
+
+
+
+
+EXTEND Gram
+
+GLOBAL: query;
+
+ query : [ [ p = path; `EOI -> p ]]
+;
+
+ path : [
+ [ "//" ; l = slist -> AbsoluteDoS (List.rev l) ]
+ | [ "/" ; l = slist -> Absolute (List.rev l) ]
+ | [ l = slist -> Relative (List.rev l) ]
+ ]
+;
+
+slist: [
+ [ l = slist ;"/"; s = step -> s @ l ]
+| [ l = slist ; "//"; s = step -> s@[(DescendantOrSelf, t_node ,Expr True)]@l]
+| [ s = step -> s ]
+];
+
+step : [
+ (* yurk, this is done to parse stuff like
+ a/b/descendant/a where descendant is actually a tag name :(
+ if OPT is None then this is a child::descendant if not, this is a real axis name
+ *)
+
+
+[ axis = axis ; o = OPT ["::" ; t = test -> t ] ; p = top_pred ->
+ let a,t,p =
+ match o with
+ | Some(t) -> (axis,t,p)
+ | None -> (Child,Simple (QNameSet.singleton (QName.of_string (axis_to_string axis))),p)
+ in match a with
+ | Following -> [ (DescendantOrSelf,t,p);
+ (FollowingSibling, t_star,Expr(True));
+ (Ancestor, t_star ,Expr(True)) ]
+
+ | Preceding -> [ (DescendantOrSelf,t,p);
+ (PrecedingSibling,t_star,Expr(True));
+ (Ancestor,t_star,Expr(True)) ]
+ | _ -> [ a,t,p ]
+
+]
+
+| [ "." ; p = top_pred -> [(Self, t_node,p)] ]
+| [ ".." ; p = top_pred -> [(Parent,t_star,p)] ]
+| [ test = test; p = top_pred -> [(Child,test, p)] ]
+| [ att = ATT ; p = top_pred ->
+ match att with
+ | "*" -> [(Attribute,t_star,p)]
+ | _ -> [(Attribute, Simple (QNameSet.singleton (QName.of_string att)) ,p )]]
+]
+;
+top_pred : [
+ [ p = OPT [ "["; p=predicate ;"]" -> p ] -> predopt p ]
+]
+;
+axis : [
+ [ "self" -> Self | "child" -> Child | "descendant" -> Descendant
+ | "descendant-or-self" -> DescendantOrSelf
+ | "ancestor-or-self" -> AncestorOrSelf
+ | "following-sibling" -> FollowingSibling
+ | "attribute" -> Attribute
+ | "parent" -> Parent
+ | "ancestor" -> Ancestor
+ | "preceding-sibling" -> PrecedingSibling
+ | "preceding" -> Preceding
+ | "following" -> Following
+ ]
+
+
+];
+test : [
+ [ s = KWD -> Simple (test_of_keyword s _loc) ]
+| [ t = TAG -> Simple (QNameSet.singleton (QName.of_string t)) ]
+];
+
+
+predicate: [
+
+ [ p = predicate; "or"; q = predicate -> Or(p,q) ]
+| [ p = predicate; "and"; q = predicate -> And(p,q) ]
+| [ "not" ; p = predicate -> Not p ]
+| [ "("; p = predicate ;")" -> p ]
+| [ e = expression -> Expr e ]
+];
+
+expression: [
+ [ f = TAG; "("; args = LIST0 expression SEP "," ; ")" -> Function(f,args)]
+| [ `INT(i) -> Int (i) ]
+| [ s = STRING -> String s ]
+| [ p = path -> Path p ]
+| [ "("; e = expression ; ")" -> e ]
+]
+;
+END
+;;
+(*
+
+GLOBAL: query;
+
+ query : [ [ p = location_path; `EOI -> p ]]
+;
+
+
+ location_path : [
+ [ "/" ; l = OPT relative_location_path ->
+ let l = match l with None -> [] | Some l' -> l' in Absolute l ]
+ | [ l = relative_location_path -> Relative l ]
+ | [ l = abbrev_absolute_location_path -> l ]
+
+ ]
+;
+
+ relative_location_path : [
+ [ s = step -> [ s ] ]
+ | [ l = relative_location_path ; "/"; s = step -> l @ [ s ] ]
+ | [ l = abbrev_relative_location_path -> l ]
+ ]
+;
+
+
+ step : [
+ [ a = axis_specifier ; n = node_test ; p = OPT predicate ->
+ let p = match p with Some p' -> p' | None -> Expr(True) in
+ a, n, p
+ ]
+ | [ a = abbrev_step -> a ]
+ ]
+;
+ axis_specifier : [
+ [ a = axis_name ; "::" -> a ]
+ | [ a = abbrev_axis_specifier -> a ]
+ ];
+
+ axis_name : [
+ [ "self" -> Self | "child" -> Child | "descendant" -> Descendant
+ | "descendant-or-self" -> DescendantOrSelf
+ | "ancestor-or-self" -> AncestorOrSelf
+ | "following-sibling" -> FollowingSibling
+ | "attribute" -> Attribute
+ | "parent" -> Parent
+ | "ancestor" -> Ancestor
+ | "preceding-sibling" -> PrecedingSibling
+ | "preceding" -> Preceding
+ | "following" -> Following
+ ]
+ ]
+;
+ node_test : [
+ [ n = name_test -> n ]
+ | [ n = node_type ; "("; ")" -> n ]
+ (* | [ "processing-instruction" ; "(" ... ")" ] *)
+ ]
+;
+ name_test : [
+ [ "*" -> Simple(TagSet.star) ]
+ | [ t = axis_name -> Simple(TagSet.singleton (Tag.tag (axis_to_string t))) ]
+ | [ t = TAG -> Simple(TagSet.singleton (Tag.tag t)) ]
+ ]
+;
+ node_type : [
+ [ "text" -> Simple(TagSet.pcdata) ]
+ | [ "node" -> Simple(TagSet.node) ]
+ ]
+;
+ predicate : [
+ [ "["; e = expr ; "]" -> e ]
+ ]
+;
+ abbrev_absolute_location_path : [
+ [ "//"; l = relative_location_path -> AbsoluteDoS l ]
+ ];
+
+ abbrev_relative_location_path : [
+ [ l = relative_location_path; "//"; s = step ->
+ l @ [ (DescendantOrSelf,Simple(TagSet.node),Expr(True)); s ]
+ ]
+ ];
+
+ abbrev_step : [
+ [ "." -> (Self, Simple(TagSet.node), Expr(True)) ]
+ | [ ".." -> (Parent, Simple(TagSet.node), Expr(True)) ]
+ ];
+
+ abbrev_axis_specifier: [
+ [ a = OPT "@" -> match a with None -> Attribute | _ -> Child ]
+ ];
+
+ expr : [
+ [ o = or_expr -> o ]
+ ];
+
+ primary_expr : [
+ [ "("; e = expr ; ")" -> e ]
+ | [ s = STRING -> Expr (String s) ]
+ | [ `INT(i) -> Expr (Int (i)) ]
+ | [ f = TAG; "("; args = LIST0 expr SEP "," ; ")" ->
+ Expr(Function(f, List.map (function Expr e -> e | _ -> assert false) args))]
+ ]
+;
+
+ or_expr : [
+ [ o1 = or_expr ; "or" ; o2 = and_expr -> Or(o1, o2) ]
+ | [ a = and_expr -> a ]
+ ]
+ ;
+
+ and_expr : [
+ [ a1 = and_expr; "and"; a2 = unary_expr -> And(a1, a2) ]
+ | [ p = unary_expr -> p ]
+ ]
+;
+ unary_expr : [
+ [ l = location_path -> Expr(Path l) ]
+ | [ "not"; "("; e = expr ; ")" -> Not e ]
+ | [ p = primary_expr -> p ]
+
+ ];
+
+END
+;;
+
+*)
+
+ let parse = Gram.parse_string query (Ulexer.Loc.mk "<string>")
+end
+let parse = Parser.parse