Cherry pick use of menhir features from branch feature/menhir.
[tatoo.git] / src / xpath / xpath_internal_parser.mly
1 %{
2 (***********************************************************************)
3 (*                                                                     *)
4 (*                               TAToo                                 *)
5 (*                                                                     *)
6 (*                     Kim Nguyen, LRI UMR8623                         *)
7 (*                   Université Paris-Sud & CNRS                       *)
8 (*                                                                     *)
9 (*  Copyright 2010-2012 Université Paris-Sud and Centre National de la *)
10 (*  Recherche Scientifique. All rights reserved.  This file is         *)
11 (*  distributed under the terms of the GNU Lesser General Public       *)
12 (*  License, with the special exception on linking described in file   *)
13 (*  ../LICENSE.                                                        *)
14 (*                                                                     *)
15 (***********************************************************************)
16
17   open Ast
18   open Tree
19 %}
20
(* Tokens that carry a value.  TAG is used by the rules below both as an
   element name (rule test) and as a function name (rule expr); PI carries
   the target of a processing-instruction test, the empty string being
   handled separately in rule test; ATTNAME is turned into an attribute
   test in rule axis_test. *)
21 %token <string> TAG
22 %token <string> PI
23 %token <string> ATTNAME
24 %token <string> STRING
25 %token <int>  INT
26 %token <float> FLOAT
27 %token <Ast.axis> AXIS
(* Value-less tokens.  LB/RB delimit predicates (rule pred_list_rev),
   LP/RP delimit parenthesised expressions and argument lists (rule expr). *)
28 %token RB LB LP RP
29 %token SLASH SLASHSLASH COLONCOLON STAR PIPE DOT DOTDOT
30 %token EQ NEQ LT GT LTE GTE OR AND ADD SUB DIV MOD
31 %token NODE TEXT COMMENT
32 %token COMMA
33 %token EOF
34
(* Operator precedences, listed from lowest to highest.  uminus is not a
   real token: it only names the precedence level given to unary minus
   through the %prec annotation in rule expr. *)
35 %left OR
36 %left AND
37 %left EQ NEQ
38 %left LT GT LTE GTE
39 %left ADD SUB
40 %left MOD DIV STAR
41 %nonassoc uminus
42
(* Single entry point of the parser; it produces an Ast.path. *)
43 %start xpath_query
44 %type <Ast.path> xpath_query
45
46
47 %%
(* Entry point: a whole query is one path followed by end of input.
   The value built for the path is returned unchanged. *)
48 xpath_query:
49   query = path; EOF   { query }
50 ;
51
(* A path is a non-empty list of simple paths separated by PIPE;
   the list is returned in source order. *)
52 path:
53   alternatives = separated_nonempty_list(PIPE, simple_path)  { alternatives }
54 ;
55
(* absolute_path and relative_path both build their step list last step
   first; it is reversed here, once, and tagged as Absolute or Relative. *)
56 simple_path:
57 | rev_steps = absolute_path  { Absolute (List.rev rev_steps) }
58 | rev_steps = relative_path  { Relative (List.rev rev_steps) }
59 ;
60
(* A relative path introduced by SLASH or SLASHSLASH.  The step list is
   still in reverse order at this point, so the extra Descendant step
   required by SLASHSLASH is appended at the END of the list: it becomes
   the first step once simple_path reverses the list. *)
61 absolute_path:
62 | SLASH; rev_steps = relative_path       { rev_steps }
63 | SLASHSLASH; rev_steps = relative_path  {
64     let descend_from_root =
65       (Descendant true, (node, NodeKind.Node), []) in
66     rev_steps @ [ descend_from_root ] }
67 ;
68
69 /*
70   A step always yields a short list (1 to 3 elements), so
71   appending with @ is cheap.
72 */
73
(* Left-recursive sequence of steps separated by SLASH or SLASHSLASH.
   The result is accumulated in REVERSE order: the entries of the newest
   step are put in front of those already collected.  For SLASHSLASH an
   extra Descendant step over any node is inserted between the two. *)
74 relative_path:
75 | first = step                                    { first }
76 | prefix = relative_path; SLASH; last = step      { last @ prefix }
77 | prefix = relative_path; SLASHSLASH; last = step {
78     let any_descendant =
79       (Descendant true, (node, NodeKind.Node), [])
80     in
81     last @ (any_descendant :: prefix) }
82 ;
83
(* One location step, returned as a list of (axis, test, predicates)
   triples.  DOT and DOTDOT map to Self and Parent over any node, with no
   predicate.  Otherwise axis_test supplies one or more (axis, test)
   pairs: the predicates are attached to the head pair only, every other
   pair gets an empty predicate list.  axis_test never returns an empty
   list, hence the assert. *)
84 step:
85 | DOT     { [ Self, (node, NodeKind.Node), [] ] }
86 | DOTDOT  { [ Parent, (node, NodeKind.Node), [] ] }
87 | tests = axis_test; preds = pred_list  {
88     match tests with
89     | (axis, test) :: rest -> (axis, test, preds) :: List.map (fun (ax, t) -> (ax, t, [])) rest
90     | [] -> assert false
91   }
92 ;
93
(* Axis and node test of a step, returned as a non-empty list of
   (axis, (name set, node kind)) pairs.  The list is in the same reversed
   order as the step lists built by relative_path: its head ends up as the
   LAST step once simple_path applies List.rev.

   Four surface forms are accepted:
     - AXIS COLONCOLON test : explicit axis;
     - test                 : the axis defaults to Child;
     - AXIS                 : an axis keyword standing alone, read as an
                              element name on the Child axis;
     - ATTNAME              : an attribute test on the Attribute axis. *)
94 axis_test:
95   AXIS COLONCOLON test  { let a, (t,k) = $1, $3 in
96                           match a with
(* Attribute axis over a finite name set: every name of the set is
   mapped through QName.attribute; the node kind k is kept as is. *)
97                             Attribute when QNameSet.is_finite t ->
98                               [ a, ((QNameSet.fold
99                                        (fun t a ->
100                                          QNameSet.add
101                                            (QName.attribute t) a)
102                                        t QNameSet.empty), k) ]
(* Preceding and Following are expanded into three entries.  Read in
   final (reversed) order they are: Ancestor true over any node, then
   the matching sibling axis over any node, then Descendant true with
   the original test.
   NOTE(review): a == Preceding is a physical comparison; it is only
   reached for the two argument-less constructors matched on the line
   above.  Structural = would be the more usual spelling. *)
103                           | Preceding|Following ->
104                               [ (Descendant true, (t,k));
105                                 if a == Preceding then
106                                   (PrecedingSibling, (node, NodeKind.Node))
107                                 else
108                                   (FollowingSibling, (node, NodeKind.Node));
109                                 (Ancestor true, (node, NodeKind.Node)) ]
110
(* Any other axis, including Attribute over a non-finite set, is passed
   through unchanged. *)
111                           | _ -> [ a, (t,k) ]
112                         }
113 | test                  { [ Child, $1 ] }
114 | AXIS            {
(* The first flush discards whatever is pending in the shared
   Format.str_formatter buffer; the axis is then printed into that buffer
   with Ast.print_axis and the second flush retrieves the text, which is
   used as an element name.  The three statements must stay in this
   order.  Presumably this lets an element be named like an axis
   keyword -- TODO confirm against the lexer. *)
115   let _ = Format.flush_str_formatter () in
116   let () = Format.fprintf Format.str_formatter "%a" Ast.print_axis $1 in
117   let a = Format.flush_str_formatter () in
118   [Child, (QNameSet.singleton (QName.of_string a),NodeKind.Element)]
119 }
(* NOTE(review): unlike the explicit Attribute arm above, this arm does
   not apply QName.attribute and sets the kind to NodeKind.Attribute
   itself -- confirm that the lexer already normalises ATTNAME. *)
120 | ATTNAME             {  [(Attribute,
121                            (QNameSet.singleton (QName.of_string $1),
122                             NodeKind.Attribute))] }
123 ;
124
(* Node test: returns a pair (set of names, node kind).
   NODE, TEXT and STAR use the predefined sets node and text; COMMENT,
   PI and TAG build a singleton set.  A PI token whose payload is the
   empty string yields the predefined set star instead of a singleton. *)
125 test:
126 | NODE      { (node, NodeKind.Node) }
127 | TEXT      { (text, NodeKind.Text) }
128 | STAR      { (node, NodeKind.Element) }
129 | COMMENT   {
130     (QNameSet.singleton QName.comment, NodeKind.Comment)
131   }
132 | target = PI  {
133     let names =
134       if target = "" then star
135       else QNameSet.singleton
136              (QName.processing_instruction (QName.of_string target)) in
137     (names, NodeKind.ProcessingInstruction) }
138 | name = TAG  {
139     (QNameSet.singleton (QName.of_string name), NodeKind.Element)
140   }
141 ;
142
(* Predicate list of a step: zero or more bracketed expressions, returned
   in the order in which they appear in the query.

   pred_list_rev accumulates the predicates most recent first, and
   pred_list reverses that accumulator exactly once.  The recursion has
   to go through pred_list_rev itself: recursing through pred_list would
   reverse the accumulator again after every predicate, which scrambles
   the order as soon as a step carries three or more predicates
   ([a][b][c] came out as b, a, c) and costs quadratic time. *)
143 pred_list:
144   rev_preds = pred_list_rev   { List.rev rev_preds }
145 ;
146
147 pred_list_rev:
148 |                             { [] }
149 | rev_preds = pred_list_rev; LB; cond = expr; RB   { cond :: rev_preds }
150 ;
151
(* Expressions allowed inside predicates and function arguments:
   numeric and string literals, unary minus (given the uminus precedence
   level), binary operations, function calls, parenthesised expressions
   and nested paths.  Ambiguities between binary operators are resolved
   by the precedence declarations at the top of the file. *)
152 expr:
153 | n = INT                                 { Number (`Int n) }
154 | x = FLOAT                               { Number (`Float x) }
155 | s = STRING                              { String s }
156 | SUB; operand = expr  %prec uminus       { Unop (Neg, operand) }
157 | lhs = expr; op = binop; rhs = expr      { Binop (lhs, op, rhs) }
158 | fname = TAG; LP; args = arg_list; RP    { Fun_call (QName.of_string fname, args) }
159 | LP; inner = expr; RP                    { inner }
160 | p = path                                { Path p }
161 ;
162
(* Maps each binary operator token to its Ast constructor.  The rule is
   %inline so that, inside expr, every operator keeps the precedence and
   associativity declared for its own token.  Alternatives are grouped
   here by precedence level, lowest first. *)
163 %inline binop:
164 | OR    { Or }
165 | AND   { And }
166 | EQ    { Eq }
167 | NEQ   { Neq }
168 | LT    { Lt }
169 | GT    { Gt }
170 | LTE   { Lte }
171 | GTE   { Gte }
172 | ADD   { Add }
173 | SUB   { Sub }
174 | MOD   { Mod }
175 | DIV   { Div }
176 | STAR  { Mult }
177 ;
(* Possibly empty argument list of a function call, in source order.
   arg_list1 accumulates the arguments in reverse; they are put back in
   order here. *)
178 arg_list:
179 | (* empty *)             { [] }
180 | rev_args = arg_list1    { List.rev rev_args }
181 ;
182
(* Non-empty, comma-separated list of expressions, accumulated in
   REVERSE order (most recent argument first). *)
183 arg_list1:
184 | first = expr                                { [ first ] }
185 | rev_args = arg_list1; COMMA; next = expr    { next :: rev_args }
186 ;