--- /dev/null
+%{
+
+  (* Bring the XPath AST constructors (Absolute, Binop, ...) into scope
+     for the semantic actions below.  (Removed unused `let f () = ()`.) *)
+  open XPath.Ast
+%}
+
+%token <string> TAG
+%token <string> STRING
+%token <int> INT
+%token <float> FLOAT
+%token <XPath.Ast.axis> AXIS
+%token RB LB LP RP
+%token SLASH SLASHSLASH COLONCOLON STAR PIPE
+%token EQ NEQ LT GT LTE GTE OR AND ADD SUB DIV MOD
+%token NODE TEXT
+%token COMMA
+%token EOF
+
+%left OR
+%left AND
+%left EQ NEQ
+%left LT GT LTE GTE
+%left ADD SUB
+%left MOD DIV STAR
+%nonassoc uminus
+
+%start xpath
+%type <XPath.Ast.path> xpath
+
+
+%%
+xpath:
+path EOF { $1 } /* entry point: a full union path, then end of input */
+;
+
+path:
+  path_rev { List.rev $1 } /* '|'-separated alternatives, accumulated reversed below */
+;
+
+path_rev:
+  simple_path { [ $1 ] } /* head-first accumulation; `path` re-reverses */
+| path_rev PIPE simple_path { $3 :: $1 }
+;
+
+
+simple_path:
+  absolute_path { Absolute (List.rev $1) } /* step lists arrive reversed */
+| relative_path { Relative (List.rev $1) }
+;
+
+absolute_path:
+  SLASH relative_path { $2 }
+| SLASHSLASH relative_path { $2 @ [ (DescendantOrSelf, node, []) ] } /* $2 is reversed: append so this step comes FIRST after List.rev (cons put it last) */
+;
+
+relative_path:
+  step { [ $1 ] } /* steps accumulate in reverse; simple_path re-reverses */
+| relative_path SLASH step { $3 :: $1 }
+| relative_path SLASHSLASH step { $3 (* a//b = a/descendant-or-self::node()/b *)
+                                   :: (DescendantOrSelf, node, [])
+                                   :: $1 }
+;
+
+step:
+  axis_test pred_list { let (ax, nt) = $1 in (ax, nt, $2) } /* attach the predicate list to the (axis, node-test) pair */
+;
+
+axis_test:
+  AXIS COLONCOLON test { $1, $3 }
+| test { Child, $1 }
+| AXIS {
+  (* An axis keyword in test position is just an element name (e.g. a lone
+     "child" means child::child); render it back to a string via print_axis.
+     asprintf replaces the flush/print/flush dance on the shared str_formatter. *)
+  let a = Format.asprintf "%a" XPath.Ast.print_axis $1 in
+  Child, QNameSet.singleton (QName.of_string a)
+}
+;
+
+test:
+  NODE { node } /* node(): matches any node */
+| TEXT { text } /* text(): matches text nodes */
+| STAR { star } /* '*': matches any element */
+| TAG { QNameSet.singleton(QName.of_string $1) }
+;
+
+pred_list:
+  pred_list_rev { List.rev $1 } /* predicates in source order */
+;
+
+pred_list_rev:
+   { [] }
+| pred_list_rev LB expr RB { $3 :: $1 } /* fix: recurse via the _rev form; recursing through pred_list double-reversed and scrambled order for 3+ predicates */
+;
+
+expr:
+  INT { Number(`Int($1)) }
+| FLOAT { Number(`Float($1)) }
+| STRING { String $1 }
+| SUB expr %prec uminus { Unop(Neg, $2) } /* unary minus, tightest binding */
+| expr AND expr { Binop($1, And, $3) }
+| expr OR expr { Binop($1, Or, $3) }
+| expr ADD expr { Binop($1, Add, $3) }
+| expr SUB expr { Binop($1, Sub, $3) }
+| expr STAR expr { Binop($1, Mult, $3) } /* STAR doubles as the '*' node test elsewhere */
+| expr DIV expr { Binop($1, Div, $3) }
+| expr MOD expr { Binop($1, Mod, $3) }
+| expr EQ expr { Binop($1, Eq, $3) }
+| expr NEQ expr { Binop($1, Neq, $3) }
+| expr LT expr { Binop($1, Lt, $3) }
+| expr LTE expr { Binop($1, Lte, $3) }
+| expr GT expr { Binop($1, Gt, $3) }
+| expr GTE expr { Binop($1, Gte, $3) } /* precedences declared in the prologue */
+| TAG LP arg_list RP { Fun_call(QName.of_string $1, $3) } /* e.g. position() */
+| LP expr RP { $2 }
+| path { Path $1 }
+;
+
+arg_list:
+  { [] } /* empty argument list */
+| arg_list1 { List.rev $1 }
+;
+
+arg_list1:
+  expr { [ $1 ] } /* built reversed; arg_list re-reverses */
+| arg_list1 COMMA expr { $3 :: $1 }
+;
+
(* Copyright NICTA 2008 *)
(* Distributed under the terms of the LGPL (see LICENCE) *)
(******************************************************************************)
-open Camlp4.PreCast
-
-module Loc = struct
- type t = int * int
-
- let mk _ = (0,0)
- let ghost = (-1,-1)
-
- let of_lexing_position _ = assert false
- let to_ocaml_location _ = assert false
- let of_ocaml_location _ = assert false
- let of_lexbuf _ = assert false
- let of_tuple _ = assert false
- let to_tuple _ = assert false
-
- let merge (x1, x2) (y1, y2) = (min x1 y1, max x2 y2)
- let join (x1, _) = (x1, x1)
- let move _ _ _ = assert false
- let shift _ _ = assert false
- let move_line _ _ = assert false
- let file_name _ = assert false
- let start_line _ = assert false
- let stop_line _ = assert false
- let start_bol _ = assert false
- let stop_bol _ = assert false
- let start_off = fst
- let stop_off = snd
- let start_pos _ = assert false
- let stop_pos _ = assert false
- let is_ghost _ = assert false
- let ghostify _ = assert false
- let set_file_name _ = assert false
- let strictly_before _ = assert false
- let make_absolute _ = assert false
- let print _ = assert false
- let dump _ = assert false
- let to_string _ = assert false
- exception Exc_located of t * exn
- let raise loc exn =
- match exn with
- | Exc_located _ -> raise exn
- | _ -> raise (Exc_located (loc, exn))
- let name = ref "_loc"
-end
-
-type token =
- | TAG of string
- | STRING of string
- | INT of int
- | KWD of string
- | ATT of string
- | EOI
-
-module Token = struct
- open Format
- module Loc = Loc
- type t = token
- type token = t
-
- let sf = Printf.sprintf
-
- let to_string =
- function
- | TAG s -> sf "TAG <%s>" s
- | STRING s -> sf "STRING \"%s\"" s
- | KWD s -> sf "KWD %s" s
- | INT i -> sf "INT %i" i
- | ATT s -> sf "ATT %s" s
- | EOI -> sf "EOI"
-
- let print ppf x = pp_print_string ppf (to_string x)
-
- let match_keyword kwd =
- function
- | KWD kwd' when kwd = kwd' -> true
- | _ -> false
-
- let extract_string =
- function
- | KWD s | STRING s | TAG s | ATT s -> s
- | INT i -> string_of_int i
- | tok ->
- invalid_arg ("Cannot extract a string from this token: "^
- to_string tok)
-
- module Error = struct
- type t = string
- exception E of string
- let print = pp_print_string
- let to_string x = x
- end
-
- module Filter = struct
- type token_filter = (t, Loc.t) Camlp4.Sig.stream_filter
-
- type t =
- { is_kwd : string -> bool;
- mutable filter : token_filter }
-
- let mk is_kwd =
- { is_kwd = is_kwd;
- filter = (fun s -> s) }
-
- let filter x =
- let f tok loc =
- let tok' = tok in
- (tok', loc)
- in
- let rec filter =
- parser
- | [< '(tok, loc); s >] -> [< ' f tok loc; filter s >]
- | [< >] -> [< >]
- in
- fun strm -> x.filter (filter strm)
-
- let define_filter x f = x.filter <- f x.filter
-
- let keyword_added _ _ _ = ()
- let keyword_removed _ _ = ()
- end
-
-end
-module Error = Camlp4.Struct.EmptyError
+open Parser
module L = Ulexing
(***********************************************************)
(* Buffer for string literals *)
-
+
let string_buff = Buffer.create 1024
-let store_lexeme lexbuf =
+let store_lexeme lexbuf =
Buffer.add_string string_buff (Ulexing.utf8_lexeme lexbuf)
let store_ascii = Buffer.add_char string_buff
let store_code = Utf8.store string_buff
let illegal lexbuf =
error
(L.lexeme_start lexbuf)
- (L.lexeme_end lexbuf)
+ (L.lexeme_end lexbuf)
"Illegal character"
let return lexbuf tok = (tok, L.loc lexbuf)
let return_loc i j tok = (tok, (i,j))
-let regexp ncname_char =
+let regexp ncname_char =
xml_letter | xml_digit | [ '-' '_' ] | xml_combining_char | xml_extender | "\\."
let hexa_digit = function
| _ -> -1
let regexp ncname = ( xml_letter ncname_char* ) | ('_' ncname_char+)
+let regexp digit = ['0'-'9']
+let regexp float = digit+ ('.' digit+ (['e''E'] digit+)?)? (* no leading '-': lexing the sign broke "a-1" (lexed TAG,INT(-1)); unary minus is the parser's job (%prec uminus) *)
let parse_char lexbuf base i =
- let s = L.latin1_sub_lexeme lexbuf i (L.lexeme_length lexbuf - i - 1) in
+ let s = L.latin1_sub_lexeme lexbuf i (L.lexeme_length lexbuf - i - 1) in
let r = ref 0 in
for i = 0 to String.length s - 1 do
let c = hexa_digit s.[i] in
- if (c >= base) || (c < 0) then
+ if (c >= base) || (c < 0) then
error (L.lexeme_start lexbuf) (L.lexeme_end lexbuf) "invalid digit";
r := !r * base + c;
done;
!r
+let keyword_or_tag s =
+  (* Reserved XPath words lex as their own tokens; anything else is a TAG. *)
+  try
+    List.assoc s [
+      "self", AXIS XPath.Ast.Self;
+      "descendant", AXIS XPath.Ast.Descendant;
+      "child", AXIS XPath.Ast.Child;
+      "descendant-or-self", AXIS XPath.Ast.DescendantOrSelf;
+      "attribute", AXIS XPath.Ast.Attribute;
+      "following-sibling", AXIS XPath.Ast.FollowingSibling;
+      "preceding-sibling", AXIS XPath.Ast.PrecedingSibling;
+      "parent", AXIS XPath.Ast.Parent;
+      "ancestor", AXIS XPath.Ast.Ancestor;
+      "ancestor-or-self", AXIS XPath.Ast.AncestorOrSelf;
+      "preceding", AXIS XPath.Ast.Preceding;
+      "following", AXIS XPath.Ast.Following;
+      "and", AND;
+      "or" , OR;
+      "div", DIV;
+      "mod", MOD;
+    ]
+  with
+    Not_found -> TAG s (* only the assoc miss; a bare _ masked real errors *)
+
+
+let rec token = lexer
 | [' ' '\t' '\n'] -> token lexbuf
- | "and" | "not" | "or" | "text()" | "node()"
- | "self" | "descendant" | "child" | "descendant-or-self"
- | "attribute" | "following-sibling" | "preceding-sibling"
- | "parent" | "ancestor" | "ancestor-or-self" | "preceding" | "following"
- | "(" |")" | "," | "::" | "/" | "//" | "[" | "]" | "*" | "." | ".." | "@"
- -> return lexbuf (KWD (L.utf8_lexeme lexbuf))
- | ncname -> return lexbuf (TAG(L.utf8_lexeme lexbuf))
- | '-'? ['0'-'9']+ -> let i = INT (int_of_string(L.utf8_lexeme lexbuf)) in return lexbuf i
+ | "*" -> STAR
+ | "/" -> SLASH
+ | "//" -> SLASHSLASH (* ulex longest-match: "//" wins over "/" *)
+ | "::" -> COLONCOLON
+ | "(" -> LP
+ | ")" -> RP
+ | "[" -> LB
+ | "]" -> RB
+ | "," -> COMMA
+ | "|" -> PIPE
+ | "+" -> ADD
+ | "-" -> SUB
+ | "<" -> LT
+ | "<=" -> LTE (* longest-match: "<=" wins over "<" *)
+ | ">" -> GT
+ | ">=" -> GTE
+ | "=" -> EQ
+ | "!=" -> NEQ
+ | "node()" -> NODE (* longest-match: beats ncname matching just "node" *)
+ | "text()" -> TEXT
+ | ncname -> keyword_or_tag (L.utf8_lexeme lexbuf)
+ | float ->
+    let s = L.utf8_lexeme lexbuf in
+    (try
+      INT (int_of_string s) (* integral literal -> INT ... *)
+    with
+      _ -> FLOAT (float_of_string s)) (* ... otherwise FLOAT *)
 | '"' | "'" ->
-    let start = L.lexeme_start lexbuf in
    let double_quote = L.latin1_lexeme_char lexbuf 0 = '"' in
    string (L.lexeme_start lexbuf) double_quote lexbuf;
    let s = get_stored_string () in
-    return_loc start (L.lexeme_end lexbuf) (STRING s)
+    STRING s
- | eof -> return lexbuf EOI
+ | eof -> EOF
 | _ -> illegal lexbuf
and string start double = lexer
| '\\' ['\\' '"' '\''] ->
store_ascii (L.latin1_lexeme_char lexbuf 1);
string start double lexbuf
- | "\\n" ->
+ | "\\n" ->
store_ascii '\n'; string start double lexbuf
- | "\\t" ->
+ | "\\t" ->
store_ascii '\t'; string start double lexbuf
- | "\\r" ->
+ | "\\r" ->
store_ascii '\r'; string start double lexbuf
| '\\' ['0'-'9']+ ';' ->
store_code (parse_char lexbuf 10 1);
string start double lexbuf
-
-(***********************************************************)
-
-let enc = ref L.Latin1
-let lexbuf = ref None
-let last_tok = ref (KWD "DUMMY")
-
-let raise_clean e =
- clear_buff ();
- raise e
-
-let mk () _loc cs =
- let lb = L.from_var_enc_stream enc cs in
- lexbuf := Some lb;
- let next _ =
- let tok, loc =
- try token lb
- with
- | Ulexing.Error ->
- raise_clean (Error (Ulexing.lexeme_end lb, Ulexing.lexeme_end lb,
- "Unexpected character"))
- | Ulexing.InvalidCodepoint i ->
- raise_clean (Error (Ulexing.lexeme_end lb, Ulexing.lexeme_end lb,
- "Code point invalid for the current encoding"))
- | e -> raise_clean e
- in
- last_tok := tok;
- Some (tok, loc)
- in
- Stream.from next