1 (***********************************************************************)
5 (* Kim Nguyen, LRI UMR8623 *)
6 (* Université Paris-Sud & CNRS *)
8 (* Copyright 2010-2013 Université Paris-Sud and Centre National de la *)
9 (* Recherche Scientifique. All rights reserved. This file is *)
10 (* distributed under the terms of the GNU Lesser General Public *)
11 (* License, with the special exception on linking described in file *)
14 (***********************************************************************)
17 Time-stamp: <Last modified on 2013-03-13 14:14:15 CET by Kim Nguyen>
20 open Xpath_internal_parser
24 exception Error of int * int * string
26 let error i j s = raise (Error (i,j,s))
28 (***********************************************************)
29 (* Buffer for string literals *)
31 let string_buff = Buffer.create 1024
33 let store_lexeme lexbuf =
34 Buffer.add_string string_buff (Ulexing.utf8_lexeme lexbuf)
35 let store_ascii = Buffer.add_char string_buff
36 let store_code = Utf8.store string_buff
37 let clear_buff () = Buffer.clear string_buff
38 let get_stored_string () =
39 let s = Buffer.contents string_buff in
41 Buffer.clear string_buff;
44 (***********************************************************)
49 (L.lexeme_start lexbuf)
53 let return lexbuf tok = (tok, L.loc lexbuf)
54 let return_loc i j tok = (tok, (i,j))
56 let regexp ncname_char =
57 xml_letter | xml_digit | [ '-' '_' '.'] | xml_combining_char | xml_extender
59 let hexa_digit = function
60 | '0'..'9' as c -> (Char.code c) - (Char.code '0')
61 | 'a'..'f' as c -> (Char.code c) - (Char.code 'a') + 10
62 | 'A'..'F' as c -> (Char.code c) - (Char.code 'A') + 10
65 let regexp ncname = ( xml_letter ncname_char* ) | ('_' ncname_char+)
66 let regexp digit = ['0'-'9']
67 let regexp float = '-'? digit+ ('.' digit+ (['e''E'] digit+)?)?
69 let parse_char lexbuf base i =
70 let s = L.latin1_sub_lexeme lexbuf i (L.lexeme_length lexbuf - i - 1) in
72 for i = 0 to String.length s - 1 do
73 let c = hexa_digit s.[i] in
74 if (c >= base) || (c < 0) then
75 error (L.lexeme_start lexbuf) (L.lexeme_end lexbuf) "invalid digit";
80 let keyword_or_tag s =
83 "self", AXIS Ast.Self;
84 "descendant", AXIS (Ast.Descendant false);
85 "child", AXIS Ast.Child;
86 "descendant-or-self", AXIS (Ast.Descendant true);
87 "attribute", AXIS Ast.Attribute;
88 "following-sibling", AXIS Ast.FollowingSibling;
89 "preceding-sibling", AXIS Ast.PrecedingSibling;
90 "parent", AXIS Ast.Parent;
91 "ancestor", AXIS (Ast.Ancestor false);
92 "ancestor-or-self", AXIS (Ast.Ancestor true);
93 "preceding", AXIS Ast.Preceding;
94 "following", AXIS Ast.Following;
104 let rec token = lexer
105 | [' ' '\t' '\n'] -> token lexbuf
128 | "comment()" -> COMMENT
129 | '@' ncname -> ATTNAME (L.utf8_lexeme lexbuf)
130 | "processing-instruction()" -> PI ""
131 | "processing-instruction('"ncname"')"
132 | "processing-instruction(\""ncname"\")"->
133 let s = L.utf8_lexeme lexbuf in
134 PI (String.sub s 24 (String.length s - 26))
135 | ncname -> keyword_or_tag (L.utf8_lexeme lexbuf)
137 let s = L.utf8_lexeme lexbuf in
139 INT (int_of_string s)
141 _ -> FLOAT (float_of_string s))
143 let double_quote = L.latin1_lexeme_char lexbuf 0 = '"' in
144 string (L.lexeme_start lexbuf) double_quote lexbuf;
145 let s = get_stored_string () in
149 | _ -> illegal lexbuf
151 and string start double = lexer
153 let d = L.latin1_lexeme_char lexbuf 0 = '"' in
154 if d != double then (store_lexeme lexbuf; string start double lexbuf)
155 | '\\' ['\\' '"' '\''] ->
156 store_ascii (L.latin1_lexeme_char lexbuf 1);
157 string start double lexbuf
159 store_ascii '\n'; string start double lexbuf
161 store_ascii '\t'; string start double lexbuf
163 store_ascii '\r'; string start double lexbuf
164 | '\\' ['0'-'9']+ ';' ->
165 store_code (parse_char lexbuf 10 1);
166 string start double lexbuf
167 | '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F']+ ';' ->
168 store_code (parse_char lexbuf 16 2);
169 string start double lexbuf
173 error start (start+1) "Unterminated string"
176 string start double lexbuf