1 (******************************************************************************)
2 (* SXSI : XPath evaluator *)
3 (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *)
4 (* Copyright NICTA 2008 *)
5 (* Distributed under the terms of the LGPL (see LICENCE) *)
6 (******************************************************************************)
11 exception Error of int * int * string
13 let error i j s = raise (Error (i,j,s))
15 (***********************************************************)
16 (* Buffer for string literals *)
18 let string_buff = Buffer.create 1024
20 let store_lexeme lexbuf =
21 Buffer.add_string string_buff (Ulexing.utf8_lexeme lexbuf)
22 let store_ascii = Buffer.add_char string_buff
23 let store_code = Utf8.store string_buff
24 let clear_buff () = Buffer.clear string_buff
25 let get_stored_string () =
26 let s = Buffer.contents string_buff in
28 Buffer.clear string_buff;
31 (***********************************************************)
36 (L.lexeme_start lexbuf)
40 let return lexbuf tok = (tok, L.loc lexbuf)
41 let return_loc i j tok = (tok, (i,j))
43 let regexp ncname_char =
44 xml_letter | xml_digit | [ '-' '_' ] | xml_combining_char | xml_extender | "\\."
46 let hexa_digit = function
47 | '0'..'9' as c -> (Char.code c) - (Char.code '0')
48 | 'a'..'f' as c -> (Char.code c) - (Char.code 'a') + 10
49 | 'A'..'F' as c -> (Char.code c) - (Char.code 'A') + 10
52 let regexp ncname = ( xml_letter ncname_char* ) | ('_' ncname_char+)
53 let regexp digit = ['0'-'9']
54 let regexp float = '-'? digit+ ('.' digit+ (['e''E'] digit+)?)?
56 let parse_char lexbuf base i =
57 let s = L.latin1_sub_lexeme lexbuf i (L.lexeme_length lexbuf - i - 1) in
59 for i = 0 to String.length s - 1 do
60 let c = hexa_digit s.[i] in
61 if (c >= base) || (c < 0) then
62 error (L.lexeme_start lexbuf) (L.lexeme_end lexbuf) "invalid digit";
67 let keyword_or_tag s =
70 "self", AXIS XPath.Ast.Self;
71 "descendant", AXIS XPath.Ast.Descendant;
72 "child", AXIS XPath.Ast.Child;
73 "descendant-or-self", AXIS XPath.Ast.DescendantOrSelf;
74 "attribute", AXIS XPath.Ast.Attribute;
75 "following-sibling", AXIS XPath.Ast.FollowingSibling;
76 "preceding-sibling", AXIS XPath.Ast.PrecedingSibling;
77 "parent", AXIS XPath.Ast.Parent;
78 "ancestor", AXIS XPath.Ast.Ancestor;
79 "ancestor-or-self", AXIS XPath.Ast.AncestorOrSelf;
80 "preceding", AXIS XPath.Ast.Preceding;
81 "following", AXIS XPath.Ast.Following;
92 | [' ' '\t' '\n'] -> token lexbuf
113 | ncname -> keyword_or_tag (L.utf8_lexeme lexbuf)
115 let s = L.utf8_lexeme lexbuf in
117 INT (int_of_string s)
119 _ -> FLOAT (float_of_string s))
121 let double_quote = L.latin1_lexeme_char lexbuf 0 = '"' in
122 string (L.lexeme_start lexbuf) double_quote lexbuf;
123 let s = get_stored_string () in
127 | _ -> illegal lexbuf
129 and string start double = lexer
131 let d = L.latin1_lexeme_char lexbuf 0 = '"' in
132 if d != double then (store_lexeme lexbuf; string start double lexbuf)
133 | '\\' ['\\' '"' '\''] ->
134 store_ascii (L.latin1_lexeme_char lexbuf 1);
135 string start double lexbuf
137 store_ascii '\n'; string start double lexbuf
139 store_ascii '\t'; string start double lexbuf
141 store_ascii '\r'; string start double lexbuf
142 | '\\' ['0'-'9']+ ';' ->
143 store_code (parse_char lexbuf 10 1);
144 string start double lexbuf
145 | '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F']+ ';' ->
146 store_code (parse_char lexbuf 16 2);
147 string start double lexbuf
151 error start (start+1) "Unterminated string"
154 string start double lexbuf