-(***********************************************************************)
-(* *)
-(* TAToo *)
-(* *)
-(* Kim Nguyen, LRI UMR8623 *)
-(* Université Paris-Sud & CNRS *)
-(* *)
-(* Copyright 2010-2013 Université Paris-Sud and Centre National de la *)
-(* Recherche Scientifique. All rights reserved. This file is *)
-(* distributed under the terms of the GNU Lesser General Public *)
-(* License, with the special exception on linking described in file *)
-(* ../LICENSE. *)
-(* *)
-(***********************************************************************)
-
-(*
- Time-stamp: <Last modified on 2013-01-30 19:06:07 CET by Kim Nguyen>
-*)
-
-open Parser
-
-module L = Ulexing
-
-exception Error of int * int * string
-
-let error i j s = raise (Error (i,j,s))
-
-(***********************************************************)
-(* Buffer for string literals *)
-
-let string_buff = Buffer.create 1024
-
-let store_lexeme lexbuf =
- Buffer.add_string string_buff (Ulexing.utf8_lexeme lexbuf)
-let store_ascii = Buffer.add_char string_buff
-let store_code = Utf8.store string_buff
-let clear_buff () = Buffer.clear string_buff
-let get_stored_string () =
- let s = Buffer.contents string_buff in
- clear_buff ();
- Buffer.clear string_buff;
- s
-
-(***********************************************************)
-(* Lexer *)
-
-let illegal lexbuf =
- error
- (L.lexeme_start lexbuf)
- (L.lexeme_end lexbuf)
- "Illegal character"
-
-let return lexbuf tok = (tok, L.loc lexbuf)
-let return_loc i j tok = (tok, (i,j))
-
-let regexp ncname_char =
- xml_letter | xml_digit | [ '-' '_' ] | xml_combining_char | xml_extender | "\\."
-
-let hexa_digit = function
- | '0'..'9' as c -> (Char.code c) - (Char.code '0')
- | 'a'..'f' as c -> (Char.code c) - (Char.code 'a') + 10
- | 'A'..'F' as c -> (Char.code c) - (Char.code 'A') + 10
- | _ -> -1
-
-let regexp ncname = ( xml_letter ncname_char* ) | ('_' ncname_char+)
-let regexp digit = ['0'-'9']
-let regexp float = '-'? digit+ ('.' digit+ (['e''E'] digit+)?)?
-
-let parse_char lexbuf base i =
- let s = L.latin1_sub_lexeme lexbuf i (L.lexeme_length lexbuf - i - 1) in
- let r = ref 0 in
- for i = 0 to String.length s - 1 do
- let c = hexa_digit s.[i] in
- if (c >= base) || (c < 0) then
- error (L.lexeme_start lexbuf) (L.lexeme_end lexbuf) "invalid digit";
- r := !r * base + c;
- done;
- !r
-
-let keyword_or_tag s =
- try
- List.assoc s [
- "self", AXIS XPath.Ast.Self;
- "descendant", AXIS XPath.Ast.Descendant;
- "child", AXIS XPath.Ast.Child;
- "descendant-or-self", AXIS XPath.Ast.DescendantOrSelf;
- "attribute", AXIS XPath.Ast.Attribute;
- "following-sibling", AXIS XPath.Ast.FollowingSibling;
- "preceding-sibling", AXIS XPath.Ast.PrecedingSibling;
- "parent", AXIS XPath.Ast.Parent;
- "ancestor", AXIS XPath.Ast.Ancestor;
- "ancestor-or-self", AXIS XPath.Ast.AncestorOrSelf;
- "preceding", AXIS XPath.Ast.Preceding;
- "following", AXIS XPath.Ast.Following;
- "and", AND;
- "or" , OR;
- "div", DIV;
- "mod", MOD;
- ]
- with
- _ -> TAG s
-
-
-let rec token = lexer
- | [' ' '\t' '\n'] -> token lexbuf
- | "*" -> STAR
- | "/" -> SLASH
- | "//" -> SLASHSLASH
- | "::" -> COLONCOLON
- | "(" -> LP
- | ")" -> RP
- | "[" -> LB
- | "]" -> RB
- | "," -> COMMA
- | "|" -> PIPE
- | "+" -> ADD
- | "-" -> SUB
- | "<" -> LT
- | "<=" -> LTE
- | ">" -> GT
- | ">=" -> GTE
- | "=" -> EQ
- | "!=" -> NEQ
- | "node()" -> NODE
- | "text()" -> TEXT
- | ncname -> keyword_or_tag (L.utf8_lexeme lexbuf)
- | float ->
- let s = L.utf8_lexeme lexbuf in
- (try
- INT (int_of_string s)
- with
- _ -> FLOAT (float_of_string s))
- | '"' | "'" ->
- let double_quote = L.latin1_lexeme_char lexbuf 0 = '"' in
- string (L.lexeme_start lexbuf) double_quote lexbuf;
- let s = get_stored_string () in
- STRING s
-
- | eof -> EOF
- | _ -> illegal lexbuf
-
-and string start double = lexer
- | '"' | "'" ->
- let d = L.latin1_lexeme_char lexbuf 0 = '"' in
- if d != double then (store_lexeme lexbuf; string start double lexbuf)
- | '\\' ['\\' '"' '\''] ->
- store_ascii (L.latin1_lexeme_char lexbuf 1);
- string start double lexbuf
- | "\\n" ->
- store_ascii '\n'; string start double lexbuf
- | "\\t" ->
- store_ascii '\t'; string start double lexbuf
- | "\\r" ->
- store_ascii '\r'; string start double lexbuf
- | '\\' ['0'-'9']+ ';' ->
- store_code (parse_char lexbuf 10 1);
- string start double lexbuf
- | '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F']+ ';' ->
- store_code (parse_char lexbuf 16 2);
- string start double lexbuf
- | '\\' ->
- illegal lexbuf;
- | eof ->
- error start (start+1) "Unterminated string"
- | _ ->
- store_lexeme lexbuf;
- string start double lexbuf
-
-