Simplify the automaton encoding a bit (remove redundant predicates in formulae).

[tatoo.git] / src / xpath / ulexer.ml
diff --git a/src/xpath/ulexer.ml b/src/xpath/ulexer.ml

index 2153e18..02b6e02 100644 (file)
--- a/src/xpath/ulexer.ml
+++ b/src/xpath/ulexer.ml
@@ -13,10 +13,6 @@
  (*                                                                     *)
  (***********************************************************************)
  
-(*
-  Time-stamp: <Last modified on 2013-02-14 15:38:56 CET by Kim Nguyen>
-*)
-
  open Xpath_internal_parser
  
  module L = Ulexing
@@ -54,7 +50,7 @@ let return lexbuf tok = (tok, L.loc lexbuf)
  let return_loc i j tok = (tok, (i,j))
  
  let regexp ncname_char =
-  xml_letter | xml_digit | [ '-' '_' ] | xml_combining_char | xml_extender | "\\."
+  xml_letter | xml_digit | [ '-' '_' '.'] | xml_combining_char | xml_extender
  
  let hexa_digit = function
    | '0'..'9' as c -> (Char.code c) - (Char.code '0')
@@ -121,8 +117,18 @@ let rec token = lexer
   | ">=" -> GTE
   | "="  -> EQ
   | "!=" -> NEQ
+ | ".." -> DOTDOT
+ | "."  -> DOT
   | "node()" -> NODE
   | "text()" -> TEXT
+ | "comment()" -> COMMENT
+ | '@' ncname -> let l = L.utf8_lexeme lexbuf in
+                 ATTNAME (String.sub l 1 (String.length l - 1))
+ | "processing-instruction()" -> PI ""
+ | "processing-instruction('"ncname"')"
+ | "processing-instruction(\""ncname"\")"->
+     let s = L.utf8_lexeme lexbuf in
+     PI (String.sub s 24 (String.length s - 26))
   | ncname -> keyword_or_tag (L.utf8_lexeme lexbuf)
   | float ->
       let s = L.utf8_lexeme lexbuf in
@@ -165,5 +171,3 @@ and string start double = lexer
    | _ ->
        store_lexeme lexbuf;
        string start double lexbuf
-
-