From 9c0b145d050a5981010435f54848dc862782709c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Kim=20Nguy=E1=BB=85n?= Date: Tue, 5 Feb 2013 15:03:32 +0100 Subject: [PATCH] Refactor xpath parser and ast in a submodule. --- Makefile | 10 ++ _tags | 1 + main.itarget | 1 + src/XPath/ast.ml | 164 ++++++++++++++++++ src/XPath/ast.mli | 49 ++++++ src/{uparser.ml => XPath/parser.ml} | 9 +- src/{ => XPath}/ulexer.ml | 28 +-- .../xpath_internal_parser.mly} | 10 +- src/test.ml | 6 +- src/xPath.ml | 164 ------------------ src/xPath.mli | 53 ------ src/xPath.mlpack | 4 + 12 files changed, 256 insertions(+), 243 deletions(-) create mode 100644 Makefile create mode 100644 _tags create mode 100644 main.itarget create mode 100644 src/XPath/ast.ml create mode 100644 src/XPath/ast.mli rename src/{uparser.ml => XPath/parser.ml} (84%) rename src/{ => XPath}/ulexer.ml (86%) rename src/{parser.mly => XPath/xpath_internal_parser.mly} (94%) delete mode 100644 src/xPath.ml delete mode 100644 src/xPath.mli create mode 100644 src/xPath.mlpack diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..83aeb8f --- /dev/null +++ b/Makefile @@ -0,0 +1,10 @@ +SOURCE_DIR=src,src/XPath +TARGET=main.otarget + + +all: + ocamlbuild -Is $(SOURCE_DIR) $(TARGET) + + +clean: + ocamlbuild -clean diff --git a/_tags b/_tags new file mode 100644 index 0000000..cf9b611 --- /dev/null +++ b/_tags @@ -0,0 +1 @@ +: for-pack(XPath) diff --git a/main.itarget b/main.itarget new file mode 100644 index 0000000..8171f3b --- /dev/null +++ b/main.itarget @@ -0,0 +1 @@ +src/test.native diff --git a/src/XPath/ast.ml b/src/XPath/ast.ml new file mode 100644 index 0000000..a097024 --- /dev/null +++ b/src/XPath/ast.ml @@ -0,0 +1,164 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +(* + Time-stamp: +*) + + +type path = single_path list +and single_path = Absolute of step list | Relative of step list +and step = axis * test * expr list +and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling + | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following + +and test = QNameSet.t + +and binop = Eq | Neq | Lt | Gt | Lte | Gte | Or | And | Add | Sub | Mult | Div | Mod +and unop = Neg +and expr = + | Number of [ `Int of int | `Float of float ] + | String of string + | Fun_call of QName.t * expr list + | Path of path + | Binop of expr * binop * expr + | Unop of unop * expr + + +type t = path + + +let text = QNameSet.singleton QName.text +let node = QNameSet.any +let star = + QNameSet.complement ( + QNameSet.from_list [ QName.text; + QName.document; + QName.cdata_section; + QName.comment]) + + + +let pp fmt e = Format.fprintf fmt e + +let prio e = + match e with + | Unop (Neg, _) -> 11 + | Path _ -> 10 + | Number _ | String _ | Fun_call _ -> 9 + | Binop (_,op,_) -> begin match op with + | Lt | Lte | Gt | Gte -> 7 + | Neq | Eq -> 6 + | And -> 5 + | Or -> 4 + | Mult | Div | Mod -> 3 + | Add | Sub -> 2 + end + +let print_binop fmt o = + pp fmt "%s" begin match o with + | Eq -> "=" + | Neq -> "!=" + | Lt -> "<" + | Gt -> ">" + | Lte -> "<=" + | Gte -> ">=" + | Or -> "or" + | And -> "and" + | Add -> "+" + | Sub -> "-" + | Mult -> "*" + | Div -> "div" + | Mod -> "mod" + end +let print_unop fmt o = + pp fmt "%s" begin match o with + | Neg -> "-" + end + +let rec print_path fmt p = + Pretty.print_list ~sep:" | " print_single_path fmt p + +and print_single_path fmt p = + let l = match p with + | Absolute l -> pp fmt "/"; l + | Relative l -> l + in + Pretty.print_list ~sep:"/" print_step fmt l + +and print_step fmt (axis, test, expr) = + pp fmt "%a::%a" print_axis axis print_test test; + match expr with + [] -> () + | l -> pp fmt "[ "; + Pretty.print_list ~sep:" ][ " print_expr fmt l; + pp fmt " ]" + +and print_axis fmt a = pp fmt "%s" begin + match a with + Self -> "self" + | Child -> "child" + | Descendant -> "descendant" + | DescendantOrSelf -> "descendant-or-self" + | FollowingSibling -> "following-sibling" + | Attribute -> "attribute" + | Ancestor -> "ancestor" + | AncestorOrSelf -> "ancestor-or-self" + | PrecedingSibling -> "preceding-sibling" + | Parent -> "parent" + | Preceding -> "preceding" + | Following -> "following" +end + +and print_test fmt ts = + try + pp fmt "%s" (List.assoc ts + [ text,"text()"; + node,"node()"; + star, "*" ] ) + with + Not_found -> pp fmt "%s" + (if QNameSet.is_finite ts + then QName.to_string (QNameSet.choose ts) + else "" + ) + +and print_expr fmt = function +| Number (`Int(i)) -> pp fmt "%i" i +| Number (`Float(f)) -> pp fmt "%f" f +| String s -> pp fmt "'%S'" s +| Fun_call (n, args) -> + pp fmt "%a(" QName.print n; + Pretty.print_list ~sep:", " print_expr fmt args; + pp fmt ")" +| Path p -> print_path fmt p +| Binop (e1, op, e2) as e -> + let pe = prio e in + let need_par1 = prio e1 < pe in + if need_par1 then pp fmt "("; + pp fmt "%a" print_expr e1; + if need_par1 then pp fmt ")"; + pp fmt " %a " print_binop op; + let need_par2 = prio e2 < pe in + if need_par2 then pp fmt "("; + pp fmt "%a" print_expr e2; + if need_par2 then pp fmt ")" +| Unop (op, e0) as e -> + let need_par0 = prio e0 < prio e in + print_unop fmt op; + if need_par0 then pp fmt "("; + print_expr fmt e0; + if need_par0 then pp fmt ")" + diff --git a/src/XPath/ast.mli b/src/XPath/ast.mli new file mode 100644 index 0000000..a2b0579 --- /dev/null +++ b/src/XPath/ast.mli @@ -0,0 +1,49 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2013 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +(* + Time-stamp: +*) + +type path = single_path list +and single_path = Absolute of step list | Relative of step list +and step = axis * test * expr list +and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling + | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following + +and test = QNameSet.t + +and binop = Eq | Neq | Lt | Gt | Lte | Gte | Or | And | Add | Sub | Mult | Div | Mod +and unop = Neg +and expr = + | Number of [ `Int of int | `Float of float ] + | String of string + | Fun_call of QName.t * expr list + | Path of path + | Binop of expr * binop * expr + | Unop of unop * expr +type t = path +val text : QNameSet.t +val node : QNameSet.t +val star : QNameSet.t +val print_binop : Format.formatter -> binop -> unit +val print_unop : Format.formatter -> unop -> unit +val print_path : Format.formatter -> path -> unit +val print_single_path : Format.formatter -> single_path -> unit +val print_step : Format.formatter -> step -> unit +val print_axis : Format.formatter -> axis -> unit +val print_test : Format.formatter -> test -> unit +val print_expr : Format.formatter -> expr -> unit + diff --git a/src/uparser.ml b/src/XPath/parser.ml similarity index 84% rename from src/uparser.ml rename to src/XPath/parser.ml index c5aeb1a..69e76e1 100644 --- a/src/uparser.ml +++ b/src/XPath/parser.ml @@ -14,10 +14,11 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) -let xpath (f : Ulexing.lexbuf -> Parser.token) (l: Ulexing.lexbuf) = - Parser.xpath (fun _ -> f l) (Lexing.from_string "!!dummy!!") +include Xpath_internal_parser + +let parse (l : Ulexing.lexbuf) = + xpath (fun _ -> Ulexer.token l) (Lexing.from_string "!!dummy!!") - diff --git a/src/ulexer.ml b/src/XPath/ulexer.ml similarity index 86% rename from src/ulexer.ml rename to src/XPath/ulexer.ml index 4951023..aa66179 100644 --- a/src/ulexer.ml +++ b/src/XPath/ulexer.ml @@ -14,10 +14,10 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) -open Parser +open Xpath_internal_parser module L = Ulexing @@ -80,18 +80,18 @@ let parse_char lexbuf base i = let keyword_or_tag s = try List.assoc s [ - "self", AXIS XPath.Ast.Self; - "descendant", AXIS XPath.Ast.Descendant; - "child", AXIS XPath.Ast.Child; - "descendant-or-self", AXIS XPath.Ast.DescendantOrSelf; - "attribute", AXIS XPath.Ast.Attribute; - "following-sibling", AXIS XPath.Ast.FollowingSibling; - "preceding-sibling", AXIS XPath.Ast.PrecedingSibling; - "parent", AXIS XPath.Ast.Parent; - "ancestor", AXIS XPath.Ast.Ancestor; - "ancestor-or-self", AXIS XPath.Ast.AncestorOrSelf; - "preceding", AXIS XPath.Ast.Preceding; - "following", AXIS XPath.Ast.Following; + "self", AXIS Ast.Self; + "descendant", AXIS Ast.Descendant; + "child", AXIS Ast.Child; + "descendant-or-self", AXIS Ast.DescendantOrSelf; + "attribute", AXIS Ast.Attribute; + "following-sibling", AXIS Ast.FollowingSibling; + "preceding-sibling", AXIS Ast.PrecedingSibling; + "parent", AXIS Ast.Parent; + "ancestor", AXIS Ast.Ancestor; + "ancestor-or-self", AXIS Ast.AncestorOrSelf; + "preceding", AXIS Ast.Preceding; + "following", AXIS Ast.Following; "and", AND; "or" , OR; "div", DIV; diff --git a/src/parser.mly b/src/XPath/xpath_internal_parser.mly similarity index 94% rename from src/parser.mly rename to src/XPath/xpath_internal_parser.mly index 4d100ef..cd32ebb 100644 --- a/src/parser.mly +++ b/src/XPath/xpath_internal_parser.mly @@ -15,10 +15,10 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) - open XPath.Ast + open Ast let f () = () %} @@ -26,7 +26,7 @@ %token STRING %token INT %token FLOAT -%token AXIS +%token AXIS %token RB LB LP RP %token SLASH SLASHSLASH COLONCOLON STAR PIPE %token EQ NEQ LT GT LTE GTE OR AND ADD SUB DIV MOD @@ -43,7 +43,7 @@ %nonassoc uminus %start xpath -%type xpath +%type xpath %% @@ -88,7 +88,7 @@ axis_test: | test { Child, $1 } | AXIS { let _ = Format.flush_str_formatter () in - let () = Format.fprintf Format.str_formatter "%a" XPath.Ast.print_axis $1 in + let () = Format.fprintf Format.str_formatter "%a" Ast.print_axis $1 in let a = Format.flush_str_formatter () in Child, QNameSet.singleton (QName.of_string a) } diff --git a/src/test.ml b/src/test.ml index 0aaea6b..dec8918 100644 --- a/src/test.ml +++ b/src/test.ml @@ -14,7 +14,7 @@ (***********************************************************************) (* - Time-stamp: + Time-stamp: *) (** use: xml_file "XPath querie" @@ -33,9 +33,9 @@ let doc = close_in fd; d -let query = +let query = let arg2 = Sys.argv.(2) in - Uparser.xpath Ulexer.token (Ulexing.from_latin1_string arg2) + XPath.Parser.parse (Ulexing.from_latin1_string arg2) open Format diff --git a/src/xPath.ml b/src/xPath.ml deleted file mode 100644 index a825d84..0000000 --- a/src/xPath.ml +++ /dev/null @@ -1,164 +0,0 @@ -(***********************************************************************) -(* *) -(* TAToo *) -(* *) -(* Kim Nguyen, LRI UMR8623 *) -(* Université Paris-Sud & CNRS *) -(* *) -(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) -(* Recherche Scientifique. All rights reserved. This file is *) -(* distributed under the terms of the GNU Lesser General Public *) -(* License, with the special exception on linking described in file *) -(* ../LICENSE. *) -(* *) -(***********************************************************************) - -(* - Time-stamp: -*) - -module Ast = -struct - - type path = single_path list - and single_path = Absolute of step list | Relative of step list - and step = axis * test * expr list - and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling - | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following - - and test = QNameSet.t - - and binop = Eq | Neq | Lt | Gt | Lte | Gte | Or | And | Add | Sub | Mult | Div | Mod - and unop = Neg - and expr = - | Number of [ `Int of int | `Float of float ] - | String of string - | Fun_call of QName.t * expr list - | Path of path - | Binop of expr * binop * expr - | Unop of unop * expr - - - type t = path - - - let text = QNameSet.singleton QName.text - let node = QNameSet.any - let star = - QNameSet.complement ( - QNameSet.from_list [ QName.text; - QName.document; - QName.cdata_section; - QName.comment]) - - - - let pp fmt e = Format.fprintf fmt e - - let prio e = match e with - | Unop (Neg, _) -> 11 - | Path _ -> 10 - | Number _ | String _ | Fun_call _ -> 9 - | Binop (_,op,_) -> begin match op with - | Lt | Lte | Gt | Gte -> 7 - | Neq | Eq -> 6 - | And -> 5 - | Or -> 4 - | Mult | Div | Mod -> 3 - | Add | Sub -> 2 - end - - let print_binop fmt o = - pp fmt "%s" begin match o with - | Eq -> "=" - | Neq -> "!=" - | Lt -> "<" - | Gt -> ">" - | Lte -> "<=" - | Gte -> ">=" - | Or -> "or" - | And -> "and" - | Add -> "+" - | Sub -> "-" - | Mult -> "*" - | Div -> "div" - | Mod -> "mod" - end - let print_unop fmt o = - pp fmt "%s" begin match o with - | Neg -> "-" - end - - let rec print_path fmt p = - Pretty.print_list ~sep:" | " print_single_path fmt p - - and print_single_path fmt p = - let l = match p with - | Absolute l -> pp fmt "/"; l - | Relative l -> l - in - Pretty.print_list ~sep:"/" print_step fmt l - - and print_step fmt (axis, test, expr) = - pp fmt "%a::%a" print_axis axis print_test test; - match expr with - [] -> () - | l -> pp fmt "[ "; - Pretty.print_list ~sep:" ][ " print_expr fmt l; - pp fmt " ]" - - and print_axis fmt a = pp fmt "%s" (match a with - Self -> "self" - | Child -> "child" - | Descendant -> "descendant" - | DescendantOrSelf -> "descendant-or-self" - | FollowingSibling -> "following-sibling" - | Attribute -> "attribute" - | Ancestor -> "ancestor" - | AncestorOrSelf -> "ancestor-or-self" - | PrecedingSibling -> "preceding-sibling" - | Parent -> "parent" - | Preceding -> "preceding" - | Following -> "following" - ) - and print_test fmt ts = - try - pp fmt "%s" (List.assoc ts - [ text,"text()"; - node,"node()"; - star, "*" ] ) - with - Not_found -> pp fmt "%s" - (if QNameSet.is_finite ts - then QName.to_string (QNameSet.choose ts) - else "" - ) - - and print_expr fmt = function - | Number (`Int(i)) -> pp fmt "%i" i - | Number (`Float(f)) -> pp fmt "%f" f - | String s -> pp fmt "'%S'" s - | Fun_call (n, args) -> - pp fmt "%a(" QName.print n; - Pretty.print_list ~sep:", " print_expr fmt args; - pp fmt ")" - | Path p -> print_path fmt p - | Binop (e1, op, e2) as e -> - let pe = prio e in - let need_par1 = prio e1 < pe in - if need_par1 then pp fmt "("; - pp fmt "%a" print_expr e1; - if need_par1 then pp fmt ")"; - pp fmt " %a " print_binop op; - let need_par2 = prio e2 < pe in - if need_par2 then pp fmt "("; - pp fmt "%a" print_expr e2; - if need_par2 then pp fmt ")" - | Unop (op, e0) as e -> - let need_par0 = prio e0 < prio e in - print_unop fmt op; - if need_par0 then pp fmt "("; - print_expr fmt e0; - if need_par0 then pp fmt ")" - -end diff --git a/src/xPath.mli b/src/xPath.mli deleted file mode 100644 index 76a6ab3..0000000 --- a/src/xPath.mli +++ /dev/null @@ -1,53 +0,0 @@ -(***********************************************************************) -(* *) -(* TAToo *) -(* *) -(* Kim Nguyen, LRI UMR8623 *) -(* Université Paris-Sud & CNRS *) -(* *) -(* Copyright 2010-2013 Université Paris-Sud and Centre National de la *) -(* Recherche Scientifique. All rights reserved. This file is *) -(* distributed under the terms of the GNU Lesser General Public *) -(* License, with the special exception on linking described in file *) -(* ../LICENSE. *) -(* *) -(***********************************************************************) - -(* - Time-stamp: -*) - - - -module Ast : -sig - type path = single_path list - and single_path = Absolute of step list | Relative of step list - and step = axis * test * expr list - and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling - | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following - - and test = QNameSet.t - - and binop = Eq | Neq | Lt | Gt | Lte | Gte | Or | And | Add | Sub | Mult | Div | Mod - and unop = Neg - and expr = - | Number of [ `Int of int | `Float of float ] - | String of string - | Fun_call of QName.t * expr list - | Path of path - | Binop of expr * binop * expr - | Unop of unop * expr - type t = path - val text : QNameSet.t - val node : QNameSet.t - val star : QNameSet.t - val print_binop : Format.formatter -> binop -> unit - val print_unop : Format.formatter -> unop -> unit - val print_path : Format.formatter -> path -> unit - val print_single_path : Format.formatter -> single_path -> unit - val print_step : Format.formatter -> step -> unit - val print_axis : Format.formatter -> axis -> unit - val print_test : Format.formatter -> test -> unit - val print_expr : Format.formatter -> expr -> unit - end diff --git a/src/xPath.mlpack b/src/xPath.mlpack new file mode 100644 index 0000000..e062c25 --- /dev/null +++ b/src/xPath.mlpack @@ -0,0 +1,4 @@ +Ast +Parser +Ulexer +Xpath_internal_parser -- 2.17.1