From: Kim Nguyễn Date: Thu, 28 Jun 2012 12:46:11 +0000 (+0200) Subject: Usable version: X-Git-Tag: Core~34 X-Git-Url: http://git.nguyen.vg/gitweb/?p=tatoo.git;a=commitdiff_plain;h=cba2938d929fd5119b1491686ddc224d5af618c6 Usable version: - use ocamlbuild - cherry pick Pretty, State, StateSet and Formula from SXSI - Parsing of XML documents --- diff --git a/HACKING b/HACKING new file mode 100644 index 0000000..12ca0e5 --- /dev/null +++ b/HACKING @@ -0,0 +1,5 @@ +To build: + +ocamlbuild src/test.native + +to build the native executable. \ No newline at end of file diff --git a/Makefile.am b/Makefile.am deleted file mode 100644 index 5fa30ff..0000000 --- a/Makefile.am +++ /dev/null @@ -1,2 +0,0 @@ -ACLOCAL_AMFLAGS = -I m4 - diff --git a/autogen.sh b/autogen.sh deleted file mode 100755 index e464f7e..0000000 --- a/autogen.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/sh -export AUTOMAKE="automake -a --foreign" -aclocal -I m4 -autoreconf -i \ No newline at end of file diff --git a/configure.ac b/configure.ac deleted file mode 100644 index 283f8e1..0000000 --- a/configure.ac +++ /dev/null @@ -1,28 +0,0 @@ -AC_INIT(tatoo,0.01) -AM_INIT_AUTOMAKE -AC_CONFIG_MACRO_DIR([m4]) - - -AC_PROG_OCAML -if test "$OCAMLC" = "no"; then - AC_MSG_ERROR([Please install the OCaml compiler]) -fi - -AC_PROG_FINDLIB -if test "$OCAMLFIND" = "no"; then - AC_MSG_ERROR([Please install OCaml findlib (the ocamlfind command)]) -fi - -AC_CHECK_OCAML_PKG([ulex]) -if test "$OCAML_PKG_ulex" = "no"; then - AC_MSG_ERROR([Please install OCaml findlib module 'ulex'.]) -fi - -AC_CHECK_OCAML_PKG([expat]) -if test "$OCAML_PKG_expat" = "no"; then - AC_MSG_ERROR([Please install OCaml findlib module 'expat'.]) -fi - -AC_CONFIG_FILES([Makefile]) - -AC_OUTPUT diff --git a/include/utils.ml b/include/utils.ml new file mode 100644 index 0000000..855c6d3 --- /dev/null +++ b/include/utils.ml @@ -0,0 +1,37 @@ +IFNDEF UTILS__ML__ +THEN +DEFINE UTILS__ML__ + +IFDEF WORDSIZE64 +THEN +INCLUDE "utils64.ml" +ELSE +INCLUDE "utils32.ml" +END + + +DEFINE HASHINT2 (x,y) = (((x)+HPARAM*(y)) land 0x3fffffff) +DEFINE HASHINT3 (x,y,z) = (((x) + (y) * HPARAM + (z) * HPARAM2) land 0x3fffffff) +DEFINE HASHINT4 (x,y,z,t) = (((x) + (y) * HPARAM + (z)*HPARAM2 + (t)* HPARAM3) land 0x3fffffff) +DEFINE HASHINT5 (x,y,z,t,u) = (((x) + (y) * HPARAM + (z)*HPARAM2 + (t)* HPARAM3 + (u)*HPARAM4) land 0x3fffffff) + +(* Magic Constants used for hashing *) +DEFINE PRIME1 = 7 +DEFINE PRIME2 = 19 +DEFINE PRIME3 = 83 +DEFINE PRIME4 = 223 +DEFINE PRIME5 = 491 +DEFINE PRIME6 = 733 +DEFINE PRIME7 = 1009 +DEFINE PRIME8 = 4093 +DEFINE PRIME9 = 65599 + +DEFINE SMALL_H_SIZE = PRIME2 +DEFINE MED_H_SIZE = PRIME5 +DEFINE BIG_H_SIZE = PRIME8 + +DEFINE SMALL_A_SIZE = 128 +DEFINE MED_A_SIZE = 2048 +DEFINE BIG_A_SIZE = 8192 + +END (* IFNDEF UTILS__ML__ *) diff --git a/include/utils32.ml b/include/utils32.ml new file mode 100644 index 0000000..2c610cd --- /dev/null +++ b/include/utils32.ml @@ -0,0 +1,15 @@ +IFNDEF UTILS32__ML__ +THEN +DEFINE UTILS32__ML__ + DEFINE WORDSIZE = 32 + + + DEFINE HALFWORDSIZE = 16 + DEFINE INTSIZE = 31 + DEFINE HALFINTSIZE = 15 + DEFINE HALF_MAX_INT = 536870911 + DEFINE HPARAM = 65599 + DEFINE HPARAM2 = 8261505 + DEFINE HPARAM3 = 780587199 + DEFINE HPARAM4 = 549173308 +END diff --git a/include/utils64.ml b/include/utils64.ml new file mode 100644 index 0000000..cfebeec --- /dev/null +++ b/include/utils64.ml @@ -0,0 +1,13 @@ +IFNDEF UTILS64__ML__ +THEN +DEFINE UTILS64__ML__ + DEFINE WORDSIZE = 64 + DEFINE HALFWORDSIZE = 32 + DEFINE INTSIZE = 63 + DEFINE HALFINTSIZE = 31 + DEFINE HALF_MAX_INT = 2305843009213693951 + DEFINE HPARAM = 65599 + DEFINE HPARAM2 = 4303228801 + DEFINE HPARAM3 = 282287506116799 + DEFINE HPARAM4 = 71034040046345985 +END diff --git a/m4/ocaml.m4 b/m4/ocaml.m4 deleted file mode 100644 index 479f3a8..0000000 --- a/m4/ocaml.m4 +++ /dev/null @@ -1,240 +0,0 @@ -dnl autoconf macros for OCaml -dnl -dnl Copyright © 2009 Richard W.M. Jones -dnl Copyright © 2009 Stefano Zacchiroli -dnl Copyright © 2000-2005 Olivier Andrieu -dnl Copyright © 2000-2005 Jean-Christophe Filliâtre -dnl Copyright © 2000-2005 Georges Mariano -dnl -dnl For documentation, please read the ocaml.m4 man page. - -AC_DEFUN([AC_PROG_OCAML], -[dnl - # checking for ocamlc - AC_CHECK_TOOL([OCAMLC],[ocamlc],[no]) - - if test "$OCAMLC" != "no"; then - OCAMLVERSION=`$OCAMLC -v | sed -n -e 's|.*version* *\(.*\)$|\1|p'` - AC_MSG_RESULT([OCaml version is $OCAMLVERSION]) - # If OCAMLLIB is set, use it - if test "$OCAMLLIB" = ""; then - OCAMLLIB=`$OCAMLC -where 2>/dev/null || $OCAMLC -v|tail -1|cut -d ' ' -f 4` - else - AC_MSG_RESULT([OCAMLLIB previously set; preserving it.]) - fi - AC_MSG_RESULT([OCaml library path is $OCAMLLIB]) - - AC_SUBST([OCAMLVERSION]) - AC_SUBST([OCAMLLIB]) - - # checking for ocamlopt - AC_CHECK_TOOL([OCAMLOPT],[ocamlopt],[no]) - OCAMLBEST=byte - if test "$OCAMLOPT" = "no"; then - AC_MSG_WARN([Cannot find ocamlopt; bytecode compilation only.]) - else - TMPVERSION=`$OCAMLOPT -v | sed -n -e 's|.*version* *\(.*\)$|\1|p' ` - if test "$TMPVERSION" != "$OCAMLVERSION" ; then - AC_MSG_RESULT([versions differs from ocamlc; ocamlopt discarded.]) - OCAMLOPT=no - else - OCAMLBEST=opt - fi - fi - - AC_SUBST([OCAMLBEST]) - - # checking for ocamlc.opt - AC_CHECK_TOOL([OCAMLCDOTOPT],[ocamlc.opt],[no]) - if test "$OCAMLCDOTOPT" != "no"; then - TMPVERSION=`$OCAMLCDOTOPT -v | sed -n -e 's|.*version* *\(.*\)$|\1|p' ` - if test "$TMPVERSION" != "$OCAMLVERSION" ; then - AC_MSG_RESULT([versions differs from ocamlc; ocamlc.opt discarded.]) - else - OCAMLC=$OCAMLCDOTOPT - fi - fi - - # checking for ocamlopt.opt - if test "$OCAMLOPT" != "no" ; then - AC_CHECK_TOOL([OCAMLOPTDOTOPT],[ocamlopt.opt],[no]) - if test "$OCAMLOPTDOTOPT" != "no"; then - TMPVERSION=`$OCAMLOPTDOTOPT -v | sed -n -e 's|.*version* *\(.*\)$|\1|p' ` - if test "$TMPVERSION" != "$OCAMLVERSION" ; then - AC_MSG_RESULT([version differs from ocamlc; ocamlopt.opt discarded.]) - else - OCAMLOPT=$OCAMLOPTDOTOPT - fi - fi - fi - - AC_SUBST([OCAMLOPT]) - fi - - AC_SUBST([OCAMLC]) - - # checking for ocaml toplevel - AC_CHECK_TOOL([OCAML],[ocaml],[no]) - - # checking for ocamldep - AC_CHECK_TOOL([OCAMLDEP],[ocamldep],[no]) - - # checking for ocamlmktop - AC_CHECK_TOOL([OCAMLMKTOP],[ocamlmktop],[no]) - - # checking for ocamlmklib - AC_CHECK_TOOL([OCAMLMKLIB],[ocamlmklib],[no]) - - # checking for ocamldoc - AC_CHECK_TOOL([OCAMLDOC],[ocamldoc],[no]) - - # checking for ocamlbuild - AC_CHECK_TOOL([OCAMLBUILD],[ocamlbuild],[no]) -]) - - -AC_DEFUN([AC_PROG_OCAMLLEX], -[dnl - # checking for ocamllex - AC_CHECK_TOOL([OCAMLLEX],[ocamllex],[no]) - if test "$OCAMLLEX" != "no"; then - AC_CHECK_TOOL([OCAMLLEXDOTOPT],[ocamllex.opt],[no]) - if test "$OCAMLLEXDOTOPT" != "no"; then - OCAMLLEX=$OCAMLLEXDOTOPT - fi - fi - AC_SUBST([OCAMLLEX]) -]) - -AC_DEFUN([AC_PROG_OCAMLYACC], -[dnl - AC_CHECK_TOOL([OCAMLYACC],[ocamlyacc],[no]) - AC_SUBST([OCAMLYACC]) -]) - - -AC_DEFUN([AC_PROG_CAMLP4], -[dnl - AC_REQUIRE([AC_PROG_OCAML])dnl - - # checking for camlp4 - AC_CHECK_TOOL([CAMLP4],[camlp4],[no]) - if test "$CAMLP4" != "no"; then - TMPVERSION=`$CAMLP4 -v 2>&1| sed -n -e 's|.*version *\(.*\)$|\1|p'` - if test "$TMPVERSION" != "$OCAMLVERSION" ; then - AC_MSG_RESULT([versions differs from ocamlc]) - CAMLP4=no - fi - fi - AC_SUBST([CAMLP4]) - - # checking for companion tools - AC_CHECK_TOOL([CAMLP4BOOT],[camlp4boot],[no]) - AC_CHECK_TOOL([CAMLP4O],[camlp4o],[no]) - AC_CHECK_TOOL([CAMLP4OF],[camlp4of],[no]) - AC_CHECK_TOOL([CAMLP4OOF],[camlp4oof],[no]) - AC_CHECK_TOOL([CAMLP4ORF],[camlp4orf],[no]) - AC_CHECK_TOOL([CAMLP4PROF],[camlp4prof],[no]) - AC_CHECK_TOOL([CAMLP4R],[camlp4r],[no]) - AC_CHECK_TOOL([CAMLP4RF],[camlp4rf],[no]) - AC_SUBST([CAMLP4BOOT]) - AC_SUBST([CAMLP4O]) - AC_SUBST([CAMLP4OF]) - AC_SUBST([CAMLP4OOF]) - AC_SUBST([CAMLP4ORF]) - AC_SUBST([CAMLP4PROF]) - AC_SUBST([CAMLP4R]) - AC_SUBST([CAMLP4RF]) -]) - - -AC_DEFUN([AC_PROG_FINDLIB], -[dnl - AC_REQUIRE([AC_PROG_OCAML])dnl - - # checking for ocamlfind - AC_CHECK_TOOL([OCAMLFIND],[ocamlfind],[no]) - AC_SUBST([OCAMLFIND]) -]) - - -dnl Thanks to Jim Meyering for working this next bit out for us. -dnl XXX We should define AS_TR_SH if it's not defined already -dnl (eg. for old autoconf). -AC_DEFUN([AC_CHECK_OCAML_PKG], -[dnl - AC_REQUIRE([AC_PROG_FINDLIB])dnl - - AC_MSG_CHECKING([for OCaml findlib package $1]) - - unset found - unset pkg - found=no - for pkg in $1 $2 ; do - if $OCAMLFIND query $pkg >/dev/null 2>/dev/null; then - AC_MSG_RESULT([found]) - AS_TR_SH([OCAML_PKG_$1])=$pkg - found=yes - break - fi - done - if test "$found" = "no" ; then - AC_MSG_RESULT([not found]) - AS_TR_SH([OCAML_PKG_$1])=no - fi - - AC_SUBST(AS_TR_SH([OCAML_PKG_$1])) -]) - - -AC_DEFUN([AC_CHECK_OCAML_MODULE], -[dnl - AC_MSG_CHECKING([for OCaml module $2]) - - cat > conftest.ml <&5 2>&5 ; then - found=yes - break - fi - done - - if test "$found" ; then - AC_MSG_RESULT([$$1]) - else - AC_MSG_RESULT([not found]) - $1=no - fi - AC_SUBST([$1]) -]) - - -dnl XXX Cross-compiling -AC_DEFUN([AC_CHECK_OCAML_WORD_SIZE], -[dnl - AC_REQUIRE([AC_PROG_OCAML])dnl - AC_MSG_CHECKING([for OCaml compiler word size]) - cat > conftest.ml < conftest.ml < (S [extra; _A e] )) l) +;; +let cxx_flags_for_ml = [ _S ~extra:(_A "-ccopt") cxx_flags ] +let cxx_flags = ref [ _S cxx_flags ] +let project_dirs = [ src_path; include_path ] +let cxx_include_flags = _S cxx_includes +let cxx_link_flags = ref [ _S cxx_lpaths; _S cxx_libs] +let native_link_flags = ref (List.map (fun s -> s ^ ".cmxa") ocaml_link) +let byte_link_flags = ref ("-custom" :: (List.map (fun s -> s ^ ".cma") ocaml_link)) +let link_flags = [ A"-linkpkg" ] +let libs_files = List.map (fun s -> "file:" ^ s) cxx_libs_objects + + +let native_compile_flags = ref [A"-fno-PIC"] +let compile_flags = ref [] + +let dwsize = sprintf "-DWORDSIZE%i" Sys.word_size + +(* Utils *) +let ( @= ) r l = r := !r @ l +let ( =:: ) r e = r := e :: !r + +(* Pre-processed files *) +let pp_macro_options = ref + [ A "-parser"; A "macro"; A dwsize; A "-I"; P include_path ] + +let include_full_path = Pathname.pwd / include_path +module Depends = +struct +open Scanf +let scan_include ml = + let ic = open_in ml and includes = ref [] in + begin + try + while true do + let s = input_line ic in + if String.length s > 0 then + try + sscanf s " INCLUDE \"%s@\"" (fun s -> includes =:: include_path /s) + with Scan_failure _ -> () + done + with End_of_file -> close_in ic + end; + !includes + +let ocaml ml = + let rec loop file = + let includes = scan_include file in + List.fold_left (fun a i -> (loop i) @ a) includes includes + in + let includes = loop ml in + dep [ "file:" ^ ml ] includes + +let parse_depends depfile = + let ichan = open_in depfile in + let iscan = Scanning.from_channel ichan in + let includes = ref [] in + bscanf iscan " %_s@: %s " ignore; + try + while true do + bscanf iscan " %s " ( + function "" -> raise End_of_file + | "\\" -> () + | s -> includes =::s) + done; [] + with + _ -> close_in ichan;!includes + +let cxx cpp = + let depfile = !Options.build_dir /" __cxx_depends.tmp" in + let cmd = Cmd (S[ A cxx_cmd ; S !cxx_flags; cxx_include_flags ; A"-MM"; + A "-MF"; P depfile; P cpp]) + in + let () = Command.execute ~quiet:true ~pretend:false cmd in + let includes = parse_depends depfile in + let includes' = (List.filter (Pathname.is_relative) includes) in + dep [ "compile"; "file:" ^ cpp ] includes' +end + +let cxx_compile env _build = + let cpp = env "%.cpp" and obj = env "%.o" in + let tags = (tags_of_pathname cpp) ++ "compile" ++ "c++" in + Cmd(S[T tags; A cxx_cmd; A "-o" ; P obj; A "-c"; S !cxx_flags; cxx_include_flags; P cpp]) + +(* Native compile and link action *) + +let ocamlfind x = S[ T (Tags.singleton "ocamlfind"); A"ocamlfind"; x ; A "-package"; A ocamlfind_packages ] + +let ppopt l = List.map (fun e -> S[ A"-ppopt"; e ]) l + +let () = dispatch begin + function + | Before_rules -> + + Options.ocamlc := ocamlfind (A"ocamlc"); + Options.ocamlopt := ocamlfind (A"ocamlopt"); + Options.ocamldep := ocamlfind (A"ocamldep"); + Options.ocamldoc := ocamlfind (A"ocamldoc"); + Options.ocamlmktop := ocamlfind (A"ocamlmktop"); + + if not (List.mem "log" !Options.tags) then begin + pp_macro_options @= [ A "-DNLOG" ]; + end; + if (List.mem "profile" !Options.tags) then begin + pp_macro_options @= [ A "-DPROFILE" ]; + native_compile_flags @= [A "-p" ]; + native_link_flags @= [ "-p" ]; + cxx_flags @= [ A "-pg" ]; + cxx_link_flags @= [ A "-pg" ]; + end; + + if (List.mem "debug" !Options.tags) then begin + pp_macro_options @= [ A "-DDEBUG" ]; + cxx_flags @= [ A "-O0"; A "-g" ]; + cxx_link_flags @= [ A "-g" ]; + end + else begin + compile_flags @= [A "-noassert"]; + pp_macro_options @= [ A "-unsafe" ]; + native_compile_flags @= [ A "-inline"; A ocaml_inline ]; + cxx_flags @= [ A "-O3" ] + end; + + let dir_path = Pathname.pwd / src_path in + let dir = Pathname.readdir dir_path in + + Array.iter (fun entry -> + if Pathname.check_extension entry "ml" then + Depends.ocaml (src_path / entry) + else if Pathname.check_extension entry "cpp" then + Depends.cxx (src_path / entry) + ) dir; + + | After_rules -> + dep [ "link" ] cstub_lib; + rule "c++: cpp & depends -> o" ~prod:"%.o" ~deps:[ "%.cpp" ] cxx_compile; + let syntax_flags = S ([ A "-syntax"; A "camlp4o"; + S (ppopt [A "-printer" ; A"Camlp4OCamlAstDumper"]); + S (ppopt !pp_macro_options) ]) + in + flag [ "ocaml"; "ocamldep"] syntax_flags; + flag [ "ocaml"; "compile" ] (S[ A "-cc"; A cxx_cmd; S cxx_flags_for_ml ; syntax_flags; S !compile_flags ]); + flag [ "ocaml"; "native"; "compile" ] (S !native_compile_flags); + flag [ "ocaml"; "link" ] + (S [ S link_flags ; A "-cc"; A cxx_cmd; S cxx_flags_for_ml; A "-cclib" ; + Quote (S [ _S cstub_lib; S !cxx_link_flags]) ]); + flag [ "ocaml"; "byte"; "link" ] (_S !byte_link_flags); + flag [ "ocaml"; "native"; "link" ] (_S !native_link_flags); + flag [ "c"; "ocamlmklib"] (S[ A "-custom"; ]) + | _ -> () +end diff --git a/myocamlbuild_config.ml b/myocamlbuild_config.ml new file mode 100644 index 0000000..fef4759 --- /dev/null +++ b/myocamlbuild_config.ml @@ -0,0 +1,15 @@ +let ocaml_inline = "1000";; +let include_path = "include";; +let src_path = "src";; +let ocaml_link = [ ];; +let ocamlfind_packages = "unix,ulex,expat,camlp4,camlp4.lib";; +let cxx_flags = [ "-fno-PIC"; "-std=c++0x"; "-O3" ];; +let main_targets = [ "native","src/main.native"; + "byte", "src/main.byte" ];; +let cstub_lib = [ ];; + +let cxx_cmd = "g++";; +let cxx_includes = [ ] +let cxx_lpaths = [ ] +let cxx_libs = [ ];; +let cxx_libs_objects = [];; diff --git a/src/finiteCofinite.ml b/src/finiteCofinite.ml new file mode 100644 index 0000000..1c8e6ef --- /dev/null +++ b/src/finiteCofinite.ml @@ -0,0 +1,219 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) +INCLUDE "utils.ml" + +module type S = + sig + include Sigs.FiniteCofiniteSet + include Hcons.S with type t := t + end + +module type HConsBuilder = + functor (H : Sigs.HashedType) -> Hcons.S with type data = H.t + +module Builder (HCB : HConsBuilder) (E : Ptset.S) : + S with type elt = E.elt and type set = E.t = +struct + type elt = E.elt + type node = Finite of E.t | CoFinite of E.t + type set = E.t + module Node = HCB(struct + type t = node + let equal a b = + match a, b with + Finite (s1), Finite (s2) + | CoFinite (s1), CoFinite (s2) -> E.equal s1 s2 + | _ -> false + + let hash = function + | Finite s -> HASHINT2 (PRIME1, E.hash s) + | CoFinite s -> HASHINT2 (PRIME3, E.hash s) + end) + include Node + let empty = make (Finite E.empty) + let any = make (CoFinite E.empty) + let finite x = make (Finite x) + let cofinite x = make (CoFinite x) + + let is_empty = function + | { node = Finite s } when E.is_empty s -> true + | _ -> false + + let is_any = function + | { node = CoFinite s } when E.is_empty s -> true + | _ -> false + + let is_finite t = match t.node with + | Finite _ -> true | _ -> false + + let kind t = match t.node with + | Finite _ -> `Finite + | _ -> `Cofinite + + let mem x t = match t.node with + | Finite s -> E.mem x s + | CoFinite s -> not (E.mem x s) + + let singleton x = finite (E.singleton x) + + let add e t = match t.node with + | Finite s -> finite (E.add e s) + | CoFinite s -> cofinite (E.remove e s) + + let remove e t = match t.node with + | Finite s -> finite (E.remove e s) + | CoFinite s -> cofinite (E.add e s) + + let union s t = match s.node, t.node with + | Finite s, Finite t -> finite (E.union s t) + | CoFinite s, CoFinite t -> cofinite ( E.inter s t) + | Finite s, CoFinite t -> cofinite (E.diff t s) + | CoFinite s, Finite t-> cofinite (E.diff s t) + + let inter s t = match s.node, t.node with + | Finite s, Finite t -> finite (E.inter s t) + | CoFinite s, CoFinite t -> cofinite (E.union s t) + | Finite s, CoFinite t -> finite (E.diff s t) + | CoFinite s, Finite t-> finite (E.diff t s) + + let diff s t = match s.node, t.node with + | Finite s, Finite t -> finite (E.diff s t) + | Finite s, CoFinite t -> finite(E.inter s t) + | CoFinite s, Finite t -> cofinite(E.union t s) + | CoFinite s, CoFinite t -> finite (E.diff t s) + + let complement t = match t.node with + | Finite s -> cofinite s + | CoFinite s -> finite s + + let compare s t = match s.node, t.node with + | Finite s , Finite t -> E.compare s t + | CoFinite s , CoFinite t -> E.compare t s + | Finite _, CoFinite _ -> -1 + | CoFinite _, Finite _ -> 1 + + let subset s t = match s.node, t.node with + | Finite s , Finite t -> E.subset s t + | CoFinite s , CoFinite t -> E.subset t s + | Finite s, CoFinite t -> E.is_empty (E.inter s t) + | CoFinite _, Finite _ -> false + + (* given a list l of type t list, + returns two sets (f,c) where : + - f is the union of all the finite sets of l + - c is the union of all the cofinite sets of l + - f and c are disjoint + Invariant : cup f c = List.fold_left cup empty l + We treat the CoFinite part explicitely : + *) + + let kind_split l = + + let rec next_finite_cofinite facc cacc = function + | [] -> finite facc, cofinite (E.diff cacc facc) + | { node = Finite s } ::r -> + next_finite_cofinite (E.union s facc) cacc r + | { node = CoFinite _ } ::r when E.is_empty cacc -> + next_finite_cofinite facc cacc r + | { node = CoFinite s } ::r -> + next_finite_cofinite facc (E.inter cacc s) r + in + let rec first_cofinite facc = function + | [] -> empty,empty + | { node = Finite s } :: r-> first_cofinite (E.union s facc) r + | { node = CoFinite s } :: r -> next_finite_cofinite facc s r + in + first_cofinite E.empty l + + let fold f t a = match t.node with + | Finite s -> E.fold f s a + | CoFinite _ -> raise Sigs.InfiniteSet + + let iter f t = match t.node with + | Finite t -> E.iter f t + | CoFinite _ -> raise Sigs.InfiniteSet + + let for_all f t = match t.node with + | Finite s -> E.for_all f s + | CoFinite _ -> raise Sigs.InfiniteSet + + let exists f t = match t.node with + | Finite s -> E.exists f s + | CoFinite _ -> raise Sigs.InfiniteSet + + let filter f t = match t.node with + | Finite s -> finite (E.filter f s) + | CoFinite _ -> raise Sigs.InfiniteSet + + let partition f t = match t.node with + | Finite s -> let a,b = E.partition f s in finite a,finite b + | CoFinite _ -> raise Sigs.InfiniteSet + + let cardinal t = match t.node with + | Finite s -> E.cardinal s + | CoFinite _ -> raise Sigs.InfiniteSet + + let elements t = match t.node with + | Finite s -> E.elements s + | CoFinite _ -> raise Sigs.InfiniteSet + + let from_list l = + finite (List.fold_left (fun x a -> E.add a x ) E.empty l) + + let choose t = match t.node with + Finite s -> E.choose s + | _ -> raise Sigs.InfiniteSet + + let is_singleton t = match t.node with + | Finite s -> E.is_singleton s + | _ -> false + + let intersect s t = match s.node, t.node with + | Finite s, Finite t -> E.intersect s t + | CoFinite s, Finite t -> not (E.subset t s) + | Finite s, CoFinite t -> not (E.subset s t) + | CoFinite s, CoFinite t -> true + + let split x s = match s.node with + | Finite s -> + let s1, b, s2 = E.split x s in + finite s1, b, finite s2 + + | _ -> raise Sigs.InfiniteSet + + let min_elt s = match s.node with + | Finite s -> E.min_elt s + | _ -> raise Sigs.InfiniteSet + + let max_elt s = match s.node with + | Finite s -> E.min_elt s + | _ -> raise Sigs.InfiniteSet + + let positive t = + match t.node with + | Finite x -> x + | _ -> E.empty + + let negative t = + match t.node with + | CoFinite x -> x + | _ -> E.empty + + let inj_positive t = finite t + let inj_negative t = cofinite t +end + +module Make = Builder(Hcons.Make) +module Weak = Builder(Hcons.Weak) diff --git a/src/finiteCofinite.mli b/src/finiteCofinite.mli new file mode 100644 index 0000000..198e6e2 --- /dev/null +++ b/src/finiteCofinite.mli @@ -0,0 +1,35 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +(** Implementation of hashconsed finite or cofinite sets. +*) + +module type S = +sig + include Sigs.FiniteCofiniteSet + include Hcons.S with type t := t +end +(** Output signature of the {!FiniteCofinite.Make} and + {!FiniteCofinite.Weak} functors.*) + +module Make (E : Ptset.S) : S with type elt = E.elt and type set = E.t +(** Builds an implementation of hashconsed sets of hashconsed elements. + See {!Hcons.Make}. +*) + +module Weak (E : Ptset.S) : S with type elt = E.elt and type set = E.t +(** Builds an implementation of hashconsed sets of hashconsed elements + with weak internal storage. See {!Hcons.Weak}. +*) diff --git a/src/formula.ml b/src/formula.ml new file mode 100644 index 0000000..f7eae85 --- /dev/null +++ b/src/formula.ml @@ -0,0 +1,178 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) +INCLUDE "utils.ml" + +open Format +type move = [ `Left | `Right ] +type 'hcons expr = + | False | True + | Or of 'hcons * 'hcons + | And of 'hcons * 'hcons + | Atom of (move * bool * State.t) + +type 'hcons node = { + pos : 'hcons expr; + mutable neg : 'hcons; + st : StateSet.t * StateSet.t; + size: int; (* Todo check if this is needed *) +} + +external hash_const_variant : [> ] -> int = "%identity" +external vb : bool -> int = "%identity" + +module rec Node : Hcons.S + with type data = Data.t = Hcons.Make (Data) + and Data : Hashtbl.HashedType with type t = Node.t node = + struct + type t = Node.t node + let equal x y = x.size == y.size && + match x.pos, y.pos with + | a,b when a == b -> true + | Or(xf1, xf2), Or(yf1, yf2) + | And(xf1, xf2), And(yf1,yf2) -> (xf1 == yf1) && (xf2 == yf2) + | Atom(d1, p1, s1), Atom(d2 ,p2 ,s2) -> d1 == d2 && p1 == p2 && s1 == s2 + | _ -> false + + let hash f = + match f.pos with + | False -> 0 + | True -> 1 + | Or (f1, f2) -> + HASHINT3 (PRIME1, Uid.to_int f1.Node.id, Uid.to_int f2.Node.id) + | And (f1, f2) -> + HASHINT3(PRIME3, Uid.to_int f1.Node.id, Uid.to_int f2.Node.id) + + | Atom(d, p, s) -> HASHINT4(PRIME5, hash_const_variant d,vb p,s) + end + +type t = Node.t +let hash x = x.Node.key +let uid x = x.Node.id +let equal = Node.equal +let expr f = f.Node.node.pos +let st f = f.Node.node.st +let size f = f.Node.node.size +let compare f1 f2 = compare f1.Node.id f2.Node.id +let prio f = + match expr f with + | True | False -> 10 + | Atom _ -> 8 + | And _ -> 6 + | Or _ -> 1 + +let rec print ?(parent=false) ppf f = + if parent then fprintf ppf "("; + let _ = match expr f with + | True -> fprintf ppf "%s" Pretty.top + | False -> fprintf ppf "%s" Pretty.bottom + | And(f1,f2) -> + print ~parent:(prio f > prio f1) ppf f1; + fprintf ppf " %s " Pretty.wedge; + print ~parent:(prio f > prio f2) ppf f2; + | Or(f1,f2) -> + (print ppf f1); + fprintf ppf " %s " Pretty.vee; + (print ppf f2); + | Atom(dir, b, s) -> + let _ = flush_str_formatter() in + let fmt = str_formatter in + let a_str, d_str = + match dir with + | `Left -> Pretty.down_arrow, Pretty.subscript 1 + | `Right -> Pretty.down_arrow, Pretty.subscript 2 + in + fprintf fmt "%s%s" a_str d_str; + State.print fmt s; + let str = flush_str_formatter() in + if b then fprintf ppf "%s" str + else Pretty.pp_overline ppf str + in + if parent then fprintf ppf ")" + +let print ppf f = print ~parent:false ppf f + +let is_true f = (expr f) == True +let is_false f = (expr f) == False + + +let cons pos neg s1 s2 size1 size2 = + let nnode = Node.make { pos = neg; neg = (Obj.magic 0); st = s2; size = size2 } in + let pnode = Node.make { pos = pos; neg = nnode ; st = s1; size = size1 } in + (Node.node nnode).neg <- pnode; (* works because the neg field isn't taken into + account for hashing ! *) + pnode,nnode + + +let empty_pair = StateSet.empty, StateSet.empty +let true_,false_ = cons True False empty_pair empty_pair 0 0 +let atom_ d p s = + let si = StateSet.singleton s in + let ss = match d with + | `Left -> si, StateSet.empty + | `Right -> StateSet.empty, si + in fst (cons (Atom(d,p,s)) (Atom(d,not p,s)) ss ss 1 1) + +let not_ f = f.Node.node.neg + +let union_pair (l1,r1) (l2, r2) = + StateSet.union l1 l2, + StateSet.union r1 r2 + +let merge_states f1 f2 = + let sp = + union_pair (st f1) (st f2) + and sn = + union_pair (st (not_ f1)) (st (not_ f2)) + in + sp,sn + +let order f1 f2 = if uid f1 < uid f2 then f2,f1 else f1,f2 + +let or_ f1 f2 = + (* Tautologies: x|x, x|not(x) *) + + if equal f1 f2 then f1 + else if equal f1 (not_ f2) then true_ + + (* simplification *) + else if is_true f1 || is_true f2 then true_ + else if is_false f1 && is_false f2 then false_ + else if is_false f1 then f2 + else if is_false f2 then f1 + + (* commutativity of | *) + else + let f1, f2 = order f1 f2 in + let psize = (size f1) + (size f2) in + let nsize = (size (not_ f1)) + (size (not_ f2)) in + let sp, sn = merge_states f1 f2 in + fst (cons (Or(f1,f2)) (And(not_ f1, not_ f2)) sp sn psize nsize) + + +let and_ f1 f2 = + not_ (or_ (not_ f1) (not_ f2)) + + +let of_bool = function true -> true_ | false -> false_ + + +module Infix = struct + let ( +| ) f1 f2 = or_ f1 f2 + + let ( *& ) f1 f2 = and_ f1 f2 + + let ( *+ ) d s = atom_ d true s + let ( *- ) d s = atom_ d false s +end diff --git a/src/formula.mli b/src/formula.mli new file mode 100644 index 0000000..46cbe65 --- /dev/null +++ b/src/formula.mli @@ -0,0 +1,92 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +(** Implementation of hashconsed Boolean formulae *) + +type move = [ `Left |`Right ] +(** Direction for automata predicates *) + +type 'hcons expr = + False + | True + | Or of 'hcons * 'hcons + | And of 'hcons * 'hcons + | Atom of (move * bool * State.t) + +(** Partial internal representation of a formula, + used for pattern matching *) + +type t +(** Abstract type representing hashconsed formulae *) + +val hash : t -> int +(** Hash function for formulae *) + +val uid : t -> Uid.t +(** Returns a unique ID for formulae *) + +val equal : t -> t -> bool +(** Equality over formulae *) + +val expr : t -> t expr +(** Equality over formulae *) + +val st : t -> StateSet.t * StateSet.t +(** states occuring left and right, positively or negatively *) + +val compare : t -> t -> int +(** Comparison of formulae *) + +val size : t -> int +(** Syntactic size of the formula *) + +val print : Format.formatter -> t -> unit +(** Pretty printer *) + +val is_true : t -> bool +(** [is_true f] tests whether the formula is the atom True *) + +val is_false : t -> bool +(** [is_false f] tests whether the formula is the atom False *) + +val true_ : t +(** Atom True *) + +val false_ : t +(** Atom False *) + +val atom_ : move -> bool -> StateSet.elt -> t +(** [atom_ dir b q] creates a down_left or down_right atom for state + [q]. The atom is positive if [b == true]. +*) + +val not_ : t -> t +val or_ : t -> t -> t +val and_ : t -> t -> t +(** Boolean connective *) + +val of_bool : bool -> t +(** Convert an ocaml Boolean value to a formula *) + +module Infix : sig + val ( +| ) : t -> t -> t + val ( *& ) : t -> t -> t + val ( *+ ) : move -> StateSet.elt -> t + val ( *- ) : move -> StateSet.elt -> t +end +(** Module to facilitate infix notations of formulae. + Just [open Formla.Infix] and write: + [let f = `Left *+ q1 +| `Right *+ q2 in ...] +*) diff --git a/src/hcons.ml b/src/hcons.ml new file mode 100644 index 0000000..d421d03 --- /dev/null +++ b/src/hcons.ml @@ -0,0 +1,96 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +module type Abstract = + sig + type data + type t + val make : data -> t + val node : t -> data + val hash : t -> int + val uid : t -> Uid.t + val equal : t -> t -> bool + val init : unit -> unit + end + +type 'a node = { id : Uid.t; + key : int; + node : 'a } +module type S = +sig + type data + type t = private { id : Uid.t; + key : int; + node : data } + include Abstract with type data := data and type t := t +end + +module type TableBuilder = + functor + (H : Sigs.HashedType) -> + Sigs.HashSet with type data = H.t + +module Builder (TB : TableBuilder) (H : Sigs.HashedType) = +struct + type data = H.t + type t = { id : Uid.t; + key : int; + node : data } + let uid_make = ref (Uid.make_maker()) + let node t = t.node + let uid t = t.id + let hash t = t.key + let equal t1 t2 = t1 == t2 + module HN = + struct + type _t = t + type t = _t + let hash = hash + let equal x y = x == y || H.equal x.node y.node + end + module T = TB(HN) + + let pool = T.create 101 + let init () = + T.clear pool; + uid_make := Uid.make_maker () + + let make x = + let cell = { id = Uid.dummy; key = H.hash x; node = x } in + try + T.find pool cell + with + | Not_found -> + let cell = { cell with id = !uid_make(); } in + T.add pool cell; + cell +end + +module Make = Builder (Utils.HashSet) +module Weak = Builder (Weak.Make) + +module PosInt = +struct + type data = int + type t = int + let make v = + if v < 0 then raise (Invalid_argument "Hcons.PosInt.make") + else v + let node v = v + let hash v = v + let uid v = Uid.of_int v + let equal x y = x == y + let init () = () +end diff --git a/src/hcons.mli b/src/hcons.mli new file mode 100644 index 0000000..af5618e --- /dev/null +++ b/src/hcons.mli @@ -0,0 +1,82 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +(** Implementation of generic hashconsing. *) + +module type Abstract = + sig + type data + (** The type of the data to be hashconsed *) + + type t + (** The type of hashconsed data *) + + val make : data -> t + (** [make v] internalizes the value [v], making it an hashconsed + value. + *) + + val node : t -> data + (** [node h] extract the original data from its hashconsed value + *) + + val hash : t -> int + (** [hash h] returns a hash of [h], such that for every [h1] and + [h2], [equal h1 h2] implies [hash h1 = hash h2]. + *) + + val uid : t -> Uid.t + (** [uid h] returns a unique identifier *) + val equal : t -> t -> bool + (** Equality between hashconsed values. Equivalent to [==] *) + + val init : unit -> unit + (** Initializes the internal storage. Any previously hashconsed + element is discarded. *) + + end +(** Abstract signature of a module implementing an hashconsed datatype *) + +module type S = +sig + type data + type t = private { id : Uid.t; + key : int; + node : data } + include Abstract with type data := data and type t := t +end + +(** Output signature of the functor {!Hcons.Make} *) + +module Make (H : Sigs.HashedType) : S with type data = H.t +(** Functor building an implementation of hashconsed values for a given + implementation of {!Sigs.HashedType}. Hashconsed values are + persistent: the are kept in memory even if no external reference + remain. Calling [init()] explicitely will reclaim the space. +*) + +module Weak (H : Sigs.HashedType) : S with type data = H.t +(** Functor building an implementation of hashconsed values for a given + implementation of {!Sigs.HashedType}. Hashconsed values have a + weak semantics: they may be reclaimed as soon as no external + reference to them exists. The space may still be reclaimed + explicitely by calling [init]. +*) + +module PosInt : Abstract with type data = int and type t = int +(** Compact implementation of hashconsed positive integer that + avoids boxing. [PosInt.make v] raises [Invalid_argument] if + [ v < 0 ] +*) diff --git a/src/pretty.ml b/src/pretty.ml new file mode 100644 index 0000000..02c413f --- /dev/null +++ b/src/pretty.ml @@ -0,0 +1,151 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +open Format + +exception InvalidUtf8Codepoint of int + +let subscripts = "₀₁₂₃₄₅₆₇₈₉" +let superscripts = "⁰¹²³⁴⁵⁶⁷⁸⁹" + +let char_length c = + let code = Char.code c in + if code <= 0x7f then 1 + else if 0xc2 <= code && code <= 0xdf then 2 + else if 0xe0 <= code && code <= 0xef then 3 + else if 0xf0 <= code && code <= 0xf4 then 4 + else raise (InvalidUtf8Codepoint code) + + +let next_char s i len = + let n = i + char_length s.[i] in + if n >= len then -1 else n + +let str_len s = + let len = String.length s in + let rec loop i acc = + if i == -1 then acc + else loop (next_char s i len) (acc+1) + in + loop 0 0 + +let length = str_len + +let get_char s i = + let len = String.length s in + let rec loop j count = + if count == i then String.sub s j (char_length s.[j]) + else loop (next_char s j len) (count+1) + in + loop 0 0 + + +let format_number digits i = + let s = string_of_int i in + let len = String.length s in + let buf = Buffer.create (len*4) in + for i = 0 to len - 1 do + let d = Char.code s.[i] - Char.code '0' in + Buffer.add_string buf (get_char digits d) + done; + Buffer.contents buf + +let rev_explode s = + let len = str_len s in + let rec loop i acc = + if i >= len then acc + else + loop (i+1) ((get_char s i) :: acc) + in + loop 0 [] + + +let explode s = List.rev (rev_explode s) + +let combine_all comp s = + let l = rev_explode s in + String.concat "" (List.fold_left (fun acc e -> comp::e::acc) [] l) + + +let subscript = format_number subscripts +let superscript = format_number superscripts +let down_arrow = "↓" +let up_arrow = "↑" +let right_arrow = "→" +let left_arrow = "←" +let epsilon = "ϵ" +let cap = "∩" +let cup = "∪" +let lnot = "¬" +let wedge = "∧" +let vee = "∨" +let top = "⊤" +let bottom = "⊥" +let dummy = "☠" +let double_right_arrow = "⇒" +let combining_overbar = "\204\133" +let combining_underbar = "\204\178" +let combining_stroke = "\204\182" +let combining_vertical_line = "\226\131\146" + + +let overline s = combine_all combining_overbar s +let underline s = combine_all combining_underbar s +let strike s = combine_all combining_stroke s + +let mk_repeater c = + let mk_str i = String.make i c in + let _table = Array.init 16 mk_str in + fun i -> try + if i < 16 then _table.(i) else mk_str i + with e -> print_int i; print_newline(); raise e +let padding = mk_repeater ' ' +let line = mk_repeater '_' + + + + +let ppf f fmt s = + pp_print_string fmt (f s) + +let pp_overline = ppf overline +let pp_underline = ppf underline +let pp_strike = ppf strike +let pp_subscript = ppf subscript +let pp_superscript = ppf superscript +let dummy_printer fmt () = () + +let pp_print_list ?(sep=dummy_printer) printer fmt l = + match l with + [] -> () + | [ e ] -> printer fmt e + | e :: es -> printer fmt e; List.iter + (fun x -> + sep fmt (); + fprintf fmt "%a" printer x) es + +let pp_print_array ?(sep=dummy_printer) printer fmt a = + pp_print_list ~sep:sep printer fmt (Array.to_list a) + +let print_list ?(sep=" ") printer fmt l = + let sep_printer fmt () = + pp_print_string fmt sep + in + pp_print_list ~sep:sep_printer printer fmt l + +let print_array ?(sep=" ") printer fmt a = + print_list ~sep:sep printer fmt (Array.to_list a) + + diff --git a/src/pretty.mli b/src/pretty.mli new file mode 100644 index 0000000..cb1b870 --- /dev/null +++ b/src/pretty.mli @@ -0,0 +1,51 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +exception InvalidUtf8Codepoint of int + +val subscript : int -> string +val superscript : int -> string +val down_arrow : string +val up_arrow : string +val right_arrow : string +val left_arrow : string +val epsilon : string +val cap : string +val cup : string +val lnot : string +val wedge : string +val vee : string +val top : string +val bottom : string +val dummy : string +val double_right_arrow : string +val overline : string -> string +val underline : string -> string +val strike : string -> string +val padding : int -> string +val line : int -> string +val length : string -> int +val pp_overline : Format.formatter -> string -> unit +val pp_underline : Format.formatter -> string -> unit +val pp_strike : Format.formatter -> string -> unit +val pp_subscript : Format.formatter -> int -> unit +val pp_superscript : Format.formatter -> int -> unit + +val pp_print_list : + ?sep:(Format.formatter -> unit -> unit) -> (Format.formatter -> 'a -> unit) -> Format.formatter -> 'a list -> unit +val pp_print_array : + ?sep:(Format.formatter -> unit -> unit) -> (Format.formatter -> 'a -> unit) -> Format.formatter -> 'a array -> unit +val print_list : ?sep:string -> (Format.formatter -> 'a -> unit) -> Format.formatter -> 'a list -> unit +val print_array : ?sep:string -> (Format.formatter -> 'a -> unit) -> Format.formatter -> 'a array -> unit diff --git a/src/ptset.ml b/src/ptset.ml new file mode 100644 index 0000000..9576993 --- /dev/null +++ b/src/ptset.ml @@ -0,0 +1,381 @@ +(* Original file : *) + +(***********************************************************************) +(* *) +(* Copyright (C) Jean-Christophe Filliatre *) +(* *) +(* This software is free software; you can redistribute it and/or *) +(* modify it under the terms of the GNU Library General Public *) +(* License version 2.1, with the special exception on linking *) +(* described in file http://www.lri.fr/~filliatr/ftp/ocaml/ds/LICENSE *) +(* *) +(* This software is distributed in the hope that it will be useful, *) +(* but WITHOUT ANY WARRANTY; without even the implied warranty of *) +(* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. *) +(* *) +(***********************************************************************) + +(* Modified by Kim Nguyen *) +(* The Patricia trees are themselves deeply hashconsed. The module + provides a Make (and Weak) functor to build hashconsed patricia + trees whose elements are Abstract hashconsed values. This allows + to build sets of integers without boxing them in an hacons structure +*) +INCLUDE "utils.ml" + +module type S = + sig + include Sigs.Set + include Hcons.S with type t := t + end + +module type HConsBuilder = + functor (H : Sigs.HashedType) -> Hcons.S with type data = H.t + +module Builder (HCB : HConsBuilder) (H : Hcons.Abstract) : + S with type elt = H.t = +struct + type elt = H.t + + type 'a set = + | Empty + | Leaf of elt + | Branch of int * int * 'a * 'a + + module rec Node : Hcons.S with type data = Data.t = HCB(Data) + and Data : Sigs.HashedType with type t = Node.t set = + struct + type t = Node.t set + let equal x y = + match x,y with + Empty,Empty -> true + | Leaf k1, Leaf k2 -> k1 == k2 + | Branch(b1,i1,l1,r1),Branch(b2,i2,l2,r2) -> + b1 == b2 && i1 == i2 && (Node.equal l1 l2) && (Node.equal r1 r2) + + | _ -> false + + let hash = function + | Empty -> 0 + | Leaf i -> HASHINT2 (PRIME1, Uid.to_int (H.uid i)) + | Branch (b,i,l,r) -> + HASHINT4(b, i, Uid.to_int l.Node.id, Uid.to_int r.Node.id) + end + + include Node + + let empty = Node.make Empty + + let is_empty s = (Node.node s) == Empty + + let branch p m l r = Node.make (Branch(p,m,l,r)) + + let leaf k = Node.make (Leaf k) + + (* To enforce the invariant that a branch contains two non empty + sub-trees *) + let branch_ne p m t0 t1 = + if (is_empty t0) then t1 + else if is_empty t1 then t0 else branch p m t0 t1 + + (******** from here on, only use the smart constructors ************) + + let zero_bit k m = (k land m) == 0 + + let singleton k = leaf k + + let is_singleton n = + match Node.node n with Leaf _ -> true + | _ -> false + + let mem (k:elt) n = + let kid = Uid.to_int (H.uid k) in + let rec loop n = match Node.node n with + | Empty -> false + | Leaf j -> k == j + | Branch (p, _, l, r) -> if kid <= p then loop l else loop r + in loop n + + let rec min_elt n = match Node.node n with + | Empty -> raise Not_found + | Leaf k -> k + | Branch (_,_,s,_) -> min_elt s + + let rec max_elt n = match Node.node n with + | Empty -> raise Not_found + | Leaf k -> k + | Branch (_,_,_,t) -> max_elt t + + let elements s = + let rec elements_aux acc n = match Node.node n with + | Empty -> acc + | Leaf k -> k :: acc + | Branch (_,_,l,r) -> elements_aux (elements_aux acc r) l + in + elements_aux [] s + + let mask k m = (k lor (m-1)) land (lnot m) + + let naive_highest_bit x = + assert (x < 256); + let rec loop i = + if i = 0 then 1 else if x lsr i = 1 then 1 lsl i else loop (i-1) + in + loop 7 + + let hbit = Array.init 256 naive_highest_bit +(* + external clz : int -> int = "caml_clz" "noalloc" + external leading_bit : int -> int = "caml_leading_bit" "noalloc" +*) + let highest_bit x = + try + let n = (x) lsr 24 in + if n != 0 then hbit.(n) lsl 24 + else let n = (x) lsr 16 in if n != 0 then hbit.(n) lsl 16 + else let n = (x) lsr 8 in if n != 0 then hbit.(n) lsl 8 + else hbit.(x) + with + _ -> raise (Invalid_argument ("highest_bit " ^ (string_of_int x))) + + let highest_bit64 x = + let n = x lsr 32 in if n != 0 then highest_bit n lsl 32 + else highest_bit x + + let branching_bit p0 p1 = highest_bit64 (p0 lxor p1) + + let join p0 t0 p1 t1 = + let m = branching_bit p0 p1 in + let msk = mask p0 m in + if zero_bit p0 m then + branch_ne msk m t0 t1 + else + branch_ne msk m t1 t0 + + let match_prefix k p m = (mask k m) == p + + let add k t = + let kid = Uid.to_int (H.uid k) in + assert (kid >=0); + let rec ins n = match Node.node n with + | Empty -> leaf k + | Leaf j -> if j == k then n else join kid (leaf k) (Uid.to_int (H.uid j)) n + | Branch (p,m,t0,t1) -> + if match_prefix kid p m then + if zero_bit kid m then + branch_ne p m (ins t0) t1 + else + branch_ne p m t0 (ins t1) + else + join kid (leaf k) p n + in + ins t + + let remove k t = + let kid = Uid.to_int(H.uid k) in + let rec rmv n = match Node.node n with + | Empty -> empty + | Leaf j -> if k == j then empty else n + | Branch (p,m,t0,t1) -> + if match_prefix kid p m then + if zero_bit kid m then + branch_ne p m (rmv t0) t1 + else + branch_ne p m t0 (rmv t1) + else + n + in + rmv t + + (* should run in O(1) thanks to Hash consing *) + + let equal a b = Node.equal a b + + let compare a b = (Uid.to_int (Node.uid a)) - (Uid.to_int (Node.uid b)) + + let rec merge s t = + if (equal s t) (* This is cheap thanks to hash-consing *) + then s + else + match Node.node s, Node.node t with + | Empty, _ -> t + | _, Empty -> s + | Leaf k, _ -> add k t + | _, Leaf k -> add k s + | Branch (p,m,s0,s1), Branch (q,n,t0,t1) -> + if m == n && match_prefix q p m then + branch p m (merge s0 t0) (merge s1 t1) + else if m > n && match_prefix q p m then + if zero_bit q m then + branch_ne p m (merge s0 t) s1 + else + branch_ne p m s0 (merge s1 t) + else if m < n && match_prefix p q n then + if zero_bit p n then + branch_ne q n (merge s t0) t1 + else + branch_ne q n t0 (merge s t1) + else + (* The prefixes disagree. *) + join p s q t + + + + + let rec subset s1 s2 = (equal s1 s2) || + match (Node.node s1,Node.node s2) with + | Empty, _ -> true + | _, Empty -> false + | Leaf k1, _ -> mem k1 s2 + | Branch _, Leaf _ -> false + | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> + if m1 == m2 && p1 == p2 then + subset l1 l2 && subset r1 r2 + else if m1 < m2 && match_prefix p1 p2 m2 then + if zero_bit p1 m2 then + subset l1 l2 && subset r1 l2 + else + subset l1 r2 && subset r1 r2 + else + false + + + let union s1 s2 = merge s1 s2 + (* Todo replace with e Memo Module *) + + let rec inter s1 s2 = + if equal s1 s2 + then s1 + else + match (Node.node s1,Node.node s2) with + | Empty, _ -> empty + | _, Empty -> empty + | Leaf k1, _ -> if mem k1 s2 then s1 else empty + | _, Leaf k2 -> if mem k2 s1 then s2 else empty + | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> + if m1 == m2 && p1 == p2 then + merge (inter l1 l2) (inter r1 r2) + else if m1 > m2 && match_prefix p2 p1 m1 then + inter (if zero_bit p2 m1 then l1 else r1) s2 + else if m1 < m2 && match_prefix p1 p2 m2 then + inter s1 (if zero_bit p1 m2 then l2 else r2) + else + empty + + let rec diff s1 s2 = + if equal s1 s2 + then empty + else + match (Node.node s1,Node.node s2) with + | Empty, _ -> empty + | _, Empty -> s1 + | Leaf k1, _ -> if mem k1 s2 then empty else s1 + | _, Leaf k2 -> remove k2 s1 + | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> + if m1 == m2 && p1 == p2 then + merge (diff l1 l2) (diff r1 r2) + else if m1 > m2 && match_prefix p2 p1 m1 then + if zero_bit p2 m1 then + merge (diff l1 s2) r1 + else + merge l1 (diff r1 s2) + else if m1 < m2 && match_prefix p1 p2 m2 then + if zero_bit p1 m2 then diff s1 l2 else diff s1 r2 + else + s1 + + +(*s All the following operations ([cardinal], [iter], [fold], [for_all], + [exists], [filter], [partition], [choose], [elements]) are + implemented as for any other kind of binary trees. *) + +let rec cardinal n = match Node.node n with + | Empty -> 0 + | Leaf _ -> 1 + | Branch (_,_,t0,t1) -> cardinal t0 + cardinal t1 + +let rec iter f n = match Node.node n with + | Empty -> () + | Leaf k -> f k + | Branch (_,_,t0,t1) -> iter f t0; iter f t1 + +let rec fold f s accu = match Node.node s with + | Empty -> accu + | Leaf k -> f k accu + | Branch (_,_,t0,t1) -> fold f t0 (fold f t1 accu) + + +let rec for_all p n = match Node.node n with + | Empty -> true + | Leaf k -> p k + | Branch (_,_,t0,t1) -> for_all p t0 && for_all p t1 + +let rec exists p n = match Node.node n with + | Empty -> false + | Leaf k -> p k + | Branch (_,_,t0,t1) -> exists p t0 || exists p t1 + +let rec filter pr n = match Node.node n with + | Empty -> empty + | Leaf k -> if pr k then n else empty + | Branch (p,m,t0,t1) -> branch_ne p m (filter pr t0) (filter pr t1) + +let partition p s = + let rec part (t,f as acc) n = match Node.node n with + | Empty -> acc + | Leaf k -> if p k then (add k t, f) else (t, add k f) + | Branch (_,_,t0,t1) -> part (part acc t0) t1 + in + part (empty, empty) s + +let rec choose n = match Node.node n with + | Empty -> raise Not_found + | Leaf k -> k + | Branch (_, _,t0,_) -> choose t0 (* we know that [t0] is non-empty *) + + +let split x s = + let coll k (l, b, r) = + if k < x then add k l, b, r + else if k > x then l, b, add k r + else l, true, r + in + fold coll s (empty, false, empty) + +(*s Additional functions w.r.t to [Set.S]. *) + +let rec intersect s1 s2 = (equal s1 s2) || + match (Node.node s1,Node.node s2) with + | Empty, _ -> false + | _, Empty -> false + | Leaf k1, _ -> mem k1 s2 + | _, Leaf k2 -> mem k2 s1 + | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> + if m1 == m2 && p1 == p2 then + intersect l1 l2 || intersect r1 r2 + else if m1 < m2 && match_prefix p2 p1 m1 then + intersect (if zero_bit p2 m1 then l1 else r1) s2 + else if m1 > m2 && match_prefix p1 p2 m2 then + intersect s1 (if zero_bit p1 m2 then l2 else r2) + else + false + + +let from_list l = List.fold_left (fun acc e -> add e acc) empty l + + +end + +module Make = Builder(Hcons.Make) +module Weak = Builder(Hcons.Weak) + +module PosInt + = + struct + include Make(Hcons.PosInt) + let print ppf s = + Format.pp_print_string ppf "{ "; + iter (fun i -> Format.fprintf ppf "%i " i) s; + Format.pp_print_string ppf "}"; + Format.pp_print_flush ppf () + end diff --git a/src/ptset.mli b/src/ptset.mli new file mode 100644 index 0000000..df38255 --- /dev/null +++ b/src/ptset.mli @@ -0,0 +1,34 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +module type S = + sig + include Sigs.Set + include Hcons.S with type t := t + end +(** Output signature of the {!Ptset.Make} and {!Ptset.Weak} functors. *) + +module Make (H : Hcons.Abstract) : S with type elt = H.t +(** Builds an implementation of hashconsed sets of hashconsed elements. + See {!Hcons.Make}. +*) + +module Weak (H : Hcons.Abstract) : S with type elt = H.t +(** Builds an implementation of hashconsed sets of hashconsed elements + with weak internal storage. See {!Hcons.Weak}. +*) + +module PosInt : Sigs.Set with type elt = int +(** Implementation of hashconsed sets of positive integers *) diff --git a/src/qName.ml b/src/qName.ml new file mode 100644 index 0000000..5d41943 --- /dev/null +++ b/src/qName.ml @@ -0,0 +1,33 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +include Hcons.Make (struct + include String + let hash s = Hashtbl.hash s + let equal s1 s2 = s1 = s2 +end) + +let print pp s = Format.fprintf pp "%s" s.node + +let of_string = make +let to_string = node + +let document = of_string "#document" +let text = of_string "#text" +let cdata_section = of_string "#cdata-section" +let comment = of_string "#comment" +let document_fragment = of_string "#document-fragment" +let attribute_map = of_string "#attribute-map" +let nil = of_string "#" diff --git a/src/qName.mli b/src/qName.mli new file mode 100644 index 0000000..36bf022 --- /dev/null +++ b/src/qName.mli @@ -0,0 +1,67 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +(** Implementation of qualified names as hashconsed strings *) + +include Hcons.S with type data = string +include Sigs.Printable with type t := t + + +val of_string : string -> t +(** Utility function, equivalent to [make] *) + +val to_string : t -> string +(** Utility function, equivalent to [node] *) + + +(** Special constants, that denote the QName of nodes that are not + elements (using the nodeValue property of DOM for such nodes. +*) + +val document : t +(** Represents the QName of a document node. Equivalent to + [of_string "#document"] +*) + +val text : t +(** Represents the QName of a text node. Equivalent to + [of_string "#text"] +*) + +val cdata_section : t +(** Represents the QName of a document node. Equivalent to + [of_string "#cdata-section"] +*) + +val comment : t +(** Represents the QName of a comment node. Equivalent to + [of_string "#cdata-section"] +*) + +val document_fragment : t +(** Represents the QName of a document fragment. Equivalent to + [of_string "#document-fragment"] +*) + +val attribute_map : t +(** Represents the QName of a dummy document node holding the + attributes of the current element. Equivalent to + [of_string "#attribute-map"] +*) + +val nil : t +(** Represents the QName of a nil node. Equivalent to + [of_string "#"] +*) diff --git a/src/qNameSet.ml b/src/qNameSet.ml new file mode 100644 index 0000000..4035957 --- /dev/null +++ b/src/qNameSet.ml @@ -0,0 +1,18 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +include FiniteCofinite.Make(Ptset.Make(QName)) + +module Weak = FiniteCofinite.Weak(Ptset.Weak(QName)) diff --git a/src/qNameSet.mli b/src/qNameSet.mli new file mode 100644 index 0000000..71d1478 --- /dev/null +++ b/src/qNameSet.mli @@ -0,0 +1,22 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + + +(** Implementation of sets of Qualified Names that can be finite + or cofinite *) + +include FiniteCofinite.S with type elt = QName.t + +module Weak : FiniteCofinite.S with type elt = QName.t diff --git a/src/sigs.ml b/src/sigs.ml new file mode 100644 index 0000000..3664d4e --- /dev/null +++ b/src/sigs.ml @@ -0,0 +1,126 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +(** Various generic signatures *) + +module type HashedType = +sig + type t + val hash : t -> int + (** [hash v] returns an integer in the range + [ 0 - 2^30-1 ] + *) + val equal : t -> t -> bool + (** Equality *) +end +(** Type equipped with an equality and hash function. + If [equal a b] then [(hash a) = (hash b)] +*) + + +module type OrderedType = +sig + type t + val compare : t -> t -> int + (** Total ordering on values of type [t]. + [compare a b] returns a negative number if [a] is strictly + smaller than [b], a positive number if [a] is strictly greater + than [b] and 0 if [a] is equal to [b]. + *) +end +(** Type equiped with a total ordering *) + +module type Printable = +sig + type t + val print : Format.formatter -> t -> unit +end + (** Type equiped with a pretty-printer *) + + +module type Type = +sig + type t + include HashedType with type t := t + include OrderedType with type t := t +end + (** Type with both total ordering and hashing functions. + All modules of that type must enforce than: + [equal a b] if and only if [compare a b = 0] + *) + +module type HashSet = +sig + type data + type t + val create : int -> t + val add : t -> data -> unit + val remove : t -> data -> unit + val find : t -> data -> data + val find_all : t -> data -> data list + val clear : t -> unit + val mem : t -> data -> bool +end +(** Signature of a simple HashSet module *) +module type Set = +sig + type elt + type t + val empty : t + val is_empty : t -> bool + val mem : elt -> t -> bool + val add : elt -> t -> t + val singleton : elt -> t + val remove : elt -> t -> t + val union : t -> t -> t + val inter : t -> t -> t + val diff : t -> t -> t + val compare : t -> t -> int + val equal : t -> t -> bool + val subset : t -> t -> bool + val iter : (elt -> unit) -> t -> unit + val fold : (elt -> 'a -> 'a) -> t -> 'a -> 'a + val for_all : (elt -> bool) -> t -> bool + val exists : (elt -> bool) -> t -> bool + val filter : (elt -> bool) -> t -> t + val partition : (elt -> bool) -> t -> t * t + val cardinal : t -> int + val elements : t -> elt list + val min_elt : t -> elt + val max_elt : t -> elt + val choose : t -> elt + val split : elt -> t -> t * bool * t + val intersect : t -> t -> bool + val is_singleton : t -> bool + val from_list : elt list -> t +end + +exception InfiniteSet + +module type FiniteCofiniteSet = +sig + include Set + type set + val any : t + val is_any : t -> bool + val is_finite : t -> bool + val kind : t -> [ `Finite | `Cofinite ] + val complement : t -> t + val kind_split : t list -> t * t + val positive : t -> set + val negative : t -> set + val inj_positive : set -> t + val inj_negative : set -> t +end diff --git a/src/state.ml b/src/state.ml new file mode 100644 index 0000000..4fbfc1c --- /dev/null +++ b/src/state.ml @@ -0,0 +1,36 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +open Format + +type t = int +let make = + let id = ref ~-1 in + fun () -> incr id; !id + +let compare = (-) + +let equal = (==) + +external hash : t -> int = "%identity" + +let print fmt x = fprintf fmt "q%a" Pretty.pp_subscript x + +let dump fmt x = print fmt x + +let check x = + if x < 0 then failwith (Printf.sprintf "State: Assertion %i < 0 failed" x) + +let dummy = max_int diff --git a/src/state.mli b/src/state.mli new file mode 100644 index 0000000..ef54518 --- /dev/null +++ b/src/state.mli @@ -0,0 +1,27 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +(** Implementation of states *) + +include Sigs.Type with type t = int + +val make : unit -> t +(** Generate a fresh state *) + +val dummy : t +(** Dummy state that can never be returned by [make ()] *) + +val print : Format.formatter -> t -> unit +(** Pretty printer *) diff --git a/src/stateSet.ml b/src/stateSet.ml new file mode 100644 index 0000000..47ad460 --- /dev/null +++ b/src/stateSet.ml @@ -0,0 +1,22 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +open Format + +include Ptset.Make (Hcons.PosInt) + +let print ppf s = + fprintf ppf "{ %a }" + (Pretty.print_list ~sep:" " (State.print)) (elements s) diff --git a/src/stateSet.mli b/src/stateSet.mli new file mode 100644 index 0000000..1a628a4 --- /dev/null +++ b/src/stateSet.mli @@ -0,0 +1,20 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +(** Implementation of sets of states *) +include Ptset.S with type elt = int + +val print : Format.formatter -> t -> unit +(** Pretty printer *) diff --git a/src/test.ml b/src/test.ml new file mode 100644 index 0000000..770d4c5 --- /dev/null +++ b/src/test.ml @@ -0,0 +1,37 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + + +module F = Formula +(* to force ocaml build to add Formula to the dependency chain even if + we don't use it yet*) + +let doc = + let fd = open_in Sys.argv.(1) in + let d = Tree.load_xml_file fd in + close_in fd; d + + + +let query = XPath.parse Sys.argv.(2) + +open Format + +let () = + fprintf err_formatter "Query: %a\n%!" XPath.Ast.print query; + fprintf err_formatter "Document:\n%!"; + Tree.print_xml stderr doc (Tree.root doc); + exit 0 + diff --git a/src/tree.ml b/src/tree.ml new file mode 100644 index 0000000..c30427c --- /dev/null +++ b/src/tree.ml @@ -0,0 +1,204 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +type node = { + tag : QName.t; + preorder : int; + mutable data : string; + mutable first_child : node; + mutable next_sibling : node; + mutable parent: node; +} + + + +let rec nil = { + tag = QName.nil; + preorder = -1; + data = ""; + first_child = nil; + next_sibling = nil; + parent = nil; +} + +let dummy_tag = QName.of_string "#dummy" +let rec dummy = { + tag = dummy_tag; + preorder = -1; + data = ""; + first_child = dummy; + next_sibling = dummy; + parent = dummy; +} + + +type t = { + root : node; + (* TODO add other intersting stuff *) +} + + + +module Parser = +struct + + type context = { + mutable stack : node list; + text_buffer : Buffer.t; + mutable current_preorder : int; + } + + let print_node_ptr fmt n = + Format.fprintf fmt "%s" + (if n == nil then "" else + if n == dummy then "" else + "") + + let debug_node fmt node = + Format.fprintf fmt "{ tag=%s; preorder=%i; data=%s; first_child=%a; next_sibling=%a; parent=%a }" + (QName.to_string node.tag) + node.preorder + node.data + print_node_ptr node.first_child + print_node_ptr node.next_sibling + print_node_ptr node.parent + + + let debug_ctx fmt ctx = + Format.fprintf fmt "Current context: { preorder = %i\n; stack = \n%a\n }\n-------------\n" + ctx.current_preorder + (Pretty.print_list ~sep:";\n" debug_node) ctx.stack + + + let push n ctx = ctx.stack <- n :: ctx.stack + let pop ctx = + match ctx.stack with + [] -> failwith "XML parse error" + | e :: l -> ctx.stack <- l; e + + let top ctx = match ctx.stack with + | [] -> failwith "XML parse error" + | e :: _ -> e + + let next ctx = + let i = ctx.current_preorder in + ctx.current_preorder <- 1 + i; + i + + let is_left n = n.next_sibling == dummy + + let start_element_handler parser_ ctx tag attr_list = + let parent = top ctx in + let n = { tag = QName.of_string tag; + preorder = next ctx; + data = ""; + first_child = dummy; + next_sibling = dummy; + parent = parent; + } + in + if parent.first_child == dummy then parent.first_child <- n + else parent.next_sibling <- n; + push n ctx + + let rec consume_closing ctx n = + if n.next_sibling != dummy then + let _ = pop ctx in consume_closing ctx (top ctx) + + + let end_element_handler parser_ ctx tag = + let node = top ctx in + if node.first_child == dummy then node.first_child <- nil + else begin + node.next_sibling <- nil; + consume_closing ctx node + end + + let character_data_handler parser_ _ t text = + Buffer.add_string t text + + let create_parser () = + let ctx = { text_buffer = Buffer.create 512; + current_preorder = 0; + stack = [] } in + let parser_ = Expat.parser_create ~encoding:None in + Expat.set_start_element_handler parser_ (start_element_handler parser_ ctx); + Expat.set_end_element_handler parser_ (end_element_handler parser_ ctx); + push { tag = QName.document; + preorder = next ctx; + data = ""; + first_child = dummy; + next_sibling = dummy; + parent = nil; + } ctx; + (parser_, + fun () -> + let node = top ctx in + node.next_sibling <- nil; + consume_closing ctx node; + match ctx.stack with + [ root ] -> + root.next_sibling <- nil; + { root = root } + | _ -> raise (Expat.Expat_error Expat.UNCLOSED_TOKEN) + ) + + + let parse_string s = + let parser_, finalize = create_parser () in + Expat.parse parser_ s; + finalize () + + let parse_file fd = + let buffer = String.create 4096 in + let parser_, finalize = create_parser () in + let rec loop () = + let read = input fd buffer 0 4096 in + if read != 0 then + let () = Expat.parse_sub parser_ buffer 0 read in + loop () + in loop (); finalize () + +end + + +let load_xml_file = Parser.parse_file +let load_xml_string = Parser.parse_string + +let rec print_xml out tree_ node = + if node != nil then + let tag = QName.to_string node.tag in + output_char out '<'; + output_string out tag; + (* print attributes *) + if node.first_child == nil then output_string out "/>" + else begin + output_char out '>'; + print_xml out tree_ node.first_child; + output_string out "' + end; + print_xml out tree_ node.next_sibling + + + +let root t = t.root +let first_child _ n = n.first_child +let next_sibling _ n = n.next_sibling +let parent _ n = n.parent +let tag _ n = n.tag +let data _ n = n.data +let preorder _ n = n.preorder diff --git a/src/tree.mli b/src/tree.mli new file mode 100644 index 0000000..1d5d77b --- /dev/null +++ b/src/tree.mli @@ -0,0 +1,76 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +(** Implementation of documents as binary trees *) + +type node +(** The type of a tree node *) + +type t +(** The type of trees *) + +val nil : node +(** Nil node, denoting the first/second child of a leaf or the parent of + the root *) + +val dummy : node +(** Dummy node that is guaranteed to never occur in any tree *) + +val load_xml_file : in_channel -> t +(** Takes a file descriptor and returns the XML data stored in the + corresponding file. Start at the current position in the file + descriptor (which is not necessarily the begining of file) +*) + +val load_xml_string : string -> t +(** Loads XML data stored in a string *) + +val print_xml : out_channel -> t -> node -> unit +(** Outputs the tree as an XML document on the given output_channel *) + + +val root : t -> node +(** Returns the root of the tree *) + +val first_child : t -> node -> node +(** [first_child t n] returns the first child of node [n] in tree [t]. + Returns [nil] if [n] is a leaf. Returns [nil] if [n == nil]. +*) + +val next_sibling : t -> node -> node +(** [next_sibling t n] returns the next_sibling of node [n] in tree [t]. + Returns [nil] if [n] is the last child of a node. + Returns [nil] if [n == nil]. +*) + +val parent : t -> node -> node +(** [next_sibling t n] returns the parent of node [n] in tree [t]. + Returns [nil] if [n] is the root of the tree. + Returns [nil] if [n == nil]. +*) + +val tag : t -> node -> QName.t +(** Returns the label of a given node *) + +val data : t -> node -> string +(** Returns the character data associated with a node. + The only node having character data are those whose label is + QName.text, QName.cdata_section or QName.comment + *) + +val preorder : t -> node -> int +(** Returns the position of a node in pre-order in the tree. The + root has preorder 0. [nil] has pre-order [-1]. +*) diff --git a/src/uid.ml b/src/uid.ml new file mode 100644 index 0000000..46b8658 --- /dev/null +++ b/src/uid.ml @@ -0,0 +1,29 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +type t = int + +exception Overflow +let make_maker () = + let _id = ref ~-1 in + fun () -> + incr _id; + let i = !_id in + if i < 0 then raise Overflow else i + +let dummy = -1 + +external to_int : t -> int = "%identity" +external of_int : int -> t= "%identity" diff --git a/src/uid.mli b/src/uid.mli new file mode 100644 index 0000000..3c05d19 --- /dev/null +++ b/src/uid.mli @@ -0,0 +1,41 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +(** This modules implements unique identifiers represented by integers *) + +type t = private int +(** The type of unique identifiers. *) + +exception Overflow +(** Raised when the internal counters for IDs overflows. *) + +val make_maker : unit -> (unit -> t) +(** Returns an uid generator. + [make_maker ()] returns a function that generates unique ids. Raises + [Overflow] if the internal counter overflows. +*) + +val dummy : t +(** A dummy identifier, guaranteed to be distinct from any value + returned by a generator. +*) + +external to_int : t -> int = "%identity" +(** Convert a unique id to an integer *) + +(**/**) + +external of_int : int -> t = "%identity" +(** May break the invariant, use with caution *) diff --git a/src/ulexer.ml b/src/ulexer.ml new file mode 100644 index 0000000..157e7d2 --- /dev/null +++ b/src/ulexer.ml @@ -0,0 +1,264 @@ +(******************************************************************************) +(* SXSI : XPath evaluator *) +(* Kim Nguyen (Kim.Nguyen@nicta.com.au) *) +(* Copyright NICTA 2008 *) +(* Distributed under the terms of the LGPL (see LICENCE) *) +(******************************************************************************) +open Camlp4.PreCast + +module Loc = struct + type t = int * int + + let mk _ = (0,0) + let ghost = (-1,-1) + + let of_lexing_position _ = assert false + let to_ocaml_location _ = assert false + let of_ocaml_location _ = assert false + let of_lexbuf _ = assert false + let of_tuple _ = assert false + let to_tuple _ = assert false + + let merge (x1, x2) (y1, y2) = (min x1 y1, max x2 y2) + let join (x1, _) = (x1, x1) + let move _ _ _ = assert false + let shift _ _ = assert false + let move_line _ _ = assert false + let file_name _ = assert false + let start_line _ = assert false + let stop_line _ = assert false + let start_bol _ = assert false + let stop_bol _ = assert false + let start_off = fst + let stop_off = snd + let start_pos _ = assert false + let stop_pos _ = assert false + let is_ghost _ = assert false + let ghostify _ = assert false + let set_file_name _ = assert false + let strictly_before _ = assert false + let make_absolute _ = assert false + let print _ = assert false + let dump _ = assert false + let to_string _ = assert false + exception Exc_located of t * exn + let raise loc exn = + match exn with + | Exc_located _ -> raise exn + | _ -> raise (Exc_located (loc, exn)) + let name = ref "_loc" +end + +type token = + | TAG of string + | STRING of string + | INT of int + | KWD of string + | ATT of string + | EOI + +module Token = struct + open Format + module Loc = Loc + type t = token + type token = t + + let sf = Printf.sprintf + + let to_string = + function + | TAG s -> sf "TAG <%s>" s + | STRING s -> sf "STRING \"%s\"" s + | KWD s -> sf "KWD %s" s + | INT i -> sf "INT %i" i + | ATT s -> sf "ATT %s" s + | EOI -> sf "EOI" + + let print ppf x = pp_print_string ppf (to_string x) + + let match_keyword kwd = + function + | KWD kwd' when kwd = kwd' -> true + | _ -> false + + let extract_string = + function + | KWD s | STRING s | TAG s | ATT s -> s + | INT i -> string_of_int i + | tok -> + invalid_arg ("Cannot extract a string from this token: "^ + to_string tok) + + module Error = struct + type t = string + exception E of string + let print = pp_print_string + let to_string x = x + end + + module Filter = struct + type token_filter = (t, Loc.t) Camlp4.Sig.stream_filter + + type t = + { is_kwd : string -> bool; + mutable filter : token_filter } + + let mk is_kwd = + { is_kwd = is_kwd; + filter = (fun s -> s) } + + let filter x = + let f tok loc = + let tok' = tok in + (tok', loc) + in + let rec filter = + parser + | [< '(tok, loc); s >] -> [< ' f tok loc; filter s >] + | [< >] -> [< >] + in + fun strm -> x.filter (filter strm) + + let define_filter x f = x.filter <- f x.filter + + let keyword_added _ _ _ = () + let keyword_removed _ _ = () + end + +end +module Error = Camlp4.Struct.EmptyError + +module L = Ulexing + +exception Error of int * int * string + +let error i j s = raise (Error (i,j,s)) + +(***********************************************************) +(* Buffer for string literals *) + +let string_buff = Buffer.create 1024 + +let store_lexeme lexbuf = + Buffer.add_string string_buff (Ulexing.utf8_lexeme lexbuf) +let store_ascii = Buffer.add_char string_buff +let store_code = Utf8.store string_buff +let clear_buff () = Buffer.clear string_buff +let get_stored_string () = + let s = Buffer.contents string_buff in + clear_buff (); + Buffer.clear string_buff; + s + +(***********************************************************) +(* Lexer *) + +let illegal lexbuf = + error + (L.lexeme_start lexbuf) + (L.lexeme_end lexbuf) + "Illegal character" + +let return lexbuf tok = (tok, L.loc lexbuf) +let return_loc i j tok = (tok, (i,j)) + +let regexp ncname_char = + xml_letter | xml_digit | [ '-' '_' ] | xml_combining_char | xml_extender | "\\." + +let hexa_digit = function + | '0'..'9' as c -> (Char.code c) - (Char.code '0') + | 'a'..'f' as c -> (Char.code c) - (Char.code 'a') + 10 + | 'A'..'F' as c -> (Char.code c) - (Char.code 'A') + 10 + | _ -> -1 + +let regexp ncname = ( xml_letter ncname_char* ) | ('_' ncname_char+) + +let parse_char lexbuf base i = + let s = L.latin1_sub_lexeme lexbuf i (L.lexeme_length lexbuf - i - 1) in + let r = ref 0 in + for i = 0 to String.length s - 1 do + let c = hexa_digit s.[i] in + if (c >= base) || (c < 0) then + error (L.lexeme_start lexbuf) (L.lexeme_end lexbuf) "invalid digit"; + r := !r * base + c; + done; + !r + +let rec token = lexer + | [' ' '\t' '\n'] -> token lexbuf + | "and" | "not" | "or" | "text()" | "node()" + | "self" | "descendant" | "child" | "descendant-or-self" + | "attribute" | "following-sibling" | "preceding-sibling" + | "parent" | "ancestor" | "ancestor-or-self" | "preceding" | "following" + | "(" |")" | "," | "::" | "/" | "//" | "[" | "]" | "*" | "." | ".." | "@" + -> return lexbuf (KWD (L.utf8_lexeme lexbuf)) + | ncname -> return lexbuf (TAG(L.utf8_lexeme lexbuf)) + | '-'? ['0'-'9']+ -> let i = INT (int_of_string(L.utf8_lexeme lexbuf)) in return lexbuf i + | '"' | "'" -> + let start = L.lexeme_start lexbuf in + let double_quote = L.latin1_lexeme_char lexbuf 0 = '"' in + string (L.lexeme_start lexbuf) double_quote lexbuf; + let s = get_stored_string () in + return_loc start (L.lexeme_end lexbuf) (STRING s) + + | eof -> return lexbuf EOI + | _ -> illegal lexbuf + +and string start double = lexer + | '"' | "'" -> + let d = L.latin1_lexeme_char lexbuf 0 = '"' in + if d != double then (store_lexeme lexbuf; string start double lexbuf) + | '\\' ['\\' '"' '\''] -> + store_ascii (L.latin1_lexeme_char lexbuf 1); + string start double lexbuf + | "\\n" -> + store_ascii '\n'; string start double lexbuf + | "\\t" -> + store_ascii '\t'; string start double lexbuf + | "\\r" -> + store_ascii '\r'; string start double lexbuf + | '\\' ['0'-'9']+ ';' -> + store_code (parse_char lexbuf 10 1); + string start double lexbuf + | '\\' 'x' ['0'-'9' 'a'-'f' 'A'-'F']+ ';' -> + store_code (parse_char lexbuf 16 2); + string start double lexbuf + | '\\' -> + illegal lexbuf; + | eof -> + error start (start+1) "Unterminated string" + | _ -> + store_lexeme lexbuf; + string start double lexbuf + + + +(***********************************************************) + +let enc = ref L.Latin1 +let lexbuf = ref None +let last_tok = ref (KWD "DUMMY") + +let raise_clean e = + clear_buff (); + raise e + +let mk () _loc cs = + let lb = L.from_var_enc_stream enc cs in + lexbuf := Some lb; + let next _ = + let tok, loc = + try token lb + with + | Ulexing.Error -> + raise_clean (Error (Ulexing.lexeme_end lb, Ulexing.lexeme_end lb, + "Unexpected character")) + | Ulexing.InvalidCodepoint i -> + raise_clean (Error (Ulexing.lexeme_end lb, Ulexing.lexeme_end lb, + "Code point invalid for the current encoding")) + | e -> raise_clean e + in + last_tok := tok; + Some (tok, loc) + in + Stream.from next diff --git a/src/ulexer.mli b/src/ulexer.mli new file mode 100644 index 0000000..6fdac83 --- /dev/null +++ b/src/ulexer.mli @@ -0,0 +1,21 @@ +(******************************************************************************) +(* SXSI : XPath evaluator *) +(* Kim Nguyen (Kim.Nguyen@nicta.com.au) *) +(* Copyright NICTA 2008 *) +(* Distributed under the terms of the LGPL (see LICENCE) *) +(******************************************************************************) +open Camlp4.Sig + +exception Error of int*int*string +type token = + TAG of string + | STRING of string + | INT of int + | KWD of string + | ATT of string + | EOI +module Loc : Loc with type t = int * int +module Token : Token with module Loc = Loc and type t = token +module Error : Error + +val mk : unit -> (Loc.t -> char Stream.t -> (Token.t * Loc.t) Stream.t) diff --git a/src/utils.ml b/src/utils.ml new file mode 100644 index 0000000..2c3772d --- /dev/null +++ b/src/utils.ml @@ -0,0 +1,48 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +(** Various generic signatures and generic module and functor definitions +*) +INCLUDE "utils.ml" + + +module HashSet (H : Hashtbl.HashedType) : + Sigs.HashSet with type data = H.t = +struct + module T = Hashtbl.Make(H) + type data = H.t + type t = data T.t + let create = T.create + let add h v = T.add h v v + let find = T.find + let remove = T.remove + let find_all = T.find_all + let clear = T.clear + let mem = T.mem +end + +module Pair (X : Sigs.Type) (Y : Sigs.Type) : + Sigs.Type with type t = X.t * Y.t = +struct + type t = X.t * Y.t + let hash (x, y) = HASHINT2(X.hash x, Y.hash y) + let compare (x1, y1) (x2, y2) = + let r = X.compare x1 x2 in + if r != 0 then r else Y.compare y1 y2 + let equal p1 p2 = + p1 == p2 || + let x1, y1 = p1 + and x2, y2 = p2 in X.equal x1 x2 && Y.equal y1 y2 +end diff --git a/src/xPath.ml b/src/xPath.ml new file mode 100644 index 0000000..cb3393f --- /dev/null +++ b/src/xPath.ml @@ -0,0 +1,371 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + +#load "pa_extend.cmo";; +module Ast = +struct + + type path = Absolute of step list | AbsoluteDoS of step list| Relative of step list + and step = axis * test *predicate + and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling + | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following + + and test = Simple of QNameSet.t + + + and predicate = Or of predicate*predicate + | And of predicate*predicate + | Not of predicate + | Expr of expression + and expression = Path of path + | Function of string*expression list + | Int of int + | String of string + | True | False + type t = path + + + let text = QNameSet.singleton QName.text + let node = QNameSet.any + let star = + QNameSet.complement ( + QNameSet.from_list [ QName.text; + QName.document; + QName.cdata_section; + QName.comment]) + let t_text = Simple text + let t_node = Simple node + let t_star = Simple star + + + + let pp fmt = Format.fprintf fmt + let print_list printer fmt sep l = + match l with + [] -> () + | [e] -> printer fmt e + | e::es -> printer fmt e; List.iter (fun x -> pp fmt sep;printer fmt x) es + + + let rec print fmt p = + let l = match p with + | Absolute l -> pp fmt "/"; l + | AbsoluteDoS l -> pp fmt "/"; + print_step fmt (DescendantOrSelf,Simple QNameSet.any,Expr True); + pp fmt "/"; l + | Relative l -> l + in + Pretty.print_list ~sep:"/" print_step fmt l + and print_step fmt (axis, test, predicate) = + print_axis fmt axis;pp fmt "::";print_test fmt test; + match predicate with + Expr True -> () + | _ -> pp fmt "["; print_predicate fmt predicate; pp fmt "]" + and print_axis fmt a = pp fmt "%s" (match a with + Self -> "self" + | Child -> "child" + | Descendant -> "descendant" + | DescendantOrSelf -> "descendant-or-self" + | FollowingSibling -> "following-sibling" + | Attribute -> "attribute" + | Ancestor -> "ancestor" + | AncestorOrSelf -> "ancestor-or-self" + | PrecedingSibling -> "preceding-sibling" + | Parent -> "parent" + | _ -> assert false + ) + and print_test fmt ts = + try + pp fmt "%s" (List.assoc ts + [ t_text,"text()"; + t_node,"node()"; + t_star, "*" ] ) + with + Not_found -> pp fmt "%s" + (match ts with + Simple t -> if QNameSet.is_finite t + then QName.to_string (QNameSet.choose t) + else "" + ) + + and print_predicate fmt = function + | Or(p,q) -> print_predicate fmt p; pp fmt " or "; print_predicate fmt q + | And(p,q) -> print_predicate fmt p; pp fmt " and "; print_predicate fmt q + | Not p -> pp fmt "not "; print_predicate fmt p + | Expr e -> print_expression fmt e + + and print_expression fmt = function + | Path p -> print fmt p + | Function (f,l) -> + pp fmt "%s(" f; + Pretty.print_list ~sep:"," print_expression fmt l; + pp fmt ")" + | Int i -> pp fmt "%i" i + | String s -> pp fmt "\"%s\"" s + | t -> pp fmt "%b" (t== True) + +end +module Parser = +struct + open Ast + open Ulexer + let predopt = function None -> Expr True | Some p -> p + + module Gram = Camlp4.Struct.Grammar.Static.Make(Ulexer) + let query = Gram.Entry.mk "query" + + exception Error of Gram.Loc.t*string + let test_of_keyword t loc = + match t with + | "text()" -> text + | "node()" -> node + | "*" -> star + | "and" | "not" | "or" -> QNameSet.singleton (QName.of_string t) + | _ -> raise (Error(loc,"Invalid test name "^t )) + + let axis_to_string a = let r = Format.str_formatter in + print_axis r a; Format.flush_str_formatter() + + + + +EXTEND Gram + +GLOBAL: query; + + query : [ [ p = path; `EOI -> p ]] +; + + path : [ + [ "//" ; l = slist -> AbsoluteDoS (List.rev l) ] + | [ "/" ; l = slist -> Absolute (List.rev l) ] + | [ l = slist -> Relative (List.rev l) ] + ] +; + +slist: [ + [ l = slist ;"/"; s = step -> s @ l ] +| [ l = slist ; "//"; s = step -> s@[(DescendantOrSelf, t_node ,Expr True)]@l] +| [ s = step -> s ] +]; + +step : [ + (* yurk, this is done to parse stuff like + a/b/descendant/a where descendant is actually a tag name :( + if OPT is None then this is a child::descendant if not, this is a real axis name + *) + + +[ axis = axis ; o = OPT ["::" ; t = test -> t ] ; p = top_pred -> + let a,t,p = + match o with + | Some(t) -> (axis,t,p) + | None -> (Child,Simple (QNameSet.singleton (QName.of_string (axis_to_string axis))),p) + in match a with + | Following -> [ (DescendantOrSelf,t,p); + (FollowingSibling, t_star,Expr(True)); + (Ancestor, t_star ,Expr(True)) ] + + | Preceding -> [ (DescendantOrSelf,t,p); + (PrecedingSibling,t_star,Expr(True)); + (Ancestor,t_star,Expr(True)) ] + | _ -> [ a,t,p ] + +] + +| [ "." ; p = top_pred -> [(Self, t_node,p)] ] +| [ ".." ; p = top_pred -> [(Parent,t_star,p)] ] +| [ test = test; p = top_pred -> [(Child,test, p)] ] +| [ att = ATT ; p = top_pred -> + match att with + | "*" -> [(Attribute,t_star,p)] + | _ -> [(Attribute, Simple (QNameSet.singleton (QName.of_string att)) ,p )]] +] +; +top_pred : [ + [ p = OPT [ "["; p=predicate ;"]" -> p ] -> predopt p ] +] +; +axis : [ + [ "self" -> Self | "child" -> Child | "descendant" -> Descendant + | "descendant-or-self" -> DescendantOrSelf + | "ancestor-or-self" -> AncestorOrSelf + | "following-sibling" -> FollowingSibling + | "attribute" -> Attribute + | "parent" -> Parent + | "ancestor" -> Ancestor + | "preceding-sibling" -> PrecedingSibling + | "preceding" -> Preceding + | "following" -> Following + ] + + +]; +test : [ + [ s = KWD -> Simple (test_of_keyword s _loc) ] +| [ t = TAG -> Simple (QNameSet.singleton (QName.of_string t)) ] +]; + + +predicate: [ + + [ p = predicate; "or"; q = predicate -> Or(p,q) ] +| [ p = predicate; "and"; q = predicate -> And(p,q) ] +| [ "not" ; p = predicate -> Not p ] +| [ "("; p = predicate ;")" -> p ] +| [ e = expression -> Expr e ] +]; + +expression: [ + [ f = TAG; "("; args = LIST0 expression SEP "," ; ")" -> Function(f,args)] +| [ `INT(i) -> Int (i) ] +| [ s = STRING -> String s ] +| [ p = path -> Path p ] +| [ "("; e = expression ; ")" -> e ] +] +; +END +;; +(* + +GLOBAL: query; + + query : [ [ p = location_path; `EOI -> p ]] +; + + + location_path : [ + [ "/" ; l = OPT relative_location_path -> + let l = match l with None -> [] | Some l' -> l' in Absolute l ] + | [ l = relative_location_path -> Relative l ] + | [ l = abbrev_absolute_location_path -> l ] + + ] +; + + relative_location_path : [ + [ s = step -> [ s ] ] + | [ l = relative_location_path ; "/"; s = step -> l @ [ s ] ] + | [ l = abbrev_relative_location_path -> l ] + ] +; + + + step : [ + [ a = axis_specifier ; n = node_test ; p = OPT predicate -> + let p = match p with Some p' -> p' | None -> Expr(True) in + a, n, p + ] + | [ a = abbrev_step -> a ] + ] +; + axis_specifier : [ + [ a = axis_name ; "::" -> a ] + | [ a = abbrev_axis_specifier -> a ] + ]; + + axis_name : [ + [ "self" -> Self | "child" -> Child | "descendant" -> Descendant + | "descendant-or-self" -> DescendantOrSelf + | "ancestor-or-self" -> AncestorOrSelf + | "following-sibling" -> FollowingSibling + | "attribute" -> Attribute + | "parent" -> Parent + | "ancestor" -> Ancestor + | "preceding-sibling" -> PrecedingSibling + | "preceding" -> Preceding + | "following" -> Following + ] + ] +; + node_test : [ + [ n = name_test -> n ] + | [ n = node_type ; "("; ")" -> n ] + (* | [ "processing-instruction" ; "(" ... ")" ] *) + ] +; + name_test : [ + [ "*" -> Simple(TagSet.star) ] + | [ t = axis_name -> Simple(TagSet.singleton (Tag.tag (axis_to_string t))) ] + | [ t = TAG -> Simple(TagSet.singleton (Tag.tag t)) ] + ] +; + node_type : [ + [ "text" -> Simple(TagSet.pcdata) ] + | [ "node" -> Simple(TagSet.node) ] + ] +; + predicate : [ + [ "["; e = expr ; "]" -> e ] + ] +; + abbrev_absolute_location_path : [ + [ "//"; l = relative_location_path -> AbsoluteDoS l ] + ]; + + abbrev_relative_location_path : [ + [ l = relative_location_path; "//"; s = step -> + l @ [ (DescendantOrSelf,Simple(TagSet.node),Expr(True)); s ] + ] + ]; + + abbrev_step : [ + [ "." -> (Self, Simple(TagSet.node), Expr(True)) ] + | [ ".." -> (Parent, Simple(TagSet.node), Expr(True)) ] + ]; + + abbrev_axis_specifier: [ + [ a = OPT "@" -> match a with None -> Attribute | _ -> Child ] + ]; + + expr : [ + [ o = or_expr -> o ] + ]; + + primary_expr : [ + [ "("; e = expr ; ")" -> e ] + | [ s = STRING -> Expr (String s) ] + | [ `INT(i) -> Expr (Int (i)) ] + | [ f = TAG; "("; args = LIST0 expr SEP "," ; ")" -> + Expr(Function(f, List.map (function Expr e -> e | _ -> assert false) args))] + ] +; + + or_expr : [ + [ o1 = or_expr ; "or" ; o2 = and_expr -> Or(o1, o2) ] + | [ a = and_expr -> a ] + ] + ; + + and_expr : [ + [ a1 = and_expr; "and"; a2 = unary_expr -> And(a1, a2) ] + | [ p = unary_expr -> p ] + ] +; + unary_expr : [ + [ l = location_path -> Expr(Path l) ] + | [ "not"; "("; e = expr ; ")" -> Not e ] + | [ p = primary_expr -> p ] + + ]; + +END +;; + +*) + + let parse = Gram.parse_string query (Ulexer.Loc.mk "") +end +let parse = Parser.parse diff --git a/src/xPath.mli b/src/xPath.mli new file mode 100644 index 0000000..44935c3 --- /dev/null +++ b/src/xPath.mli @@ -0,0 +1,49 @@ +(***********************************************************************) +(* *) +(* TAToo *) +(* *) +(* Kim Nguyen, LRI UMR8623 *) +(* Université Paris-Sud & CNRS *) +(* *) +(* Copyright 2010-2012 Université Paris-Sud and Centre National de la *) +(* Recherche Scientifique. All rights reserved. This file is *) +(* distributed under the terms of the GNU Lesser General Public *) +(* License, with the special exception on linking described in file *) +(* ../LICENSE. *) +(* *) +(***********************************************************************) + + +(** Abstract syntax tree of XPath queries *) + +module Ast : +sig + type path = Absolute of step list | AbsoluteDoS of step list + | Relative of step list + + and step = axis * test * predicate + and axis = Self | Attribute | Child | Descendant | DescendantOrSelf + | FollowingSibling + | Parent | Ancestor | AncestorOrSelf |PrecedingSibling + | Preceding | Following + and test = Simple of QNameSet.t + and predicate = Or of predicate*predicate + | And of predicate*predicate + | Not of predicate + | Expr of expression + and expression = Path of path + | Function of string*expression list + | Int of int + | String of string + | True | False + type t = path + val print : Format.formatter -> path -> unit + val print_step : Format.formatter -> step -> unit + val print_axis : Format.formatter -> axis -> unit + val print_test : Format.formatter -> test -> unit + val print_predicate : Format.formatter -> predicate -> unit + val print_expression : Format.formatter -> expression -> unit +end + + +val parse : string -> Ast.path diff --git a/tests/docs/tiny.xml b/tests/docs/tiny.xml new file mode 100644 index 0000000..647d3fc --- /dev/null +++ b/tests/docs/tiny.xml @@ -0,0 +1,10 @@ + + + + + + + + + +