(******************************************************************************) (* SXSI : XPath evaluator *) (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *) (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) INCLUDE "debug.ml";; #load "pa_extend.cmo";; module Ast = struct type path = Absolute of step list | AbsoluteDoS of step list| Relative of step list and step = axis*test*predicate and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling | Parent and test = TagSet.Xml.t and predicate = Or of predicate*predicate | And of predicate*predicate | Not of predicate | Expr of expression and expression = Path of path | Function of string*expression list | Int of int | String of string | True | False type t = path let pp fmt = Format.fprintf fmt let print_list printer fmt sep l = match l with [] -> () | [e] -> printer fmt e | e::es -> printer fmt e; List.iter (fun x -> pp fmt sep;printer fmt x) es let rec print fmt p = let l = match p with | Absolute l -> pp fmt "/"; l | AbsoluteDoS l -> pp fmt "/"; print_step fmt (DescendantOrSelf,TagSet.Xml.node,Expr True); pp fmt "/"; l | Relative l -> l in print_list print_step fmt "/" (List.rev l) and print_step fmt (axis,test,predicate) = print_axis fmt axis;pp fmt "::";print_test fmt test; pp fmt "["; print_predicate fmt predicate; pp fmt "]" and print_axis fmt a = pp fmt "%s" (match a with Self -> "self" | Child -> "child" | Descendant -> "descendant" | DescendantOrSelf -> "descendant-or-self" | FollowingSibling -> "following-sibling" | Attribute -> "attribute" | Parent -> "parent") and print_test fmt ts = try pp fmt "%s" (List.assoc ts [ (TagSet.Xml.pcdata,"text()"); (TagSet.Xml.node,"node()"); (TagSet.Xml.star),"*"]) with Not_found -> pp fmt "%s" (if TagSet.Xml.is_finite ts then Tag.to_string (TagSet.Xml.choose ts) else "") and print_predicate fmt = function | Or(p,q) -> print_predicate fmt p; pp fmt " or "; print_predicate fmt q | And(p,q) -> print_predicate fmt p; pp fmt " and "; print_predicate fmt q | Not p -> pp fmt "not "; print_predicate fmt p | Expr e -> print_expression fmt e and print_expression fmt = function | Path p -> print fmt p | Function (f,l) -> pp fmt "%s(" f;print_list print_expression fmt "," l;pp fmt ")" | Int i -> pp fmt "%i" i | String s -> pp fmt "\"%s\"" s | t -> pp fmt "%b" (t== True) end module Parser = struct open Ast open Ulexer let predopt = function None -> Expr True | Some p -> p module Gram = Camlp4.Struct.Grammar.Static.Make(Ulexer) let query = Gram.Entry.mk "query" exception Error of Gram.Loc.t*string let test_of_keyword t loc = match t with | "text()" -> TagSet.Xml.pcdata | "node()" -> TagSet.Xml.node | "*" -> TagSet.Xml.star | "and" | "not" | "or" -> TagSet.Xml.singleton (Tag.tag t) | _ -> raise (Error(loc,"Invalid test name "^t )) let axis_to_string a = let r = Format.str_formatter in print_axis r a; Format.flush_str_formatter() EXTEND Gram GLOBAL: query; query : [ [ p = path; `EOI -> p ]] ; path : [ [ "//" ; l = slist -> AbsoluteDoS l ] | [ "/" ; l = slist -> Absolute l ] | [ l = slist -> Relative l ] ] ; slist: [ [ l = slist ;"/"; s = step -> s::l ] | [ l = slist ; "//"; s = step -> s::(DescendantOrSelf,TagSet.Xml.node,Expr True)::l] | [ s = step -> [ s ] ] ]; step : [ (* yurk, this is done to parse stuff like a/b/descendant/a where descendant is actually a tag name :( if OPT is None then this is a child::descendant if not, this is a real axis name *) [ axis = axis ; o = OPT ["::" ; t = test -> t ] ; p = top_pred -> match o with | Some(t) -> (axis,t,p) | None -> (Child,TagSet.Xml.singleton (Tag.tag (axis_to_string axis)),p) ] | [ "." ; p = top_pred -> (Self,TagSet.Xml.node,p) ] | [ test = test; p = top_pred -> (Child,test, p) ] | [ att = ATT ; p = top_pred -> match att with | "*" -> (Attribute,TagSet.Xml.star,p) | _ -> (Attribute, TagSet.Xml.singleton (Tag.tag att) ,p )] ] ; top_pred : [ [ p = OPT [ "["; p=predicate ;"]" -> p ] -> predopt p ] ] ; axis : [ [ "self" -> Self | "child" -> Child | "descendant" -> Descendant | "descendant-or-self" -> DescendantOrSelf | "following-sibling" -> FollowingSibling | "attribute" -> Attribute | "parent" -> Parent ] ]; test : [ [ s = KWD -> test_of_keyword s _loc ] | [ t = TAG -> TagSet.Xml.singleton (Tag.tag t) ] ]; predicate: [ [ p = predicate; "or"; q = predicate -> Or(p,q) ] | [ p = predicate; "and"; q = predicate -> And(p,q) ] | [ "not" ; p = predicate -> Not p ] | [ "("; p = predicate ;")" -> p ] | [ e = expression -> Expr e ] ]; expression: [ [ f = TAG; "("; args = LIST0 expression SEP "," ; ")" -> Function(f,args)] | [ `INT(i) -> Int (i) ] | [ s = STRING -> String s ] | [ p = path -> Path p ] | [ "("; e = expression ; ")" -> e ] ] ; END ;; let parse_string = Gram.parse_string query (Ulexer.Loc.mk "") let parse = Gram.parse_string query (Ulexer.Loc.mk "") end module Functions = struct type value = [ `NodeSet of Automaton.BST.t | `Int of int | `String of string | `Bool of bool | `True | `False ] type expr = [ value | `Call of (string*(expr list)) | `Auto of Automaton.t | `Contains of expr list ] let count = function [`NodeSet(s) ] -> `Int(Automaton.BST.cardinal s) | _ -> failwith "count" let contains_old = function [`NodeSet(s) ; `String(str) ] -> `Bool(Automaton.BST.exists (fun tree -> Tree.Binary.contains_old tree str ) s) | _ -> failwith "contains_old" let equal = function [ `Int i; `Int j ] -> `Bool (i == j) |_ -> failwith "equal" let globals : (string*(value list -> value)) list = [ ("count",count); ("equal",equal); ("contains_old",contains_old); ] let text t = Tree.Binary.string (Tree.Binary.left t) let rec eval_expr tree (e:expr) : value = match e with | `Call (f,args) -> (List.assoc f globals) (List.map (eval_expr tree) args) | `Auto(a) -> `NodeSet(ignore (Automaton.BottomUp.accept a tree); a.Automaton.result) | `Contains(args) -> begin match args with [ `Auto(a); `String(s) ] -> let docs = Tree.Binary.contains tree s in let _ = Automaton.BottomUp.accept ~strings:(Some docs) a tree in `NodeSet(a.Automaton.result) | _ -> failwith "contains invalid" end | #value as x -> x let truth_value = function `NodeSet s -> not (Automaton.BST.is_empty s) |`Bool(b) -> b | _ -> failwith "truth_value" end module Compile = struct open Ast open Automaton type direction = Left | Right | Final let (==>) a (b,c,d) = Transition.Label(a,b,c,d) let (@@) b (c,d) = (b,c,d) let star = TagSet.Xml.star let any = TagSet.Xml.any let notstar = TagSet.Xml.add Tag.pcdata (TagSet.Xml.add Tag.attribute TagSet.Xml.empty) let swap dir a b = match dir with | Left | Final -> (a,b) | Right -> (b,a) let split_dest q l = let rec aux ((qacc,nqacc) as acc) = function | [] -> acc | t::r -> aux (if State.equal (Transition.dest1 t) q || State.equal (Transition.dest2 t) q then t::qacc , nqacc else qacc , (t::nqacc)) r in aux ([],[]) l let mk_tag_t dir s ts q1 q2 = (s==> ts @@ (swap dir q1 q2));; let mk_self_trs ts acc l = List.fold_left (fun acc t -> let s = Transition.source t in let d1 = Transition.dest1 t in let d2 = Transition.dest2 t in let tself = (s ==> ts @@ (d1,d2)) in (Transition.cap t tself)::acc ) (acc) l let mk_pred_trs f acc l = List.fold_left (fun acc t -> let s = Transition.source t in let d1 = Transition.dest1 t in let d2 = Transition.dest2 t in let tself = Transition.External(s,f,d1,d2) in (Transition.cap t tself)::acc ) (acc) l let mk_dself_trs q' ts acc l = List.fold_left (fun acc t -> let t',s,d2 = match t with | Transition.Label(s,ts,_,d2) -> Transition.Label(s,ts,q',d2),s,d2 | Transition.External (s,f,_,d2) -> Transition.External(s,f,q',d2),s,d2 in let tself = (s ==> ts @@ (q',d2)) in (Transition.cap t' tself)::acc ) (acc) l let from_list = List.fold_left (fun acc e -> SSet.add e acc) SSet.empty let dir = function (FollowingSibling,_,_) -> Right | _ -> Left let rev_map_dir p = let rec map_dir (d,acc) = function | [] -> acc | s::r -> map_dir ((dir s),(s,d)::acc) r in let l = match p with | Absolute p | Relative p -> map_dir (Final,[]) p | AbsoluteDoS p -> let l = (map_dir (Final,[]) p) in ((DescendantOrSelf,TagSet.Xml.node,Expr True),dir (fst(List.hd l)))::l in ((Child,TagSet.Xml.node,Expr True),dir (fst(List.hd l)))::l let rec compile_step q dir trs final initial ignore (axis,test,pred) = let q' = State.mk() in let trs,final,initial = match axis,test with | Self,ts -> let tchange,tkeep = split_dest q trs in let trs' = mk_self_trs ts tkeep tchange in (trs',q::final,initial) | Child,ts -> (mk_tag_t dir q ts q' ignore) ::( q==> any @@ (ignore,q))::trs, q'::final,initial | Descendant,ts -> (mk_tag_t dir q ts q' ignore) :: (q==> star @@ (q,q))::(q==> notstar @@ (ignore,q))::trs,q'::final,initial | DescendantOrSelf,ts -> let tchange,tkeep = split_dest q trs in let trs' = mk_dself_trs q' ts trs tchange in (mk_tag_t dir q ts q' ignore) :: (q==> star @@ (q,q))::(q==> notstar @@ (ignore,q))::trs',q'::final,initial | FollowingSibling,ts -> (mk_tag_t dir q ts q' ignore) :: (q ==> any @@ (ignore,q))::trs,q'::final,initial (* q' is not returned and thus not added to the set of final states. It's ok since we should never be in a final state on a node <@> *) | Attribute,ts -> let q'' = State.mk() in (mk_tag_t Left q (TagSet.Xml.attribute) q' ignore):: (mk_tag_t Left q' (ts) q'' ignore)::( q==> any @@ (ignore,q))::trs, q''::q'::final,initial | Parent,ts -> let q'' = List.hd initial in (mk_tag_t Left q' (star) q q'):: ( q'' ==> ts @@ (q',q'')):: ( q'' ==> star @@ (q'',q'')):: ( q'' ==> notstar @@ (ignore,q''))::trs,q'::q''::final,q''::initial in let q_out = List.hd final in let tchange,tkeep = split_dest q_out trs in let trs' = compile_pred q_out tkeep tchange pred in (trs',final,initial) and compile_pred q_out tkeep tchange p = let rec pred_rec = function | Or(p1,p2) -> cup (pred_rec p1) (pred_rec p2) | And(p1,p2) -> cap (pred_rec p1) (pred_rec p2) | Not(p) -> neg (pred_rec p) | Expr e -> match compile_expr e with | `True -> `Label (TagSet.Xml.any) | `False -> `Label (TagSet.Xml.empty) | e -> `Fun (fun t -> Functions.truth_value (Functions.eval_expr t e)) in match pred_rec p with `Fun f -> mk_pred_trs f tkeep tchange | `Label ts -> mk_self_trs ts tkeep tchange and compile_expr = function True -> `True | False -> `False | Path p -> `Auto(compile p) | Int i -> `Int i | String s -> `String s | Function ("contains",elist) ->`Contains(List.map compile_expr elist) | Function (f,elist) -> `Call(f,List.map compile_expr elist) and cup a b = match a,b with | `Label l1 , `Label l2 -> `Label(TagSet.Xml.cup l1 l2) | `Fun f1 , `Fun f2 -> `Fun (fun x -> (f1 x)||(f2 x)) | `Fun f , `Label l | `Label l, `Fun f -> `Fun (fun x -> (TagSet.Xml.mem (Tree.Binary.tag x) l) || (f x)) and cap a b = match a,b with | `Label l1, `Label l2 -> `Label (TagSet.Xml.cap l1 l2) | `Fun f1,`Fun f2 -> `Fun (fun x -> (f1 x)&&(f2 x)) | `Fun f,`Label l | `Label l,`Fun f -> `Fun (fun x -> (TagSet.Xml.mem (Tree.Binary.tag x) l) && f x) and neg = function `Label l -> `Label(TagSet.Xml.neg l) | `Fun f -> `Fun (fun x -> not (f x)) and compile p = let p = rev_map_dir p in let ignore = State.mk() in let q0 = State.mk() in let transitions = Transition.empty () in let tlist,qlist,initacc = List.fold_left (fun (tlist,qlist,initacc) (s,dir) -> let q = List.hd qlist in compile_step q dir tlist qlist initacc ignore s ) ([],[q0;ignore],[q0]) p in List.iter (Transition.add transitions) tlist; let qmark = List.hd qlist in { Automaton.mk() with initial = from_list initacc; final = from_list qlist; transitions = transitions; marking = from_list [qmark]; ignore = from_list [qmark;ignore]; } end