1 (******************************************************************************)
2 (* SXSI : XPath evaluator *)
3 (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *)
4 (* Copyright NICTA 2008 *)
5 (* Distributed under the terms of the LGPL (see LICENCE) *)
6 (******************************************************************************)
10 #load "pa_extend.cmo";;
16 type path = Absolute of step list | AbsoluteDoS of step list| Relative of step list
17 and step = axis*test*predicate
18 and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling
21 and test = TagSet.Xml.t
23 and predicate = Or of predicate*predicate
24 | And of predicate*predicate
27 and expression = Path of path
28 | Function of string*expression list
35 let pp fmt = Format.fprintf fmt
36 let print_list printer fmt sep l =
39 | [e] -> printer fmt e
40 | e::es -> printer fmt e; List.iter (fun x -> pp fmt sep;printer fmt x) es
45 | Absolute l -> pp fmt "/"; l
46 | AbsoluteDoS l -> pp fmt "/";
47 print_step fmt (DescendantOrSelf,TagSet.Xml.node,Expr True);
51 print_list print_step fmt "/" (List.rev l)
52 and print_step fmt (axis,test,predicate) =
53 print_axis fmt axis;pp fmt "::";print_test fmt test;
54 pp fmt "["; print_predicate fmt predicate; pp fmt "]"
55 and print_axis fmt a = pp fmt "%s" (match a with
58 | Descendant -> "descendant"
59 | DescendantOrSelf -> "descendant-or-self"
60 | FollowingSibling -> "following-sibling"
61 | Attribute -> "attribute"
63 and print_test fmt ts =
65 pp fmt "%s" (List.assoc ts
66 [ (TagSet.Xml.pcdata,"text()"); (TagSet.Xml.node,"node()");
67 (TagSet.Xml.star),"*"])
69 Not_found -> pp fmt "%s"
70 (if TagSet.Xml.is_finite ts
71 then Tag.to_string (TagSet.Xml.choose ts)
74 and print_predicate fmt = function
75 | Or(p,q) -> print_predicate fmt p; pp fmt " or "; print_predicate fmt q
76 | And(p,q) -> print_predicate fmt p; pp fmt " and "; print_predicate fmt q
77 | Not p -> pp fmt "not "; print_predicate fmt p
78 | Expr e -> print_expression fmt e
80 and print_expression fmt = function
81 | Path p -> print fmt p
82 | Function (f,l) -> pp fmt "%s(" f;print_list print_expression fmt "," l;pp fmt ")"
83 | Int i -> pp fmt "%i" i
84 | String s -> pp fmt "\"%s\"" s
85 | t -> pp fmt "%b" (t== True)
92 let predopt = function None -> Expr True | Some p -> p
94 module Gram = Camlp4.Struct.Grammar.Static.Make(Ulexer)
95 let query = Gram.Entry.mk "query"
97 exception Error of Gram.Loc.t*string
98 let test_of_keyword t loc =
100 | "text()" -> TagSet.Xml.pcdata
101 | "node()" -> TagSet.Xml.node
102 | "*" -> TagSet.Xml.star
103 | "and" | "not" | "or" -> TagSet.Xml.singleton (Tag.tag t)
104 | _ -> raise (Error(loc,"Invalid test name "^t ))
106 let axis_to_string a = let r = Format.str_formatter in
107 print_axis r a; Format.flush_str_formatter()
112 query : [ [ p = path; `EOI -> p ]]
116 [ "//" ; l = slist -> AbsoluteDoS l ]
117 | [ "/" ; l = slist -> Absolute l ]
118 | [ l = slist -> Relative l ]
123 [ l = slist ;"/"; s = step -> s::l ]
124 | [ l = slist ; "//"; s = step -> s::(DescendantOrSelf,TagSet.Xml.node,Expr True)::l]
125 | [ s = step -> [ s ] ]
129 (* yurk, this is done to parse stuff like
130 a/b/descendant/a where descendant is actually a tag name :(
131 if OPT is None then this is a child::descendant if not, this is a real axis name
133 [ axis = axis ; o = OPT ["::" ; t = test -> t ] ; p = top_pred ->
135 | Some(t) -> (axis,t,p)
136 | None -> (Child,TagSet.Xml.singleton (Tag.tag (axis_to_string axis)),p) ]
138 | [ "." ; p = top_pred -> (Self,TagSet.Xml.node,p) ]
139 | [ test = test; p = top_pred -> (Child,test, p) ]
140 | [ att = ATT ; p = top_pred ->
142 | "*" -> (Attribute,TagSet.Xml.star,p)
143 | _ -> (Attribute, TagSet.Xml.singleton (Tag.tag att) ,p )]
147 [ p = OPT [ "["; p=predicate ;"]" -> p ] -> predopt p ]
151 [ "self" -> Self | "child" -> Child | "descendant" -> Descendant
152 | "descendant-or-self" -> DescendantOrSelf
153 | "following-sibling" -> FollowingSibling
154 | "attribute" -> Attribute
161 [ s = KWD -> test_of_keyword s _loc ]
162 | [ t = TAG -> TagSet.Xml.singleton (Tag.tag t) ]
167 [ p = predicate; "or"; q = predicate -> Or(p,q) ]
168 | [ p = predicate; "and"; q = predicate -> And(p,q) ]
169 | [ "not" ; p = predicate -> Not p ]
170 | [ "("; p = predicate ;")" -> p ]
171 | [ e = expression -> Expr e ]
175 [ f = TAG; "("; args = LIST0 expression SEP "," ; ")" -> Function(f,args)]
176 | [ `INT(i) -> Int (i) ]
177 | [ s = STRING -> String s ]
178 | [ p = path -> Path p ]
179 | [ "("; e = expression ; ")" -> e ]
184 let parse_string = Gram.parse_string query (Ulexer.Loc.mk "<string>")
185 let parse = Gram.parse_string query (Ulexer.Loc.mk "<string>")
188 module Functions = struct
190 type value = [ `NodeSet of Automaton.BST.t
191 | `Int of int | `String of string
192 | `Bool of bool | `True | `False ]
193 type expr = [ value | `Call of (string*(expr list))
194 | `Auto of Automaton.t ]
197 let count = function [`NodeSet(s) ] -> `Int(Automaton.BST.cardinal s)
198 | _ -> failwith "count"
201 let equal = function [ `Int i; `Int j ] -> `Bool (i == j)
202 |_ -> failwith "equal"
204 let globals : (string*(value list -> value)) list = [
210 let text t = Tree.Binary.string (Tree.Binary.left t)
212 let rec eval_expr tree (e:expr) : value = match e with
213 | `Call (f,args) -> (List.assoc f globals) (List.map (eval_expr tree) args)
214 | `Auto(a) -> `NodeSet(ignore (Automaton.BottomUp.accept a tree);a.Automaton.result)
218 function `NodeSet s -> Automaton.BST.is_empty s
220 | _ -> failwith "truth_value"
223 module Compile = struct
228 type direction = Left | Right | Final
229 let (==>) a (b,c,d) = Transition.Label(a,b,c,d)
230 let (@@) b (c,d) = (b,c,d)
232 let star = TagSet.Xml.star
233 let any = TagSet.Xml.any
234 let notstar = TagSet.Xml.add Tag.pcdata (TagSet.Xml.add Tag.attribute TagSet.Xml.empty)
235 let swap dir a b = match dir with
236 | Left | Final -> (a,b)
240 let rec aux ((qacc,nqacc) as acc) = function
243 aux (if State.equal (Transition.dest1 t) q
244 || State.equal (Transition.dest2 t) q
246 else qacc , (t::nqacc)) r
251 let mk_tag_t dir s ts q1 q2 = (s==> ts @@ (swap dir q1 q2));;
252 let mk_self_trs ts acc l =
255 let s = Transition.source t in
256 let d1 = Transition.dest1 t in
257 let d2 = Transition.dest2 t in
258 let tself = (s ==> ts @@ (d1,d2)) in
259 (Transition.cap t tself)::acc ) (acc) l
261 let mk_pred_trs f acc l =
264 let s = Transition.source t in
265 let d1 = Transition.dest1 t in
266 let d2 = Transition.dest2 t in
267 let tself = Transition.External(s,f,d1,d2) in
268 (Transition.cap t tself)::acc ) (acc) l
270 let mk_dself_trs q' ts acc l =
273 let t',s,d2 = match t with
274 | Transition.Label(s,ts,_,d2) -> Transition.Label(s,ts,q',d2),s,d2
275 | Transition.External (s,f,_,d2) -> Transition.External(s,f,q',d2),s,d2
277 let tself = (s ==> ts @@ (q',d2)) in
278 (Transition.cap t' tself)::acc ) (acc) l
280 let from_list = List.fold_left (fun acc e -> SSet.add e acc) SSet.empty
282 let dir = function (FollowingSibling,_,_) -> Right
286 let rec map_dir (d,acc) = function
288 | s::r -> map_dir ((dir s),(s,d)::acc) r
289 in let l = match p with
290 | Absolute p | Relative p -> map_dir (Final,[]) p
292 let l = (map_dir (Final,[]) p)
293 in ((DescendantOrSelf,TagSet.Xml.node,Expr True),dir (fst(List.hd l)))::l
294 in ((Child,TagSet.Xml.node,Expr True),dir (fst(List.hd l)))::l
297 let rec compile_step q dir trs final initial ignore (axis,test,pred) =
298 let q' = State.mk() in
299 let trs,final,initial = match axis,test with
301 let tchange,tkeep = split_dest q trs in
302 let trs' = mk_self_trs ts tkeep tchange in
303 (trs',q::final,initial)
306 (mk_tag_t dir q ts q' ignore) ::( q==> any @@ (ignore,q))::trs, q'::final,initial
309 (mk_tag_t dir q ts q' ignore) ::
310 (q==> star @@ (q,q))::(q==> notstar @@ (ignore,q))::trs,q'::final,initial
312 | DescendantOrSelf,ts ->
313 let tchange,tkeep = split_dest q trs in
314 let trs' = mk_dself_trs q' ts trs tchange in
315 (mk_tag_t dir q ts q' ignore) ::
316 (q==> star @@ (q,q))::(q==> notstar @@ (ignore,q))::trs',q'::final,initial
318 | FollowingSibling,ts ->
319 (mk_tag_t dir q ts q' ignore) :: (q ==> any @@ (ignore,q))::trs,q'::final,initial
321 (* q' is not returned and thus not added to the set of final states.
322 It's ok since we should never be in a final state on a node
324 | Attribute,ts -> let q'' = State.mk() in
325 (mk_tag_t Left q (TagSet.Xml.attribute) q' ignore)::
326 (mk_tag_t Left q' (ts) q'' ignore)::( q==> any @@ (ignore,q))::trs, q''::q'::final,initial
328 | Parent,ts -> let q'' = List.hd initial in
329 (mk_tag_t Left q' (star) q q')::
330 ( q'' ==> ts @@ (q',q''))::
331 ( q'' ==> star @@ (q'',q''))::
332 ( q'' ==> notstar @@ (ignore,q''))::trs,q'::q''::final,q''::initial
335 let q_out = List.hd final in
336 let tchange,tkeep = split_dest q_out trs in
337 let trs' = compile_pred q_out tkeep tchange pred in
340 and compile_pred q_out tkeep tchange p =
341 let rec pred_rec = function
343 | Or(p1,p2) -> cup (pred_rec p1) (pred_rec p2)
344 | And(p1,p2) -> cap (pred_rec p1) (pred_rec p2)
345 | Not(p) -> neg (pred_rec p)
346 | Expr e -> match compile_expr e with
347 | `True -> `Label (TagSet.Xml.any)
348 | `False -> `Label (TagSet.Xml.empty)
349 | e -> `Fun (fun t -> Functions.truth_value (Functions.eval_expr t e))
351 in match pred_rec p with
352 `Fun f -> mk_pred_trs f tkeep tchange
353 | `Label ts -> mk_self_trs ts tkeep tchange
355 and compile_expr = function
358 | Path p -> `Auto(compile p)
360 | String s -> `String s
361 | Function (f,elist) -> `Call(f,List.map compile_expr elist)
363 and cup a b = match a,b with
364 | `Label l1 , `Label l2 -> `Label(TagSet.Xml.cup l1 l2)
365 | `Fun f1 , `Fun f2 -> `Fun (fun x -> (f1 x)||(f2 x))
366 | `Fun f , `Label l | `Label l, `Fun f ->
368 (TagSet.Xml.mem (Tree.Binary.tag x) l)
371 and cap a b = match a,b with
372 | `Label l1, `Label l2 -> `Label (TagSet.Xml.cap l1 l2)
373 | `Fun f1,`Fun f2 -> `Fun (fun x -> (f1 x)&&(f2 x))
374 | `Fun f,`Label l | `Label l,`Fun f ->
376 (TagSet.Xml.mem (Tree.Binary.tag x) l)
379 `Label l -> `Label(TagSet.Xml.neg l)
380 | `Fun f -> `Fun (fun x -> not (f x))
383 let p = rev_map_dir p in
384 let ignore = State.mk()
386 let q0 = State.mk() in
387 let transitions = Transition.empty () in
388 let tlist,qlist,initacc = List.fold_left
389 (fun (tlist,qlist,initacc) (s,dir) ->
390 let q = List.hd qlist in
391 compile_step q dir tlist qlist initacc ignore s ) ([],[q0;ignore],[q0]) p
393 List.iter (Transition.add transitions) tlist;
394 let qmark = List.hd qlist in
395 { Automaton.mk() with
396 initial = from_list initacc;
397 final = from_list qlist;
398 transitions = transitions;
399 marking = from_list [qmark];
400 ignore = from_list [qmark;ignore];