1 (******************************************************************************)
2 (* SXSI : XPath evaluator *)
3 (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *)
4 (* Copyright NICTA 2008 *)
5 (* Distributed under the terms of the LGPL (see LICENCE) *)
6 (******************************************************************************)
10 #load "pa_extend.cmo";;
16 type path = Absolute of step list | AbsoluteDoS of step list| Relative of step list
17 and step = axis*test*predicate
18 and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling
21 and test = TagSet.Xml.t
23 and predicate = Or of predicate*predicate
24 | And of predicate*predicate
27 and expression = Path of path
28 | Function of string*expression list
35 let pp fmt = Format.fprintf fmt
36 let print_list printer fmt sep l =
39 | [e] -> printer fmt e
40 | e::es -> printer fmt e; List.iter (fun x -> pp fmt sep;printer fmt x) es
45 | Absolute l -> pp fmt "/"; l
46 | AbsoluteDoS l -> pp fmt "/";
47 print_step fmt (DescendantOrSelf,TagSet.Xml.node,Expr True);
51 print_list print_step fmt "/" (List.rev l)
52 and print_step fmt (axis,test,predicate) =
53 print_axis fmt axis;pp fmt "::";print_test fmt test;
54 pp fmt "["; print_predicate fmt predicate; pp fmt "]"
55 and print_axis fmt a = pp fmt "%s" (match a with
58 | Descendant -> "descendant"
59 | DescendantOrSelf -> "descendant-or-self"
60 | FollowingSibling -> "following-sibling"
61 | Attribute -> "attribute"
63 and print_test fmt ts =
65 pp fmt "%s" (List.assoc ts
66 [ (TagSet.Xml.pcdata,"text()"); (TagSet.Xml.node,"node()");
67 (TagSet.Xml.star),"*"])
69 Not_found -> pp fmt "%s"
70 (if TagSet.Xml.is_finite ts
71 then Tag.to_string (TagSet.Xml.choose ts)
74 and print_predicate fmt = function
75 | Or(p,q) -> print_predicate fmt p; pp fmt " or "; print_predicate fmt q
76 | And(p,q) -> print_predicate fmt p; pp fmt " and "; print_predicate fmt q
77 | Not p -> pp fmt "not "; print_predicate fmt p
78 | Expr e -> print_expression fmt e
80 and print_expression fmt = function
81 | Path p -> print fmt p
82 | Function (f,l) -> pp fmt "%s(" f;print_list print_expression fmt "," l;pp fmt ")"
83 | Int i -> pp fmt "%i" i
84 | String s -> pp fmt "\"%s\"" s
85 | t -> pp fmt "%b" (t== True)
92 let predopt = function None -> Expr True | Some p -> p
94 module Gram = Camlp4.Struct.Grammar.Static.Make(Ulexer)
95 let query = Gram.Entry.mk "query"
97 exception Error of Gram.Loc.t*string
98 let test_of_keyword t loc =
100 | "text()" -> TagSet.Xml.pcdata
101 | "node()" -> TagSet.Xml.node
102 | "*" -> TagSet.Xml.star
103 | "and" | "not" | "or" -> TagSet.Xml.singleton (Tag.tag t)
104 | _ -> raise (Error(loc,"Invalid test name "^t ))
106 let axis_to_string a = let r = Format.str_formatter in
107 print_axis r a; Format.flush_str_formatter()
112 query : [ [ p = path; `EOI -> p ]]
116 [ "//" ; l = slist -> AbsoluteDoS l ]
117 | [ "/" ; l = slist -> Absolute l ]
118 | [ l = slist -> Relative l ]
123 [ l = slist ;"/"; s = step -> s::l ]
124 | [ l = slist ; "//"; s = step -> s::(DescendantOrSelf,TagSet.Xml.node,Expr True)::l]
125 | [ s = step -> [ s ] ]
129 (* yurk, this is done to parse stuff like
130 a/b/descendant/a where descendant is actually a tag name :(
131 if OPT is None then this is a child::descendant if not, this is a real axis name
133 [ axis = axis ; o = OPT ["::" ; t = test -> t ] ; p = top_pred ->
135 | Some(t) -> (axis,t,p)
136 | None -> (Child,TagSet.Xml.singleton (Tag.tag (axis_to_string axis)),p) ]
138 | [ "." ; p = top_pred -> (Self,TagSet.Xml.node,p) ]
139 | [ test = test; p = top_pred -> (Child,test, p) ]
140 | [ att = ATT ; p = top_pred ->
142 | "*" -> (Attribute,TagSet.Xml.star,p)
143 | _ -> (Attribute, TagSet.Xml.singleton (Tag.tag att) ,p )]
147 [ p = OPT [ "["; p=predicate ;"]" -> p ] -> predopt p ]
151 [ "self" -> Self | "child" -> Child | "descendant" -> Descendant
152 | "descendant-or-self" -> DescendantOrSelf
153 | "following-sibling" -> FollowingSibling
154 | "attribute" -> Attribute
161 [ s = KWD -> test_of_keyword s _loc ]
162 | [ t = TAG -> TagSet.Xml.singleton (Tag.tag t) ]
167 [ p = predicate; "or"; q = predicate -> Or(p,q) ]
168 | [ p = predicate; "and"; q = predicate -> And(p,q) ]
169 | [ "not" ; p = predicate -> Not p ]
170 | [ "("; p = predicate ;")" -> p ]
171 | [ e = expression -> Expr e ]
175 [ f = TAG; "("; args = LIST0 expression SEP "," ; ")" -> Function(f,args)]
176 | [ `INT(i) -> Int (i) ]
177 | [ s = STRING -> String s ]
178 | [ p = path -> Path p ]
179 | [ "("; e = expression ; ")" -> e ]
184 let parse_string = Gram.parse_string query (Ulexer.Loc.mk "<string>")
185 let parse = Gram.parse_string query (Ulexer.Loc.mk "<string>")
188 module Functions = struct
190 type value = [ `NodeSet of Automaton.BST.t
191 | `Int of int | `String of string
192 | `Bool of bool | `True | `False ]
193 type expr = [ value | `Call of (string*(expr list))
194 | `Auto of Automaton.t ]
197 let count = function [`NodeSet(s) ] -> `Int(Automaton.BST.cardinal s)
198 | _ -> failwith "count"
201 let equal = function [ `Int i; `Int j ] -> `Bool (i == j)
202 |_ -> failwith "equal"
204 let globals : (string*(value list -> value)) list = [
210 let text t = Tree.Binary.string (Tree.Binary.left t)
212 let rec eval_expr tree (e:expr) : value = match e with
213 | `Call (f,args) -> (List.assoc f globals) (List.map (eval_expr tree) args)
214 | `Auto(a) -> `NodeSet(ignore (Automaton.dump Format.err_formatter a;
215 Tree.Binary.print_xml_fast stderr tree;
216 Printf.eprintf "\n=======================\n%!";
217 Automaton.TopDown.run a tree);
218 Printf.eprintf "Results : %i\n%!"
219 (Automaton.BST.cardinal a.Automaton.result);
220 Automaton.BST.iter (fun t -> Tree.Binary.print_xml_fast stderr t;
221 Printf.eprintf "^^^^^^^^^^^^^^^^^^^^^^^^\n%!")
227 function `NodeSet s -> not (Automaton.BST.is_empty s)
229 | _ -> failwith "truth_value"
232 module Compile = struct
237 type direction = Left | Right | Final
238 let (==>) a (b,c,d) = Transition.Label(a,b,c,d)
239 let (@@) b (c,d) = (b,c,d)
241 let star = TagSet.Xml.star
242 let any = TagSet.Xml.any
243 let notstar = TagSet.Xml.add Tag.pcdata (TagSet.Xml.add Tag.attribute TagSet.Xml.empty)
244 let swap dir a b = match dir with
245 | Left | Final -> (a,b)
249 let rec aux ((qacc,nqacc) as acc) = function
252 aux (if State.equal (Transition.dest1 t) q
253 || State.equal (Transition.dest2 t) q
255 else qacc , (t::nqacc)) r
260 let mk_tag_t dir s ts q1 q2 = (s==> ts @@ (swap dir q1 q2));;
261 let mk_self_trs ts acc l =
264 let s = Transition.source t in
265 let d1 = Transition.dest1 t in
266 let d2 = Transition.dest2 t in
267 let tself = (s ==> ts @@ (d1,d2)) in
268 (Transition.cap t tself)::acc ) (acc) l
270 let mk_pred_trs f acc l =
273 let s = Transition.source t in
274 let d1 = Transition.dest1 t in
275 let d2 = Transition.dest2 t in
276 let tself = Transition.External(s,f,d1,d2) in
277 (Transition.cap t tself)::acc ) (acc) l
279 let mk_dself_trs q' ts acc l =
282 let t',s,d2 = match t with
283 | Transition.Label(s,ts,_,d2) -> Transition.Label(s,ts,q',d2),s,d2
284 | Transition.External (s,f,_,d2) -> Transition.External(s,f,q',d2),s,d2
286 let tself = (s ==> ts @@ (q',d2)) in
287 (Transition.cap t' tself)::acc ) (acc) l
289 let from_list = List.fold_left (fun acc e -> SSet.add e acc) SSet.empty
291 let dir = function (FollowingSibling,_,_) -> Right
295 let rec map_dir (d,acc) = function
297 | s::r -> map_dir ((dir s),(s,d)::acc) r
298 in let l = match p with
299 | Absolute p | Relative p -> map_dir (Final,[]) p
301 let l = (map_dir (Final,[]) p)
302 in ((DescendantOrSelf,TagSet.Xml.node,Expr True),dir (fst(List.hd l)))::l
303 in ((Child,TagSet.Xml.node,Expr True),dir (fst(List.hd l)))::l
306 let rec compile_step q dir trs final initial ignore (axis,test,pred) =
307 let q' = State.mk() in
308 let trs,final,initial = match axis,test with
310 let tchange,tkeep = split_dest q trs in
311 let trs' = mk_self_trs ts tkeep tchange in
312 (trs',q::final,initial)
315 (mk_tag_t dir q ts q' ignore) ::( q==> any @@ (ignore,q))::trs, q'::final,initial
318 (mk_tag_t dir q ts q' ignore) ::
319 (q==> star @@ (q,q))::(q==> notstar @@ (ignore,q))::trs,q'::final,initial
321 | DescendantOrSelf,ts ->
322 let tchange,tkeep = split_dest q trs in
323 let trs' = mk_dself_trs q' ts trs tchange in
324 (mk_tag_t dir q ts q' ignore) ::
325 (q==> star @@ (q,q))::(q==> notstar @@ (ignore,q))::trs',q'::final,initial
327 | FollowingSibling,ts ->
328 (mk_tag_t dir q ts q' ignore) :: (q ==> any @@ (ignore,q))::trs,q'::final,initial
330 (* q' is not returned and thus not added to the set of final states.
331 It's ok since we should never be in a final state on a node
334 | Attribute,ts -> let q'' = State.mk() in
335 (mk_tag_t Left q (TagSet.Xml.attribute) q' ignore)::
336 (mk_tag_t Left q' (ts) q'' ignore)::( q==> any @@ (ignore,q))::trs, q''::q'::final,initial
338 | Parent,ts -> let q'' = List.hd initial in
339 (mk_tag_t Left q' (star) q q')::
340 ( q'' ==> ts @@ (q',q''))::
341 ( q'' ==> star @@ (q'',q''))::
342 ( q'' ==> notstar @@ (ignore,q''))::trs,q'::q''::final,q''::initial
345 let q_out = List.hd final in
346 let tchange,tkeep = split_dest q_out trs in
347 let trs' = compile_pred q_out tkeep tchange pred in
350 and compile_pred q_out tkeep tchange p =
351 let rec pred_rec = function
353 | Or(p1,p2) -> cup (pred_rec p1) (pred_rec p2)
354 | And(p1,p2) -> cap (pred_rec p1) (pred_rec p2)
355 | Not(p) -> neg (pred_rec p)
356 | Expr e -> match compile_expr e with
357 | `True -> `Label (TagSet.Xml.any)
358 | `False -> `Label (TagSet.Xml.empty)
359 | e -> `Fun (fun t -> let r = Functions.truth_value (Functions.eval_expr t e)
360 in Printf.eprintf "Truth value is %b\n%!" r;r)
362 in match pred_rec p with
363 `Fun f -> mk_pred_trs f tkeep tchange
364 | `Label ts -> mk_self_trs ts tkeep tchange
366 and compile_expr = function
369 | Path p -> `Auto(compile p)
371 | String s -> `String s
372 | Function (f,elist) -> `Call(f,List.map compile_expr elist)
374 and cup a b = match a,b with
375 | `Label l1 , `Label l2 -> `Label(TagSet.Xml.cup l1 l2)
376 | `Fun f1 , `Fun f2 -> `Fun (fun x -> (f1 x)||(f2 x))
377 | `Fun f , `Label l | `Label l, `Fun f ->
379 (TagSet.Xml.mem (Tree.Binary.tag x) l)
382 and cap a b = match a,b with
383 | `Label l1, `Label l2 -> `Label (TagSet.Xml.cap l1 l2)
384 | `Fun f1,`Fun f2 -> `Fun (fun x -> (f1 x)&&(f2 x))
385 | `Fun f,`Label l | `Label l,`Fun f ->
387 (TagSet.Xml.mem (Tree.Binary.tag x) l)
390 `Label l -> `Label(TagSet.Xml.neg l)
391 | `Fun f -> `Fun (fun x -> not (f x))
394 let p = rev_map_dir p in
395 let ignore = State.mk()
397 let q0 = State.mk() in
398 let transitions = Transition.empty () in
399 let tlist,qlist,initacc = List.fold_left
400 (fun (tlist,qlist,initacc) (s,dir) ->
401 let q = List.hd qlist in
402 compile_step q dir tlist qlist initacc ignore s ) ([],[q0;ignore],[q0]) p
404 List.iter (Transition.add transitions) tlist;
405 let qmark = List.hd qlist in
406 { Automaton.mk() with
407 initial = from_list initacc;
408 final = from_list qlist;
409 transitions = transitions;
410 marking = from_list [qmark];
411 ignore = from_list [qmark;ignore];