1 (******************************************************************************)
2 (* SXSI : XPath evaluator *)
3 (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *)
4 (* Copyright NICTA 2008 *)
5 (* Distributed under the terms of the LGPL (see LICENCE) *)
6 (******************************************************************************)
7 #load "pa_extend.cmo";;
8 let contains = ref None
11 (* The steps are in reverse order !!!! *)
12 type path = Absolute of step list | AbsoluteDoS of step list| Relative of step list
13 and step = axis*test*predicate
14 and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling
15 | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following
19 and predicate = Or of predicate*predicate
20 | And of predicate*predicate
23 and expression = Path of path
24 | Function of string*expression list
33 let pp fmt = Format.fprintf fmt
34 let print_list printer fmt sep l =
37 | [e] -> printer fmt e
38 | e::es -> printer fmt e; List.iter (fun x -> pp fmt sep;printer fmt x) es
43 | Absolute l -> pp fmt "/"; l
44 | AbsoluteDoS l -> pp fmt "/";
45 print_step fmt (DescendantOrSelf,TagSet.node,Expr True);
49 print_list print_step fmt "/" (List.rev l)
50 and print_step fmt (axis,test,predicate) =
51 print_axis fmt axis;pp fmt "::";print_test fmt test;
52 pp fmt "["; print_predicate fmt predicate; pp fmt "]"
53 and print_axis fmt a = pp fmt "%s" (match a with
56 | Descendant -> "descendant"
57 | DescendantOrSelf -> "descendant-or-self"
58 | FollowingSibling -> "following-sibling"
59 | Attribute -> "attribute"
60 | Ancestor -> "ancestor"
61 | AncestorOrSelf -> "ancestor-or-self"
62 | PrecedingSibling -> "preceding-sibling"
66 and print_test fmt ts =
68 pp fmt "%s" (List.assoc ts
69 [ (TagSet.pcdata,"text()"); (TagSet.node,"node()");
72 Not_found -> pp fmt "%s"
73 (if TagSet.is_finite ts
74 then Tag.to_string (TagSet.choose ts)
77 and print_predicate fmt = function
78 | Or(p,q) -> print_predicate fmt p; pp fmt " or "; print_predicate fmt q
79 | And(p,q) -> print_predicate fmt p; pp fmt " and "; print_predicate fmt q
80 | Not p -> pp fmt "not "; print_predicate fmt p
81 | Expr e -> print_expression fmt e
83 and print_expression fmt = function
84 | Path p -> print fmt p
85 | Function (f,l) -> pp fmt "%s(" f;print_list print_expression fmt "," l;pp fmt ")"
86 | Int i -> pp fmt "%i" i
87 | String s -> pp fmt "\"%s\"" s
88 | t -> pp fmt "%b" (t== True)
95 let predopt = function None -> Expr True | Some p -> p
97 module Gram = Camlp4.Struct.Grammar.Static.Make(Ulexer)
98 let query = Gram.Entry.mk "query"
100 exception Error of Gram.Loc.t*string
101 let test_of_keyword t loc =
103 | "text()" -> TagSet.pcdata
104 | "node()" -> TagSet.node
106 | "and" | "not" | "or" -> TagSet.singleton (Tag.tag t)
107 | _ -> raise (Error(loc,"Invalid test name "^t ))
109 let axis_to_string a = let r = Format.str_formatter in
110 print_axis r a; Format.flush_str_formatter()
115 query : [ [ p = path; `EOI -> p ]]
119 [ "//" ; l = slist -> AbsoluteDoS l ]
120 | [ "/" ; l = slist -> Absolute l ]
121 | [ l = slist -> Relative l ]
126 [ l = slist ;"/"; s = step -> s@l ]
127 | [ l = slist ; "//"; s = step -> s@[(DescendantOrSelf, TagSet.node,Expr True)]@l]
132 (* yurk, this is done to parse stuff like
133 a/b/descendant/a where descendant is actually a tag name :(
134 if OPT is None then this is a child::descendant if not, this is a real axis name
136 [ axis = axis ; o = OPT ["::" ; t = test -> t ] ; p = top_pred ->
139 | Some(t) -> (axis,t,p)
140 | None -> (Child,TagSet.singleton (Tag.tag (axis_to_string axis)),p)
142 | Following -> [ (DescendantOrSelf,t,p);
143 (FollowingSibling,TagSet.star,Expr(True));
144 (Ancestor,TagSet.star,Expr(True)) ]
146 | Preceding -> [ (DescendantOrSelf,t,p);
147 (PrecedingSibling,TagSet.star,Expr(True));
148 (Ancestor,TagSet.star,Expr(True)) ]
153 | [ "." ; p = top_pred -> [(Self,TagSet.node,p)] ]
154 | [ ".." ; p = top_pred -> [(Parent,TagSet.star,p)] ]
155 | [ "contains"; "(" ; s = STRING ; ")";p=top_pred -> [
156 let _ = contains := Some(s) in (Child,TagSet.singleton Tag.pcdata, p)]
158 | [ "contains_full"; "(" ; s = STRING ; ")";p=top_pred -> [
159 let _ = contains := Some(s) in (Descendant,TagSet.singleton Tag.pcdata, p)]
161 | [ test = test; p = top_pred -> [(Child,test, p)] ]
162 | [ att = ATT ; p = top_pred ->
164 | "*" -> [(Attribute,TagSet.star,p)]
165 | _ -> [(Attribute, TagSet.singleton (Tag.tag att) ,p )]]
169 [ p = OPT [ "["; p=predicate ;"]" -> p ] -> predopt p ]
173 [ "self" -> Self | "child" -> Child | "descendant" -> Descendant
174 | "descendant-or-self" -> DescendantOrSelf
175 | "ancestor-or-self" -> AncestorOrSelf
176 | "following-sibling" -> FollowingSibling
177 | "attribute" -> Attribute
179 | "ancestor" -> Ancestor
180 | "preceding-sibling" -> PrecedingSibling
181 | "preceding" -> Preceding
182 | "following" -> Following
188 [ s = KWD -> test_of_keyword s _loc ]
189 | [ t = TAG -> TagSet.singleton (Tag.tag t) ]
194 [ p = predicate; "or"; q = predicate -> Or(p,q) ]
195 | [ p = predicate; "and"; q = predicate -> And(p,q) ]
196 | [ "not" ; p = predicate -> Not p ]
197 | [ "("; p = predicate ;")" -> p ]
198 | [ e = expression -> Expr e ]
202 [ f = TAG; "("; args = LIST0 expression SEP "," ; ")" -> Function(f,args)]
203 | [ `INT(i) -> Int (i) ]
204 | [ s = STRING -> String s ]
205 | [ p = path -> Path p ]
206 | [ "("; e = expression ; ")" -> e ]
211 let parse_string = Gram.parse_string query (Ulexer.Loc.mk "<string>")
212 let parse = Gram.parse_string query (Ulexer.Loc.mk "<string>")
216 module Compile = struct
218 type transition = Ata.State.t*TagSet.t*Ata.Transition.t
220 type config = { st_root : Ata.State.t; (* state matching the root element (initial state) *)
221 st_univ : Ata.State.t; (* universal state accepting anything *)
222 st_from_root : Ata.State.t; (* state chaining the root and the current position *)
223 mutable final_state : Ata.StateSet.t;
224 mutable has_backward: bool;
225 (* To store transitions *)
226 (* Key is the from state, (i,l) -> i the number of the step and l the list of trs *)
227 tr_parent_loop : (Ata.State.t,int*(transition list)) Hashtbl.t;
228 tr : (Ata.State.t,int*(transition list)) Hashtbl.t;
229 tr_aux : (Ata.State.t,int*(transition list)) Hashtbl.t;
230 mutable entry_points : (Tag.t*Ata.StateSet.t) list;
231 mutable contains : string option;
232 mutable univ_states : Ata.State.t list;
233 mutable starstate : Ata.StateSet.t option;
235 let dummy_conf = { st_root = -1;
238 final_state = Ata.StateSet.empty;
239 has_backward = false;
240 tr_parent_loop = Hashtbl.create 0;
241 tr = Hashtbl.create 0;
242 tr_aux = Hashtbl.create 0;
251 function (`Left|`Last) -> `Right
258 function (`Left|`Last) -> `Left
264 open Ata.Transition.Infix
265 open Ata.Formula.Infix
269 let add_trans num htr ((q,ts,_)as tr) =
270 Hashtbl.add htr q (num,[tr])
272 let vpush x y = (x,[]) :: y
275 | (z,r)::l -> (z,x::r) ::l
283 | (x,z::y) ::r -> z,(x,y)::r
286 let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num =
287 let ex = existential in
288 let axis,test,pred = step in
289 let is_last = dir = `Last in
290 let { st_root = q_root;
292 st_from_root = q_frm_root } = conf
294 let q_dst = Ata.State.make() in
295 let p_st, p_anc, p_par, p_pre, p_num, p_f =
296 compile_pred conf q_src num ctx_path dir pred q_dst
298 let new_st,new_dst, new_ctx =
300 | Child | Descendant ->
301 if (TagSet.is_finite test)
302 then conf.entry_points <- (TagSet.choose test,Ata.StateSet.singleton q_src)::conf.entry_points;
304 if nrec then `LLeft,`RRight
307 let _ = if is_last && axis=Descendant && TagSet.equal test TagSet.star
308 then conf.starstate <- Some(Ata.StateSet.singleton q_src)
310 let t1,ldst = ?< q_src><(test, is_last && not(ex))>=>
311 p_f *& ( if is_last then Ata.Formula.true_ else (_l left) *+ q_dst),
312 ( if is_last then [] else [q_dst])
315 let _ = add_trans num conf.tr t1 in
316 let _ = if axis=Descendant then
317 add_trans num conf.tr_aux (
318 ?< q_src><@ ((if ex||nrec then TagSet.diff TagSet.star test
319 else TagSet.star),false)>=>
320 (if TagSet.equal test TagSet.star then
321 `Left else `LLeft) *+ q_src )
324 ?< q_src><@ ((if ex then TagSet.diff TagSet.any test
325 else TagSet.any), false)>=>
326 (if axis=Descendant && (not (TagSet.equal test TagSet.star)) then
327 `RRight else `Right) *+ q_src
329 let _ = add_trans num conf.tr_aux t3
332 (if axis = FollowingSibling then hpush q_src ctx_path else vpush q_src ctx_path)
336 let q_dstreal = Ata.State.make() in
337 (* attributes are always the first child *)
338 let t1 = ?< q_src><(TagSet.attribute,false)>=>
340 let t2 = ?< q_dst><(test, is_last && not(existential))>=>
341 if is_last then Ata.Formula.true_ else `Left *+ q_dstreal in
342 let tsa = ?< q_dst><(TagSet.star, false)>=> `Right *+ q_dst
344 add_trans num conf.tr t1;
345 add_trans num conf.tr_aux t2;
346 add_trans num conf.tr_aux tsa;
347 [q_dst;q_dstreal], q_dstreal,
353 (* todo change everything to Ata.StateSet *)
354 (Ata.StateSet.elements (Ata.StateSet.union p_st (Ata.StateSet.from_list new_st)),
357 and is_rec = function
359 | ((axis,_,_),_)::_ ->
361 Descendant | Ancestor -> true
364 and compile_path ?(existential=false) annot_path config q_src states idx ctx_path =
366 (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward,a_isrec) (step,dir) ->
367 let add_states,new_dst,new_ctx =
368 compile_step ~existential:existential config a_dst dir ctx_path (is_rec a_isrec) step num
370 let new_states = Ata.StateSet.union (Ata.StateSet.from_list add_states) a_st in
371 let nanc_st,npar_st,npre_st,new_bw =
373 |PrecedingSibling,_,_ -> anc_st,par_st,Ata.StateSet.add a_dst pre_st,true
374 |(Parent|Ancestor|AncestorOrSelf),_,_ -> Ata.StateSet.add a_dst anc_st,par_st,pre_st,true
375 | _ -> anc_st,par_st,pre_st,has_backward
377 new_states,new_dst,nanc_st,npar_st,npre_st,new_ctx, num+1,new_bw,(match a_isrec with [] -> [] | _::r -> r)
379 (states, q_src, Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty, ctx_path,idx, false,(List.tl annot_path) )
382 and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst =
383 let a_st1,anc_st1,par_st1,pre_st1,idx1,f1 =
384 compile_pred conf q_src idx ctx_path dir p1 ddst in
385 let a_st2,anc_st2,par_st2,pre_st2,idx2,f2 =
386 compile_pred conf q_src idx1 ctx_path dir p2 ddst
388 Ata.StateSet.union a_st1 a_st2,
389 Ata.StateSet.union anc_st1 anc_st2,
390 Ata.StateSet.union par_st1 par_st2,
391 Ata.StateSet.union pre_st1 pre_st2,
394 and compile_pred conf q_src idx ctx_path dir pred qdst =
397 binop_ conf q_src idx ctx_path dir pred p1 p2 (( +| )) qdst
399 binop_ conf q_src idx ctx_path dir pred p1 p2 (( *& )) qdst
400 | Expr e -> compile_expr conf Ata.StateSet.empty q_src idx ctx_path dir e qdst
402 let a_st,anc_st,par_st,pre_st,idx,f =
403 compile_pred conf q_src idx ctx_path dir p qdst
404 in a_st,anc_st,par_st,pre_st,idx, Ata.Formula.not_ f
406 and compile_expr conf states q_src idx ctx_path dir e qdst =
409 let q = Ata.State.make () in
410 let annot_path = match p with Relative(r) -> dirannot (List.rev r) | _ -> assert false in
411 let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward,_ =
412 compile_path ~existential:true annot_path conf q states idx ctx_path
414 let ret_dir = match annot_path with
415 | ((FollowingSibling,_,_),_)::_ -> `Right
418 let _ = match annot_path with
419 | (((Parent|Ancestor|AncestorOrSelf),_,_),_)::_ -> conf.final_state <- Ata.StateSet.add qdst conf.final_state
421 in let _ = conf.univ_states <- a_dst::conf.univ_states in
422 (a_st,anc_st,par_st,pre_st,idx, ((ret_dir) *+ q))
423 | True -> states,Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty,idx,Ata.Formula.true_
424 | False -> states,Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty,idx,Ata.Formula.false_
428 and dirannot = function
431 | p::(((FollowingSibling),_,_)::_ as l) -> (p,`Right)::(dirannot l)
432 | p::l -> (p,`Left) :: (dirannot l)
434 let compile ?(querystring="") path =
438 | Relative(steps) -> steps
439 | AbsoluteDoS(steps) -> steps@[(DescendantOrSelf,TagSet.node,Expr(True))]
441 let steps = List.rev steps in
442 let dirsteps = dirannot steps in
443 let config = { st_root = Ata.State.make();
444 st_univ = Ata.State.make();
445 final_state = Ata.StateSet.empty;
446 st_from_root = Ata.State.make();
447 has_backward = false;
448 tr_parent_loop = Hashtbl.create 5;
449 tr = Hashtbl.create 5;
450 tr_aux = Hashtbl.create 5;
457 let q0 = Ata.State.make() in
458 let states = Ata.StateSet.from_list [config.st_univ;config.st_root]
461 (* add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_univ config.st_from_root);
462 add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_from_root config.st_univ);
463 add_trans num config.tr_aux (mk_step config.st_no_nil (TagSet.add Tag.pcdata TagSet.star) `Left config.st_univ config.st_univ);
465 let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward,_ =
466 compile_path dirsteps config q0 states 0 [(config.st_root,[]) ]
469 ?< (config.st_root) >< (TagSet.singleton (Tag.tag ""),false) >=>
470 ((if is_rec dirsteps then `LLeft else `Left)*+ q0) *& (if config.has_backward then `LLeft *+ config.st_from_root else Ata.Formula.true_)
472 add_trans num config.tr fst_tr;
473 if config.has_backward then begin
474 add_trans num config.tr_aux
475 (?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft *+ config.st_from_root);
476 add_trans num config.tr_aux
477 (?< (config.st_from_root) >< (TagSet.any,false) >=>
478 `RRight *+ config.st_from_root);
481 let phi = Hashtbl.create 37 in
482 let fadd = fun _ (_,l) -> List.iter (fun (s,t,tr) ->
487 Hashtbl.replace phi s ((t,tr)::lt)
489 Hashtbl.iter (fadd) config.tr;
490 Hashtbl.iter (fadd) config.tr_aux;
491 Hashtbl.iter (fadd) config.tr_parent_loop;
494 in if has_backward then Ata.StateSet.add config.st_from_root s else s
495 in { Ata.id = Oo.id (object end);
496 Ata.states = Hashtbl.fold (fun q _ acc -> Ata.StateSet.add q acc) phi Ata.StateSet.empty;
497 Ata.init = Ata.StateSet.singleton config.st_root;
499 Ata.starstate = config.starstate;
500 Ata.query_string = querystring;
501 },config.entry_points,!contains