1 (******************************************************************************)
2 (* SXSI : XPath evaluator *)
3 (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *)
4 (* Copyright NICTA 2008 *)
5 (* Distributed under the terms of the LGPL (see LICENCE) *)
6 (******************************************************************************)
8 #load "pa_extend.cmo";;
9 let contains = ref None
12 (* The steps are in reverse order !!!! *)
13 type path = Absolute of step list | AbsoluteDoS of step list| Relative of step list
14 and step = axis*test*predicate
15 and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling
16 | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following
20 and predicate = Or of predicate*predicate
21 | And of predicate*predicate
24 and expression = Path of path
25 | Function of string*expression list
34 let pp fmt = Format.fprintf fmt
35 let print_list printer fmt sep l =
38 | [e] -> printer fmt e
39 | e::es -> printer fmt e; List.iter (fun x -> pp fmt sep;printer fmt x) es
44 | Absolute l -> pp fmt "/"; l
45 | AbsoluteDoS l -> pp fmt "/";
46 print_step fmt (DescendantOrSelf,TagSet.node,Expr True);
50 print_list print_step fmt "/" (List.rev l)
51 and print_step fmt (axis,test,predicate) =
52 print_axis fmt axis;pp fmt "::";print_test fmt test;
53 pp fmt "["; print_predicate fmt predicate; pp fmt "]"
54 and print_axis fmt a = pp fmt "%s" (match a with
57 | Descendant -> "descendant"
58 | DescendantOrSelf -> "descendant-or-self"
59 | FollowingSibling -> "following-sibling"
60 | Attribute -> "attribute"
61 | Ancestor -> "ancestor"
62 | AncestorOrSelf -> "ancestor-or-self"
63 | PrecedingSibling -> "preceding-sibling"
67 and print_test fmt ts =
69 pp fmt "%s" (List.assoc ts
70 [ (TagSet.pcdata,"text()"); (TagSet.node,"node()");
73 Not_found -> pp fmt "%s"
74 (if TagSet.is_finite ts
75 then Tag.to_string (TagSet.choose ts)
78 and print_predicate fmt = function
79 | Or(p,q) -> print_predicate fmt p; pp fmt " or "; print_predicate fmt q
80 | And(p,q) -> print_predicate fmt p; pp fmt " and "; print_predicate fmt q
81 | Not p -> pp fmt "not "; print_predicate fmt p
82 | Expr e -> print_expression fmt e
84 and print_expression fmt = function
85 | Path p -> print fmt p
86 | Function (f,l) -> pp fmt "%s(" f;print_list print_expression fmt "," l;pp fmt ")"
87 | Int i -> pp fmt "%i" i
88 | String s -> pp fmt "\"%s\"" s
89 | t -> pp fmt "%b" (t== True)
96 let predopt = function None -> Expr True | Some p -> p
98 module Gram = Camlp4.Struct.Grammar.Static.Make(Ulexer)
99 let query = Gram.Entry.mk "query"
101 exception Error of Gram.Loc.t*string
102 let test_of_keyword t loc =
104 | "text()" -> TagSet.pcdata
105 | "node()" -> TagSet.node
107 | "and" | "not" | "or" -> TagSet.singleton (Tag.tag t)
108 | _ -> raise (Error(loc,"Invalid test name "^t ))
110 let axis_to_string a = let r = Format.str_formatter in
111 print_axis r a; Format.flush_str_formatter()
116 query : [ [ p = path; `EOI -> p ]]
120 [ "//" ; l = slist -> AbsoluteDoS l ]
121 | [ "/" ; l = slist -> Absolute l ]
122 | [ l = slist -> Relative l ]
127 [ l = slist ;"/"; s = step -> s@l ]
128 | [ l = slist ; "//"; s = step -> s@[(DescendantOrSelf, TagSet.node,Expr True)]@l]
133 (* yurk, this is done to parse stuff like
134 a/b/descendant/a where descendant is actually a tag name :(
135 if OPT is None then this is a child::descendant if not, this is a real axis name
137 [ axis = axis ; o = OPT ["::" ; t = test -> t ] ; p = top_pred ->
140 | Some(t) -> (axis,t,p)
141 | None -> (Child,TagSet.singleton (Tag.tag (axis_to_string axis)),p)
143 | Following -> [ (DescendantOrSelf,t,p);
144 (FollowingSibling,TagSet.star,Expr(True));
145 (Ancestor,TagSet.star,Expr(True)) ]
147 | Preceding -> [ (DescendantOrSelf,t,p);
148 (PrecedingSibling,TagSet.star,Expr(True));
149 (Ancestor,TagSet.star,Expr(True)) ]
154 | [ "." ; p = top_pred -> [(Self,TagSet.node,p)] ]
155 | [ ".." ; p = top_pred -> [(Parent,TagSet.star,p)] ]
156 | [ "contains"; "(" ; s = STRING ; ")";p=top_pred -> [
157 let _ = contains := Some(s) in (Child,TagSet.singleton Tag.pcdata, p)]
159 | [ "contains_full"; "(" ; s = STRING ; ")";p=top_pred -> [
160 let _ = contains := Some(s) in (Descendant,TagSet.singleton Tag.pcdata, p)]
162 | [ test = test; p = top_pred -> [(Child,test, p)] ]
163 | [ att = ATT ; p = top_pred ->
165 | "*" -> [(Attribute,TagSet.star,p)]
166 | _ -> [(Attribute, TagSet.singleton (Tag.tag att) ,p )]]
170 [ p = OPT [ "["; p=predicate ;"]" -> p ] -> predopt p ]
174 [ "self" -> Self | "child" -> Child | "descendant" -> Descendant
175 | "descendant-or-self" -> DescendantOrSelf
176 | "ancestor-or-self" -> AncestorOrSelf
177 | "following-sibling" -> FollowingSibling
178 | "attribute" -> Attribute
180 | "ancestor" -> Ancestor
181 | "preceding-sibling" -> PrecedingSibling
182 | "preceding" -> Preceding
183 | "following" -> Following
189 [ s = KWD -> test_of_keyword s _loc ]
190 | [ t = TAG -> TagSet.singleton (Tag.tag t) ]
195 [ p = predicate; "or"; q = predicate -> Or(p,q) ]
196 | [ p = predicate; "and"; q = predicate -> And(p,q) ]
197 | [ "not" ; p = predicate -> Not p ]
198 | [ "("; p = predicate ;")" -> p ]
199 | [ e = expression -> Expr e ]
203 [ f = TAG; "("; args = LIST0 expression SEP "," ; ")" -> Function(f,args)]
204 | [ `INT(i) -> Int (i) ]
205 | [ s = STRING -> String s ]
206 | [ p = path -> Path p ]
207 | [ "("; e = expression ; ")" -> e ]
212 let parse_string = Gram.parse_string query (Ulexer.Loc.mk "<string>")
213 let parse = Gram.parse_string query (Ulexer.Loc.mk "<string>")
217 module Compile = struct
220 type config = { st_root : Ata.state; (* state matching the root element (initial state) *)
221 st_univ : Ata.state; (* universal state accepting anything *)
222 st_from_root : Ata.state; (* state chaining the root and the current position *)
223 mutable final_state : Ptset.t;
224 mutable has_backward: bool;
225 (* To store transitions *)
226 (* Key is the from state, (i,l) -> i the number of the step and l the list of trs *)
227 tr_parent_loop : (Ata.state,int*(Ata.transition list)) Hashtbl.t;
228 tr : (Ata.state,int*(Ata.transition list)) Hashtbl.t;
229 tr_aux : (Ata.state,int*(Ata.transition list)) Hashtbl.t;
230 mutable entry_points : (Tag.t*Ptset.t) list;
231 mutable contains : string option;
233 let dummy_conf = { st_root = -1;
236 final_state = Ptset.empty;
237 has_backward = false;
238 tr_parent_loop = Hashtbl.create 0;
239 tr = Hashtbl.create 0;
240 tr_aux = Hashtbl.create 0;
247 function (`Left|`Last) -> `Right
254 function (`Left|`Last) -> `Left
263 let add_trans num htr ((q,_,_,_,_) as tr) =
265 let (i,ltr) = Hashtbl.find htr q in
266 if List.exists (Ata.equal_trans tr) ltr
268 else Hashtbl.replace htr q (i,(tr::ltr))
270 | Not_found -> Hashtbl.add htr q (num,[tr])
272 exception Exit of Ata.state * Ata.transition list
273 let rec replace s f =
275 | Ata.Atom(_,b,q) when q = s -> if b then Ata.true_ else Ata.false_
276 | Ata.Or(f1,f2) -> (replace s f1) +| (replace s f2)
277 | Ata.And(f1,f2) -> (replace s f1) *& (replace s f2)
281 let or_self conf old_dst q_src q_dst dir test pred mark =
283 let (num,l) = Hashtbl.find conf.tr q_src in
284 let l2 = List.fold_left (fun acc (q,t,m,f,_) ->
288 (if mark then replace old_dst f else f)
290 (if mark then Ata.true_ else (_l dir) ** q_dst),
293 in Hashtbl.replace conf.tr q_src (num,l2)
297 let nst = Ata.mk_state
298 let att_or_str = TagSet.add Tag.pcdata TagSet.attribute
299 let vpush x y = (x,[]) :: y
302 | (z,r)::l -> (z,x::r) ::l
310 | (x,z::y) ::r -> z,(x,y)::r
313 let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num =
314 let ex = existential in
315 let axis,test,pred = step in
316 let is_last = dir = `Last in
317 let { st_root = q_root;
319 st_from_root = q_frm_root } = conf
321 let q_dst = Ata.mk_state() in
322 let p_st, p_anc, p_par, p_pre, p_num, p_f =
323 compile_pred conf q_src num ctx_path dir pred q_dst
325 let new_st,new_dst, new_ctx =
328 | Child | Descendant ->
330 if nrec then `LLeft,`RRight
334 let t1 = ?< q_src><(test, is_last && not(ex))>=>
335 p_f *& ( if false (*is_last*) then Ata.true_ else (_l left) ** q_dst) in
337 let _ = add_trans num conf.tr t1 in
340 let _ = if axis=Descendant then
341 add_trans num conf.tr_aux (
342 ?< q_src><@ ((if ex||nrec then TagSet.diff TagSet.star test
343 else TagSet.star),false,
344 `True )>=> `LLeft ** q_src )
347 ?< q_src><@ ((if ex then TagSet.diff TagSet.any test
348 else TagSet.any), false, `True )>=>
349 if ex then ( Ata.atom_ `Left false q_src) *& right ** q_src
350 else (if axis=Descendant then `RRight else `Right) ** q_src
352 let _ = add_trans num conf.tr_aux t3
355 (if axis = FollowingSibling then hpush q_src ctx_path else vpush q_src ctx_path)
359 let q_dstreal = Ata.mk_state() in
360 (* attributes are always the first child *)
361 let t1 = ?< q_src><(TagSet.attribute,false)>=>
363 let t2 = ?< q_dst><(test, is_last && not(existential))>=>
364 if is_last then Ata.true_ else `Left ** q_dstreal in
365 let tsa = ?< q_dst><(TagSet.star, false)>=> `Right ** q_dst
367 add_trans num conf.tr t1;
368 add_trans num conf.tr_aux t2;
369 add_trans num conf.tr_aux tsa;
370 [q_dst;q_dstreal], q_dstreal,
373 | Ancestor | AncestorOrSelf ->
374 conf.has_backward <- true;
375 let up_states, new_ctx =
376 List.fold_left (fun acc (q,_) -> if q == q_root then acc else q::acc) [] ctx_path, (vpush q_root [])
378 let _ = if axis = AncestorOrSelf then
379 or_self conf q_src (fst(vpop ctx_path)) q_dst dir test p_f (is_last && not(existential));
381 let fc = List.fold_left (fun f s -> ((_l dir)**s +|f)) Ata.false_ up_states
383 let t1 = ?< q_frm_root><(test,is_last && (not existential) )>=>
384 ( (*if is_last then Ata.true_ else *) (`LLeft ) ** q_dst) *& fc in
385 add_trans num conf.tr t1;
386 [q_dst ], q_dst, vpush q_frm_root new_ctx
389 conf.has_backward <- true;
392 | (a,_)::[] -> a, vpush q_root []
396 let t1 = ?< q_frm_root>< (test,is_last && (not existential)) >=>
397 (if is_last then Ata.true_ else (_l dir) ** q_dst) *& (_l dir) ** q_self in
398 add_trans num conf.tr t1;
399 [ q_dst ], q_dst, vpush q_frm_root new_ctx
403 (* todo change everything to Ptset *)
404 (Ptset.elements (Ptset.union p_st (Ptset.from_list new_st)),
407 and is_rec = function
409 | ((axis,_,_),_)::_ ->
411 Descendant | Ancestor -> true
414 and compile_path ?(existential=false) annot_path config q_src states idx ctx_path =
416 (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward,a_isrec) (step,dir) ->
417 let add_states,new_dst,new_ctx =
418 compile_step ~existential:existential config a_dst dir ctx_path (is_rec a_isrec) step num
420 let new_states = Ptset.union (Ptset.from_list add_states) a_st in
421 let nanc_st,npar_st,npre_st,new_bw =
423 |PrecedingSibling,_,_ -> anc_st,par_st,Ptset.add a_dst pre_st,true
424 |(Parent|Ancestor|AncestorOrSelf),_,_ -> Ptset.add a_dst anc_st,par_st,pre_st,true
425 | _ -> anc_st,par_st,pre_st,has_backward
427 new_states,new_dst,nanc_st,npar_st,npre_st,new_ctx, num+1,new_bw,(match a_isrec with [] -> [] | _::r -> r)
429 (states, q_src, Ptset.empty,Ptset.empty,Ptset.empty, ctx_path,idx, false,(List.tl annot_path) )
432 and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst =
433 let a_st1,anc_st1,par_st1,pre_st1,idx1,f1 =
434 compile_pred conf q_src idx ctx_path dir p1 ddst in
435 let a_st2,anc_st2,par_st2,pre_st2,idx2,f2 =
436 compile_pred conf q_src idx1 ctx_path dir p2 ddst
438 Ptset.union a_st1 a_st2,
439 Ptset.union anc_st1 anc_st2,
440 Ptset.union par_st1 par_st2,
441 Ptset.union pre_st1 pre_st2,
444 and compile_pred conf q_src idx ctx_path dir pred qdst =
447 binop_ conf q_src idx ctx_path dir pred p1 p2 (( +| )) qdst
449 binop_ conf q_src idx ctx_path dir pred p1 p2 (( *& )) qdst
450 | Expr e -> compile_expr conf Ptset.empty q_src idx ctx_path dir e qdst
452 let a_st,anc_st,par_st,pre_st,idx,f =
453 compile_pred conf q_src idx ctx_path dir p qdst
454 in a_st,anc_st,par_st,pre_st,idx, Ata.not_ f
456 and compile_expr conf states q_src idx ctx_path dir e qdst =
459 let q = Ata.mk_state () in
460 let annot_path = match p with Relative(r) -> dirannot (List.rev r) | _ -> assert false in
461 let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward,_ =
462 compile_path ~existential:true annot_path conf q states idx ctx_path
464 let ret_dir = match annot_path with
465 | ((FollowingSibling,_,_),_)::_ -> `Right
468 let _ = match annot_path with
469 | (((Parent|Ancestor|AncestorOrSelf),_,_),_)::_ -> conf.final_state <- Ptset.add qdst conf.final_state
472 (a_st,anc_st,par_st,pre_st,idx, ((ret_dir) ** q))
473 | True -> states,Ptset.empty,Ptset.empty,Ptset.empty,idx,Ata.true_
474 | False -> states,Ptset.empty,Ptset.empty,Ptset.empty,idx,Ata.false_
478 and dirannot = function
481 | p::(((FollowingSibling),_,_)::_ as l) -> (p,`Right)::(dirannot l)
482 | p::l -> (p,`Left) :: (dirannot l)
488 | Relative(steps) -> steps
489 | AbsoluteDoS(steps) -> steps@[(DescendantOrSelf,TagSet.node,Expr(True))]
491 let steps = List.rev steps in
492 let dirsteps = dirannot steps in
493 let _ = Ata.mk_state() in
494 let config = { st_root = Ata.mk_state();
495 st_univ = Ata.mk_state();
496 final_state = Ptset.empty;
497 st_from_root = Ata.mk_state();
498 has_backward = false;
499 tr_parent_loop = Hashtbl.create 5;
500 tr = Hashtbl.create 5;
501 tr_aux = Hashtbl.create 5;
506 let q0 = Ata.mk_state() in
507 let states = Ptset.from_list [config.st_univ;config.st_root]
510 (* add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_univ config.st_from_root);
511 add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_from_root config.st_univ);
512 add_trans num config.tr_aux (mk_step config.st_no_nil (TagSet.add Tag.pcdata TagSet.star) `Left config.st_univ config.st_univ);
514 let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward,_ =
515 compile_path dirsteps config q0 states 0 [(config.st_root,[]) ]
518 ?< (config.st_root) >< (TagSet.singleton (Tag.tag ""),false) >=>
519 ((if is_rec dirsteps then `LLeft else `Left)** q0) *& (if config.has_backward then `LLeft ** config.st_from_root else Ata.true_)
521 add_trans num config.tr fst_tr;
522 if config.has_backward then begin
523 add_trans num config.tr_aux
524 (?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft ** config.st_from_root);
525 add_trans num config.tr_aux
526 (?< (config.st_from_root) >< (TagSet.any,false) >=>
527 `RRight ** config.st_from_root);
530 let phi = Hashtbl.create 37 in
531 let fadd = fun _ (_,l) -> List.iter (fun (s,t,m,f,p) ->
536 Hashtbl.replace phi s ((t,(m,f,p))::lt)
538 Hashtbl.iter (fadd) config.tr;
539 Hashtbl.iter (fadd) config.tr_aux;
540 Hashtbl.iter (fadd) config.tr_parent_loop;
542 let s = Ptset.union anc_st (Ptset.from_list [])
543 in if has_backward then Ptset.add config.st_from_root s else s
544 in { Ata.id = Oo.id (object end);
545 Ata.states = if has_backward then Ptset.add config.st_from_root a_st else a_st;
546 Ata.init = Ptset.singleton config.st_root;
547 Ata.final = Ptset.union anc_st config.final_state;
548 Ata.universal = Ptset.singleton a_dst;
550 Ata.delta = Hashtbl.create 17;
551 Ata.sigma = Ata.HTagSet.create 17;
552 },config.entry_points,!contains