(******************************************************************************) (* SXSI : XPath evaluator *) (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *) (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) INCLUDE "debug.ml";; #load "pa_extend.cmo";; let contains = ref None module Ast = struct (* The steps are in reverse order !!!! *) type path = Absolute of step list | AbsoluteDoS of step list| Relative of step list and step = axis*test*predicate and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following and test = TagSet.t and predicate = Or of predicate*predicate | And of predicate*predicate | Not of predicate | Expr of expression and expression = Path of path | Function of string*expression list | Int of int | String of string | True | False type t = path let pp fmt = Format.fprintf fmt let print_list printer fmt sep l = match l with [] -> () | [e] -> printer fmt e | e::es -> printer fmt e; List.iter (fun x -> pp fmt sep;printer fmt x) es let rec print fmt p = let l = match p with | Absolute l -> pp fmt "/"; l | AbsoluteDoS l -> pp fmt "/"; print_step fmt (DescendantOrSelf,TagSet.node,Expr True); pp fmt "/"; l | Relative l -> l in print_list print_step fmt "/" (List.rev l) and print_step fmt (axis,test,predicate) = print_axis fmt axis;pp fmt "::";print_test fmt test; pp fmt "["; print_predicate fmt predicate; pp fmt "]" and print_axis fmt a = pp fmt "%s" (match a with Self -> "self" | Child -> "child" | Descendant -> "descendant" | DescendantOrSelf -> "descendant-or-self" | FollowingSibling -> "following-sibling" | Attribute -> "attribute" | Ancestor -> "ancestor" | AncestorOrSelf -> "ancestor-or-self" | PrecedingSibling -> "preceding-sibling" | Parent -> "parent" | _ -> assert false ) and print_test fmt ts = try pp fmt "%s" (List.assoc ts [ (TagSet.pcdata,"text()"); (TagSet.node,"node()"); (TagSet.star),"*"]) with Not_found -> pp fmt "%s" (if TagSet.is_finite ts then Tag.to_string (TagSet.choose ts) else "") and print_predicate fmt = function | Or(p,q) -> print_predicate fmt p; pp fmt " or "; print_predicate fmt q | And(p,q) -> print_predicate fmt p; pp fmt " and "; print_predicate fmt q | Not p -> pp fmt "not "; print_predicate fmt p | Expr e -> print_expression fmt e and print_expression fmt = function | Path p -> print fmt p | Function (f,l) -> pp fmt "%s(" f;print_list print_expression fmt "," l;pp fmt ")" | Int i -> pp fmt "%i" i | String s -> pp fmt "\"%s\"" s | t -> pp fmt "%b" (t== True) end module Parser = struct open Ast open Ulexer let predopt = function None -> Expr True | Some p -> p module Gram = Camlp4.Struct.Grammar.Static.Make(Ulexer) let query = Gram.Entry.mk "query" exception Error of Gram.Loc.t*string let test_of_keyword t loc = match t with | "text()" -> TagSet.pcdata | "node()" -> TagSet.node | "*" -> TagSet.star | "and" | "not" | "or" -> TagSet.singleton (Tag.tag t) | _ -> raise (Error(loc,"Invalid test name "^t )) let axis_to_string a = let r = Format.str_formatter in print_axis r a; Format.flush_str_formatter() EXTEND Gram GLOBAL: query; query : [ [ p = path; `EOI -> p ]] ; path : [ [ "//" ; l = slist -> AbsoluteDoS l ] | [ "/" ; l = slist -> Absolute l ] | [ l = slist -> Relative l ] ] ; slist: [ [ l = slist ;"/"; s = step -> s@l ] | [ l = slist ; "//"; s = step -> s@[(DescendantOrSelf, TagSet.node,Expr True)]@l] | [ s = step -> s ] ]; step : [ (* yurk, this is done to parse stuff like a/b/descendant/a where descendant is actually a tag name :( if OPT is None then this is a child::descendant if not, this is a real axis name *) [ axis = axis ; o = OPT ["::" ; t = test -> t ] ; p = top_pred -> let a,t,p = match o with | Some(t) -> (axis,t,p) | None -> (Child,TagSet.singleton (Tag.tag (axis_to_string axis)),p) in match a with | Following -> [ (DescendantOrSelf,t,p); (FollowingSibling,TagSet.star,Expr(True)); (Ancestor,TagSet.star,Expr(True)) ] | Preceding -> [ (DescendantOrSelf,t,p); (PrecedingSibling,TagSet.star,Expr(True)); (Ancestor,TagSet.star,Expr(True)) ] | _ -> [ a,t,p ] ] | [ "." ; p = top_pred -> [(Self,TagSet.node,p)] ] | [ ".." ; p = top_pred -> [(Parent,TagSet.star,p)] ] | [ "contains"; "(" ; s = STRING ; ")";p=top_pred -> [ let _ = contains := Some(s) in (Child,TagSet.singleton Tag.pcdata, p)] ] | [ "contains_full"; "(" ; s = STRING ; ")";p=top_pred -> [ let _ = contains := Some(s) in (Descendant,TagSet.singleton Tag.pcdata, p)] ] | [ test = test; p = top_pred -> [(Child,test, p)] ] | [ att = ATT ; p = top_pred -> match att with | "*" -> [(Attribute,TagSet.star,p)] | _ -> [(Attribute, TagSet.singleton (Tag.tag att) ,p )]] ] ; top_pred : [ [ p = OPT [ "["; p=predicate ;"]" -> p ] -> predopt p ] ] ; axis : [ [ "self" -> Self | "child" -> Child | "descendant" -> Descendant | "descendant-or-self" -> DescendantOrSelf | "ancestor-or-self" -> AncestorOrSelf | "following-sibling" -> FollowingSibling | "attribute" -> Attribute | "parent" -> Parent | "ancestor" -> Ancestor | "preceding-sibling" -> PrecedingSibling | "preceding" -> Preceding | "following" -> Following ] ]; test : [ [ s = KWD -> test_of_keyword s _loc ] | [ t = TAG -> TagSet.singleton (Tag.tag t) ] ]; predicate: [ [ p = predicate; "or"; q = predicate -> Or(p,q) ] | [ p = predicate; "and"; q = predicate -> And(p,q) ] | [ "not" ; p = predicate -> Not p ] | [ "("; p = predicate ;")" -> p ] | [ e = expression -> Expr e ] ]; expression: [ [ f = TAG; "("; args = LIST0 expression SEP "," ; ")" -> Function(f,args)] | [ `INT(i) -> Int (i) ] | [ s = STRING -> String s ] | [ p = path -> Path p ] | [ "("; e = expression ; ")" -> e ] ] ; END ;; let parse_string = Gram.parse_string query (Ulexer.Loc.mk "") let parse = Gram.parse_string query (Ulexer.Loc.mk "") end module Compile = struct open Ast type config = { st_root : Ata.state; (* state matching the root element (initial state) *) st_univ : Ata.state; (* universal state accepting anything *) st_from_root : Ata.state; (* state chaining the root and the current position *) mutable final_state : Ptset.t; mutable has_backward: bool; (* To store transitions *) (* Key is the from state, (i,l) -> i the number of the step and l the list of trs *) tr_parent_loop : (Ata.state,int*(Ata.transition list)) Hashtbl.t; tr : (Ata.state,int*(Ata.transition list)) Hashtbl.t; tr_aux : (Ata.state,int*(Ata.transition list)) Hashtbl.t; mutable entry_points : (Tag.t*Ptset.t) list; mutable contains : string option; mutable univ_states : Ata.state list; mutable starstate : Ptset.t option; } let dummy_conf = { st_root = -1; st_univ = -1; st_from_root = -1; final_state = Ptset.empty; has_backward = false; tr_parent_loop = Hashtbl.create 0; tr = Hashtbl.create 0; tr_aux = Hashtbl.create 0; entry_points = []; contains = None; univ_states = []; starstate = None; } let _r = function (`Left|`Last) -> `Right | `Right -> `Left | `RRight -> `LLeft | `LLeft -> `RRight let _l = function (`Left|`Last) -> `Left | `Right -> `Right | `RRight -> `RRight | `LLeft -> `LLeft open Ata.Transitions let add_trans num htr ((q,_,_,_,_) as tr) = try let (i,ltr) = Hashtbl.find htr q in if List.exists (Ata.equal_trans tr) ltr then () else Hashtbl.replace htr q (i,(tr::ltr)) with | Not_found -> Hashtbl.add htr q (num,[tr]) exception Exit of Ata.state * Ata.transition list let rec replace s f = match f.Ata.pos with | Ata.Atom(_,b,q) when q = s -> if b then Ata.true_ else Ata.false_ | Ata.Or(f1,f2) -> (replace s f1) +| (replace s f2) | Ata.And(f1,f2) -> (replace s f1) *& (replace s f2) | _ -> f let or_self conf old_dst q_src q_dst dir test pred mark = try let (num,l) = Hashtbl.find conf.tr q_src in let l2 = List.fold_left (fun acc (q,t,m,f,_) -> (q, TagSet.cap t test, mark, (if mark then replace old_dst f else f) *& pred *& (if mark then Ata.true_ else (_l dir) ** q_dst), false)::acc) l l in Hashtbl.replace conf.tr q_src (num,l2) with Not_found -> () let nst = Ata.mk_state let att_or_str = TagSet.add Tag.pcdata TagSet.attribute let vpush x y = (x,[]) :: y let hpush x y = match y with | (z,r)::l -> (z,x::r) ::l | _ -> assert false let vpop = function (x,_)::r -> x,r | _ -> assert false let hpop = function | (x,z::y) ::r -> z,(x,y)::r | _-> assert false let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num = let ex = existential in let axis,test,pred = step in let is_last = dir = `Last in let { st_root = q_root; st_univ = q_univ; st_from_root = q_frm_root } = conf in let q_dst = Ata.mk_state() in let p_st, p_anc, p_par, p_pre, p_num, p_f = compile_pred conf q_src num ctx_path dir pred q_dst in let new_st,new_dst, new_ctx = match axis with | Child | Descendant -> if (TagSet.is_finite test) then conf.entry_points <- (TagSet.choose test,Ptset.singleton q_src)::conf.entry_points; let left,right = if nrec then `LLeft,`RRight else `Left,`Right in let _ = if is_last && axis=Descendant && TagSet.equal test TagSet.star then conf.starstate <- Some(Ptset.singleton q_src) in let t1 = ?< q_src><(test, is_last && not(ex))>=> p_f *& ( if is_last then Ata.true_ else (_l left) ** q_dst) in let _ = add_trans num conf.tr t1 in let _ = if axis=Descendant then add_trans num conf.tr_aux ( ?< q_src><@ ((if ex||nrec then TagSet.diff TagSet.star test else TagSet.star),false)>=> `LLeft ** q_src ) in let t3 = ?< q_src><@ ((if ex then TagSet.diff TagSet.any test else TagSet.any), false)>=> if ex then right ** q_src else (if axis=Descendant then `RRight else `Right) ** q_src in let _ = add_trans num conf.tr_aux t3 in [q_dst], q_dst, (if axis = FollowingSibling then hpush q_src ctx_path else vpush q_src ctx_path) | Attribute -> let q_dstreal = Ata.mk_state() in (* attributes are always the first child *) let t1 = ?< q_src><(TagSet.attribute,false)>=> `Left ** q_dst in let t2 = ?< q_dst><(test, is_last && not(existential))>=> if is_last then Ata.true_ else `Left ** q_dstreal in let tsa = ?< q_dst><(TagSet.star, false)>=> `Right ** q_dst in add_trans num conf.tr t1; add_trans num conf.tr_aux t2; add_trans num conf.tr_aux tsa; [q_dst;q_dstreal], q_dstreal, ctx_path | Ancestor | AncestorOrSelf -> conf.has_backward <- true; let up_states, new_ctx = List.fold_left (fun acc (q,_) -> if q == q_root then acc else q::acc) [] ctx_path, (vpush q_root []) in let _ = if axis = AncestorOrSelf then or_self conf q_src (fst(vpop ctx_path)) q_dst dir test p_f (is_last && not(existential)); in let fc = List.fold_left (fun f s -> ((_l dir)**s +|f)) Ata.false_ up_states in let t1 = ?< q_frm_root><(test,is_last && (not existential) )>=> ( (*if is_last then Ata.true_ else *) (`LLeft ) ** q_dst) *& fc in add_trans num conf.tr t1; [q_dst ], q_dst, vpush q_frm_root new_ctx | Parent -> conf.has_backward <- true; let q_self,new_ctx = match ctx_path with | (a,_)::[] -> a, vpush q_root [] | (a,_)::r -> a, r | _ -> assert false in let t1 = ?< q_frm_root>< (test,is_last && (not existential)) >=> (if is_last then Ata.true_ else (_l dir) ** q_dst) *& (_l dir) ** q_self in add_trans num conf.tr t1; [ q_dst ], q_dst, vpush q_frm_root new_ctx | _ -> assert false in (* todo change everything to Ptset *) (Ptset.elements (Ptset.union p_st (Ptset.from_list new_st)), new_dst, new_ctx) and is_rec = function [] -> false | ((axis,_,_),_)::_ -> match axis with Descendant | Ancestor -> true | _ -> false and compile_path ?(existential=false) annot_path config q_src states idx ctx_path = List.fold_left (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward,a_isrec) (step,dir) -> let add_states,new_dst,new_ctx = compile_step ~existential:existential config a_dst dir ctx_path (is_rec a_isrec) step num in let new_states = Ptset.union (Ptset.from_list add_states) a_st in let nanc_st,npar_st,npre_st,new_bw = match step with |PrecedingSibling,_,_ -> anc_st,par_st,Ptset.add a_dst pre_st,true |(Parent|Ancestor|AncestorOrSelf),_,_ -> Ptset.add a_dst anc_st,par_st,pre_st,true | _ -> anc_st,par_st,pre_st,has_backward in new_states,new_dst,nanc_st,npar_st,npre_st,new_ctx, num+1,new_bw,(match a_isrec with [] -> [] | _::r -> r) ) (states, q_src, Ptset.empty,Ptset.empty,Ptset.empty, ctx_path,idx, false,(List.tl annot_path) ) annot_path and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst = let a_st1,anc_st1,par_st1,pre_st1,idx1,f1 = compile_pred conf q_src idx ctx_path dir p1 ddst in let a_st2,anc_st2,par_st2,pre_st2,idx2,f2 = compile_pred conf q_src idx1 ctx_path dir p2 ddst in Ptset.union a_st1 a_st2, Ptset.union anc_st1 anc_st2, Ptset.union par_st1 par_st2, Ptset.union pre_st1 pre_st2, idx2, (f f1 f2) and compile_pred conf q_src idx ctx_path dir pred qdst = match pred with | Or(p1,p2) -> binop_ conf q_src idx ctx_path dir pred p1 p2 (( +| )) qdst | And(p1,p2) -> binop_ conf q_src idx ctx_path dir pred p1 p2 (( *& )) qdst | Expr e -> compile_expr conf Ptset.empty q_src idx ctx_path dir e qdst | Not(p) -> let a_st,anc_st,par_st,pre_st,idx,f = compile_pred conf q_src idx ctx_path dir p qdst in a_st,anc_st,par_st,pre_st,idx, Ata.not_ f and compile_expr conf states q_src idx ctx_path dir e qdst = match e with | Path (p) -> let q = Ata.mk_state () in let annot_path = match p with Relative(r) -> dirannot (List.rev r) | _ -> assert false in let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward,_ = compile_path ~existential:true annot_path conf q states idx ctx_path in let ret_dir = match annot_path with | ((FollowingSibling,_,_),_)::_ -> `Right | _ -> `Left in let _ = match annot_path with | (((Parent|Ancestor|AncestorOrSelf),_,_),_)::_ -> conf.final_state <- Ptset.add qdst conf.final_state | _ -> () in let _ = conf.univ_states <- a_dst::conf.univ_states in (a_st,anc_st,par_st,pre_st,idx, ((ret_dir) ** q)) | True -> states,Ptset.empty,Ptset.empty,Ptset.empty,idx,Ata.true_ | False -> states,Ptset.empty,Ptset.empty,Ptset.empty,idx,Ata.false_ | _ -> assert false and dirannot = function [] -> [] | [p] -> [p,`Last] | p::(((FollowingSibling),_,_)::_ as l) -> (p,`Right)::(dirannot l) | p::l -> (p,`Left) :: (dirannot l) let compile path = let steps = match path with | Absolute(steps) | Relative(steps) -> steps | AbsoluteDoS(steps) -> steps@[(DescendantOrSelf,TagSet.node,Expr(True))] in let steps = List.rev steps in let dirsteps = dirannot steps in let _ = Ata.mk_state() in let config = { st_root = Ata.mk_state(); st_univ = Ata.mk_state(); final_state = Ptset.empty; st_from_root = Ata.mk_state(); has_backward = false; tr_parent_loop = Hashtbl.create 5; tr = Hashtbl.create 5; tr_aux = Hashtbl.create 5; entry_points = []; contains = None; univ_states = []; starstate = None; } in let q0 = Ata.mk_state() in let states = Ptset.from_list [config.st_univ;config.st_root] in let num = 0 in (* add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_univ config.st_from_root); add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_from_root config.st_univ); add_trans num config.tr_aux (mk_step config.st_no_nil (TagSet.add Tag.pcdata TagSet.star) `Left config.st_univ config.st_univ); *) let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward,_ = compile_path dirsteps config q0 states 0 [(config.st_root,[]) ] in let fst_tr = ?< (config.st_root) >< (TagSet.singleton (Tag.tag ""),false) >=> ((if is_rec dirsteps then `LLeft else `Left)** q0) *& (if config.has_backward then `LLeft ** config.st_from_root else Ata.true_) in add_trans num config.tr fst_tr; if config.has_backward then begin add_trans num config.tr_aux (?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft ** config.st_from_root); add_trans num config.tr_aux (?< (config.st_from_root) >< (TagSet.any,false) >=> `RRight ** config.st_from_root); end; let phi = Hashtbl.create 37 in let fadd = fun _ (_,l) -> List.iter (fun (s,t,m,f,p) -> let lt = try Hashtbl.find phi s with Not_found -> [] in Hashtbl.replace phi s ((t,(m,f,p))::lt) ) l in Hashtbl.iter (fadd) config.tr; Hashtbl.iter (fadd) config.tr_aux; Hashtbl.iter (fadd) config.tr_parent_loop; let final = let s = Ptset.union anc_st (Ptset.from_list []) in if has_backward then Ptset.add config.st_from_root s else s in { Ata.id = Oo.id (object end); Ata.states = Hashtbl.fold (fun q _ acc -> Ptset.add q acc) phi Ptset.empty; Ata.init = Ptset.singleton config.st_root; Ata.final = Ptset.union anc_st config.final_state; Ata.universal = Ptset.add a_dst (Ptset.from_list config.univ_states); Ata.phi = phi; Ata.sigma = Hashtbl.create 17; Ata.starstate = config.starstate; },config.entry_points,!contains end