(******************************************************************************) (* SXSI : XPath evaluator *) (* Kim Nguyen (Kim.Nguyen@nicta.com.au) *) (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) #load "pa_extend.cmo";; let contains = ref None module Ast = struct (* The steps are in reverse order !!!! *) type path = Absolute of step list | AbsoluteDoS of step list| Relative of step list and step = axis*test*predicate and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following and test = TagSet.t and predicate = Or of predicate*predicate | And of predicate*predicate | Not of predicate | Expr of expression and expression = Path of path | Function of string*expression list | Int of int | String of string | True | False type t = path let pp fmt = Format.fprintf fmt let print_list printer fmt sep l = match l with [] -> () | [e] -> printer fmt e | e::es -> printer fmt e; List.iter (fun x -> pp fmt sep;printer fmt x) es let rec print fmt p = let l = match p with | Absolute l -> pp fmt "/"; l | AbsoluteDoS l -> pp fmt "/"; print_step fmt (DescendantOrSelf,TagSet.node,Expr True); pp fmt "/"; l | Relative l -> l in print_list print_step fmt "/" (List.rev l) and print_step fmt (axis,test,predicate) = print_axis fmt axis;pp fmt "::";print_test fmt test; pp fmt "["; print_predicate fmt predicate; pp fmt "]" and print_axis fmt a = pp fmt "%s" (match a with Self -> "self" | Child -> "child" | Descendant -> "descendant" | DescendantOrSelf -> "descendant-or-self" | FollowingSibling -> "following-sibling" | Attribute -> "attribute" | Ancestor -> "ancestor" | AncestorOrSelf -> "ancestor-or-self" | PrecedingSibling -> "preceding-sibling" | Parent -> "parent" | _ -> assert false ) and print_test fmt ts = try pp fmt "%s" (List.assoc ts [ (TagSet.pcdata,"text()"); (TagSet.node,"node()"); (TagSet.star),"*"]) with Not_found -> pp fmt "%s" (if TagSet.is_finite ts then Tag.to_string (TagSet.choose ts) else "") and print_predicate fmt = function | Or(p,q) -> print_predicate fmt p; pp fmt " or "; print_predicate fmt q | And(p,q) -> print_predicate fmt p; pp fmt " and "; print_predicate fmt q | Not p -> pp fmt "not "; print_predicate fmt p | Expr e -> print_expression fmt e and print_expression fmt = function | Path p -> print fmt p | Function (f,l) -> pp fmt "%s(" f;print_list print_expression fmt "," l;pp fmt ")" | Int i -> pp fmt "%i" i | String s -> pp fmt "\"%s\"" s | t -> pp fmt "%b" (t== True) end module Parser = struct open Ast open Ulexer let predopt = function None -> Expr True | Some p -> p module Gram = Camlp4.Struct.Grammar.Static.Make(Ulexer) let query = Gram.Entry.mk "query" exception Error of Gram.Loc.t*string let test_of_keyword t loc = match t with | "text()" -> TagSet.pcdata | "node()" -> TagSet.node | "*" -> TagSet.star | "and" | "not" | "or" -> TagSet.singleton (Tag.tag t) | _ -> raise (Error(loc,"Invalid test name "^t )) let axis_to_string a = let r = Format.str_formatter in print_axis r a; Format.flush_str_formatter() EXTEND Gram GLOBAL: query; query : [ [ p = path; `EOI -> p ]] ; path : [ [ "//" ; l = slist -> AbsoluteDoS l ] | [ "/" ; l = slist -> Absolute l ] | [ l = slist -> Relative l ] ] ; slist: [ [ l = slist ;"/"; s = step -> s@l ] | [ l = slist ; "//"; s = step -> s@[(DescendantOrSelf, TagSet.node,Expr True)]@l] | [ s = step -> s ] ]; step : [ (* yurk, this is done to parse stuff like a/b/descendant/a where descendant is actually a tag name :( if OPT is None then this is a child::descendant if not, this is a real axis name *) [ axis = axis ; o = OPT ["::" ; t = test -> t ] ; p = top_pred -> let a,t,p = match o with | Some(t) -> (axis,t,p) | None -> (Child,TagSet.singleton (Tag.tag (axis_to_string axis)),p) in match a with | Following -> [ (DescendantOrSelf,t,p); (FollowingSibling,TagSet.star,Expr(True)); (Ancestor,TagSet.star,Expr(True)) ] | Preceding -> [ (DescendantOrSelf,t,p); (PrecedingSibling,TagSet.star,Expr(True)); (Ancestor,TagSet.star,Expr(True)) ] | _ -> [ a,t,p ] ] | [ "." ; p = top_pred -> [(Self,TagSet.node,p)] ] | [ ".." ; p = top_pred -> [(Parent,TagSet.star,p)] ] | [ "contains"; "(" ; s = STRING ; ")";p=top_pred -> [ let _ = contains := Some((`CONTAINS,s)) in (Child,TagSet.singleton Tag.pcdata, p)] ] | [ "equals"; "(" ; s = STRING ; ")";p=top_pred -> [ let _ = contains := Some((`EQUALS,s)) in (Child,TagSet.singleton Tag.pcdata, p)] ] | [ "startswith"; "(" ; s = STRING ; ")";p=top_pred -> [ let _ = contains := Some((`STARTSWITH,s)) in (Child,TagSet.singleton Tag.pcdata, p)] ] | [ "endswith"; "(" ; s = STRING ; ")";p=top_pred -> [ let _ = contains := Some((`ENDSWITH,s)) in (Child,TagSet.singleton Tag.pcdata, p)] ] | [ test = test; p = top_pred -> [(Child,test, p)] ] | [ att = ATT ; p = top_pred -> match att with | "*" -> [(Attribute,TagSet.star,p)] | _ -> [(Attribute, TagSet.singleton (Tag.tag att) ,p )]] ] ; top_pred : [ [ p = OPT [ "["; p=predicate ;"]" -> p ] -> predopt p ] ] ; axis : [ [ "self" -> Self | "child" -> Child | "descendant" -> Descendant | "descendant-or-self" -> DescendantOrSelf | "ancestor-or-self" -> AncestorOrSelf | "following-sibling" -> FollowingSibling | "attribute" -> Attribute | "parent" -> Parent | "ancestor" -> Ancestor | "preceding-sibling" -> PrecedingSibling | "preceding" -> Preceding | "following" -> Following ] ]; test : [ [ s = KWD -> test_of_keyword s _loc ] | [ t = TAG -> TagSet.singleton (Tag.tag t) ] ]; predicate: [ [ p = predicate; "or"; q = predicate -> Or(p,q) ] | [ p = predicate; "and"; q = predicate -> And(p,q) ] | [ "not" ; p = predicate -> Not p ] | [ "("; p = predicate ;")" -> p ] | [ e = expression -> Expr e ] ]; expression: [ [ f = TAG; "("; args = LIST0 expression SEP "," ; ")" -> Function(f,args)] | [ `INT(i) -> Int (i) ] | [ s = STRING -> String s ] | [ p = path -> Path p ] | [ "("; e = expression ; ")" -> e ] ] ; END ;; let parse_string = Gram.parse_string query (Ulexer.Loc.mk "") let parse = Gram.parse_string query (Ulexer.Loc.mk "") end module Compile = struct open Ast type transition = Ata.State.t*TagSet.t*Ata.Transition.t type config = { st_root : Ata.State.t; (* state matching the root element (initial state) *) st_univ : Ata.State.t; (* universal state accepting anything *) st_from_root : Ata.State.t; (* state chaining the root and the current position *) mutable final_state : Ata.StateSet.t; mutable has_backward: bool; (* To store transitions *) (* Key is the from state, (i,l) -> i the number of the step and l the list of trs *) tr_parent_loop : (Ata.State.t,int*(transition list)) Hashtbl.t; tr : (Ata.State.t,int*(transition list)) Hashtbl.t; tr_aux : (Ata.State.t,int*(transition list)) Hashtbl.t; mutable entry_points : (Tag.t*Ata.StateSet.t) list; mutable contains : string option; mutable univ_states : Ata.State.t list; mutable starstate : Ata.StateSet.t option; } let dummy_conf = { st_root = -1; st_univ = -1; st_from_root = -1; final_state = Ata.StateSet.empty; has_backward = false; tr_parent_loop = Hashtbl.create 0; tr = Hashtbl.create 0; tr_aux = Hashtbl.create 0; entry_points = []; contains = None; univ_states = []; starstate = None; } let _r = function (`Left|`Last) -> `Right | `Right -> `Left | `RRight -> `LLeft | `LLeft -> `RRight let _l = function (`Left|`Last) -> `Left | `Right -> `Right | `RRight -> `RRight | `LLeft -> `LLeft open Ata.Transition.Infix open Ata.Formula.Infix (* Todo : fix *) let add_trans num htr ((q,ts,_)as tr) = Hashtbl.add htr q (num,[tr]) let vpush x y = (x,[]) :: y let hpush x y = match y with | (z,r)::l -> (z,x::r) ::l | _ -> assert false let vpop = function (x,_)::r -> x,r | _ -> assert false let hpop = function | (x,z::y) ::r -> z,(x,y)::r | _-> assert false let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num = let ex = existential in let axis,test,pred = step in let is_last = dir = `Last in let { st_root = q_root; st_univ = q_univ; st_from_root = q_frm_root } = conf in let q_dst = Ata.State.make() in let p_st, p_anc, p_par, p_pre, p_num, p_f = compile_pred conf q_src num ctx_path dir pred q_dst in let new_st,new_dst, new_ctx = match axis with | Child | Descendant -> if (TagSet.is_finite test) then conf.entry_points <- (TagSet.choose test,Ata.StateSet.singleton q_src)::conf.entry_points; let left,right = if nrec then `LLeft,`RRight else `Left,`Right in let _ = if is_last && axis=Descendant && TagSet.equal test TagSet.star then conf.starstate <- Some(Ata.StateSet.singleton q_src) in let t1,ldst = ?< q_src><(test, is_last && not(ex))>=> p_f *& ( if is_last then Ata.Formula.true_ else (_l left) *+ q_dst), ( if is_last then [] else [q_dst]) in let _ = add_trans num conf.tr t1 in let _ = if axis=Descendant then add_trans num conf.tr_aux ( ?< q_src><@ ((if ex||nrec then TagSet.diff TagSet.star test else TagSet.star),false)>=> (if TagSet.equal test TagSet.star then `Left else `LLeft) *+ q_src ) in let t3 = ?< q_src><@ ((if ex then TagSet.diff TagSet.any test else TagSet.any), false)>=> (if axis=Descendant && (not (TagSet.equal test TagSet.star)) then `RRight else `Right) *+ q_src in let _ = add_trans num conf.tr_aux t3 in ldst, q_dst, (if axis = FollowingSibling then hpush q_src ctx_path else vpush q_src ctx_path) | Attribute -> let q_dstreal = Ata.State.make() in (* attributes are always the first child *) let t1 = ?< q_src><(TagSet.attribute,false)>=> `Left *+ q_dst in let t2 = ?< q_dst><(test, is_last && not(existential))>=> if is_last then Ata.Formula.true_ else `Left *+ q_dstreal in let tsa = ?< q_dst><(TagSet.star, false)>=> `Right *+ q_dst in add_trans num conf.tr t1; add_trans num conf.tr_aux t2; add_trans num conf.tr_aux tsa; [q_dst;q_dstreal], q_dstreal, ctx_path | _ -> assert false in (* todo change everything to Ata.StateSet *) (Ata.StateSet.elements (Ata.StateSet.union p_st (Ata.StateSet.from_list new_st)), new_dst, new_ctx) and is_rec = function [] -> false | ((axis,_,_),_)::_ -> match axis with Descendant | Ancestor -> true | _ -> false and compile_path ?(existential=false) annot_path config q_src states idx ctx_path = List.fold_left (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward,a_isrec) (step,dir) -> let add_states,new_dst,new_ctx = compile_step ~existential:existential config a_dst dir ctx_path (is_rec a_isrec) step num in let new_states = Ata.StateSet.union (Ata.StateSet.from_list add_states) a_st in let nanc_st,npar_st,npre_st,new_bw = match step with |PrecedingSibling,_,_ -> anc_st,par_st,Ata.StateSet.add a_dst pre_st,true |(Parent|Ancestor|AncestorOrSelf),_,_ -> Ata.StateSet.add a_dst anc_st,par_st,pre_st,true | _ -> anc_st,par_st,pre_st,has_backward in new_states,new_dst,nanc_st,npar_st,npre_st,new_ctx, num+1,new_bw,(match a_isrec with [] -> [] | _::r -> r) ) (states, q_src, Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty, ctx_path,idx, false,(List.tl annot_path) ) annot_path and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst = let a_st1,anc_st1,par_st1,pre_st1,idx1,f1 = compile_pred conf q_src idx ctx_path dir p1 ddst in let a_st2,anc_st2,par_st2,pre_st2,idx2,f2 = compile_pred conf q_src idx1 ctx_path dir p2 ddst in Ata.StateSet.union a_st1 a_st2, Ata.StateSet.union anc_st1 anc_st2, Ata.StateSet.union par_st1 par_st2, Ata.StateSet.union pre_st1 pre_st2, idx2, (f f1 f2) and compile_pred conf q_src idx ctx_path dir pred qdst = match pred with | Or(p1,p2) -> binop_ conf q_src idx ctx_path dir pred p1 p2 (( +| )) qdst | And(p1,p2) -> binop_ conf q_src idx ctx_path dir pred p1 p2 (( *& )) qdst | Expr e -> compile_expr conf Ata.StateSet.empty q_src idx ctx_path dir e qdst | Not(p) -> let a_st,anc_st,par_st,pre_st,idx,f = compile_pred conf q_src idx ctx_path dir p qdst in a_st,anc_st,par_st,pre_st,idx, Ata.Formula.not_ f and compile_expr conf states q_src idx ctx_path dir e qdst = match e with | Path (p) -> let q = Ata.State.make () in let annot_path = match p with Relative(r) -> dirannot (List.rev r) | _ -> assert false in let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward,_ = compile_path ~existential:true annot_path conf q states idx ctx_path in let ret_dir = match annot_path with | ((FollowingSibling,_,_),_)::_ -> `Right | _ -> `Left in let _ = match annot_path with | (((Parent|Ancestor|AncestorOrSelf),_,_),_)::_ -> conf.final_state <- Ata.StateSet.add qdst conf.final_state | _ -> () in let _ = conf.univ_states <- a_dst::conf.univ_states in (a_st,anc_st,par_st,pre_st,idx, ((ret_dir) *+ q)) | True -> states,Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty,idx,Ata.Formula.true_ | False -> states,Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty,idx,Ata.Formula.false_ | _ -> assert false and dirannot = function [] -> [] | [p] -> [p,`Last] | p::(((FollowingSibling),_,_)::_ as l) -> (p,`Right)::(dirannot l) | p::l -> (p,`Left) :: (dirannot l) let compile ?(querystring="") path = let steps = match path with | Absolute(steps) | Relative(steps) -> steps | AbsoluteDoS(steps) -> steps@[(DescendantOrSelf,TagSet.node,Expr(True))] in let steps = List.rev steps in let dirsteps = dirannot steps in let config = { st_root = Ata.State.make(); st_univ = Ata.State.make(); final_state = Ata.StateSet.empty; st_from_root = Ata.State.make(); has_backward = false; tr_parent_loop = Hashtbl.create 5; tr = Hashtbl.create 5; tr_aux = Hashtbl.create 5; entry_points = []; contains = None; univ_states = []; starstate = None; } in let q0 = Ata.State.make() in let states = Ata.StateSet.from_list [config.st_univ;config.st_root] in let num = 0 in (* add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_univ config.st_from_root); add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_from_root config.st_univ); add_trans num config.tr_aux (mk_step config.st_no_nil (TagSet.add Tag.pcdata TagSet.star) `Left config.st_univ config.st_univ); *) let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward,_ = compile_path dirsteps config q0 states 0 [(config.st_root,[]) ] in let fst_tr = ?< (config.st_root) >< (TagSet.singleton (Tag.tag ""),false) >=> ((if is_rec dirsteps then `LLeft else `Left)*+ q0) *& (if config.has_backward then `LLeft *+ config.st_from_root else Ata.Formula.true_) in add_trans num config.tr fst_tr; if config.has_backward then begin add_trans num config.tr_aux (?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft *+ config.st_from_root); add_trans num config.tr_aux (?< (config.st_from_root) >< (TagSet.any,false) >=> `RRight *+ config.st_from_root); end; let phi = Hashtbl.create 37 in let fadd = fun _ (_,l) -> List.iter (fun (s,t,tr) -> let lt = try Hashtbl.find phi s with Not_found -> [] in Hashtbl.replace phi s ((t,tr)::lt) ) l in Hashtbl.iter (fadd) config.tr; Hashtbl.iter (fadd) config.tr_aux; Hashtbl.iter (fadd) config.tr_parent_loop; let final = let s = anc_st in if has_backward then Ata.StateSet.add config.st_from_root s else s in { Ata.id = Oo.id (object end); Ata.states = Hashtbl.fold (fun q _ acc -> Ata.StateSet.add q acc) phi Ata.StateSet.empty; Ata.init = Ata.StateSet.singleton config.st_root; Ata.trans = phi; Ata.starstate = config.starstate; Ata.query_string = querystring; },config.entry_points,!contains end