X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=xPath.ml;h=1c2c127d60a4692590b0bf9a81f0244dd2dd16f0;hb=ac8c1ac563a2c089f789eed5a03ff5b84a2c4fe0;hp=2ac43b76014dbb619772d7e1ac690868fe10d1bd;hpb=a6a05531716a001b369e018c9d2c87ce532ef163;p=SXSI%2Fxpathcomp.git diff --git a/xPath.ml b/xPath.ml index 2ac43b7..1c2c127 100644 --- a/xPath.ml +++ b/xPath.ml @@ -4,7 +4,7 @@ (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) -#load "pa_extend.cmo";; +#load "pa_extend.cmo";; let contains = ref None module Ast = struct @@ -13,12 +13,12 @@ struct and step = axis*test*predicate and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following - + and test = TagSet.t - + and predicate = Or of predicate*predicate | And of predicate*predicate - | Not of predicate + | Not of predicate | Expr of expression and expression = Path of path | Function of string*expression list @@ -26,31 +26,31 @@ struct | String of string | True | False type t = path - - - + + + let pp fmt = Format.fprintf fmt let print_list printer fmt sep l = match l with [] -> () | [e] -> printer fmt e | e::es -> printer fmt e; List.iter (fun x -> pp fmt sep;printer fmt x) es - - - let rec print fmt p = - let l = match p with - | Absolute l -> pp fmt "/"; l - | AbsoluteDoS l -> pp fmt "/"; + + + let rec print fmt p = + let l = match p with + | Absolute l -> pp fmt "/"; l + | AbsoluteDoS l -> pp fmt "/"; print_step fmt (DescendantOrSelf,TagSet.node,Expr True); pp fmt "/"; l - | Relative l -> l + | Relative l -> l in print_list print_step fmt "/" (List.rev l) and print_step fmt (axis,test,predicate) = print_axis fmt axis;pp fmt "::";print_test fmt test; pp fmt "["; print_predicate fmt predicate; pp fmt "]" - and print_axis fmt a = pp fmt "%s" (match a with + and print_axis fmt a = pp fmt "%s" (match a with Self -> "self" | Child -> "child" | Descendant -> "descendant" @@ -63,42 +63,42 @@ struct | Parent -> "parent" | _ -> assert false ) - and print_test fmt ts = - try - pp fmt "%s" (List.assoc ts + and print_test fmt ts = + try + pp fmt "%s" (List.assoc ts [ (TagSet.pcdata,"text()"); (TagSet.node,"node()"); (TagSet.star),"*"]) with Not_found -> pp fmt "%s" - (if TagSet.is_finite ts + (if TagSet.is_finite ts then Tag.to_string (TagSet.choose ts) else "") - + and print_predicate fmt = function | Or(p,q) -> print_predicate fmt p; pp fmt " or "; print_predicate fmt q | And(p,q) -> print_predicate fmt p; pp fmt " and "; print_predicate fmt q | Not p -> pp fmt "not "; print_predicate fmt p | Expr e -> print_expression fmt e - + and print_expression fmt = function | Path p -> print fmt p | Function (f,l) -> pp fmt "%s(" f;print_list print_expression fmt "," l;pp fmt ")" | Int i -> pp fmt "%i" i | String s -> pp fmt "\"%s\"" s | t -> pp fmt "%b" (t== True) - + end -module Parser = +module Parser = struct - open Ast + open Ast open Ulexer let predopt = function None -> Expr True | Some p -> p module Gram = Camlp4.Struct.Grammar.Static.Make(Ulexer) let query = Gram.Entry.mk "query" - + exception Error of Gram.Loc.t*string - let test_of_keyword t loc = + let test_of_keyword t loc = match t with | "text()" -> TagSet.pcdata | "node()" -> TagSet.node @@ -114,8 +114,8 @@ GLOBAL: query; query : [ [ p = path; `EOI -> p ]] ; - - path : [ + + path : [ [ "//" ; l = slist -> AbsoluteDoS l ] | [ "/" ; l = slist -> Absolute l ] | [ l = slist -> Relative l ] @@ -136,8 +136,8 @@ step : [ [ axis = axis ; o = OPT ["::" ; t = test -> t ] ; p = top_pred -> let a,t,p = match o with - | Some(t) -> (axis,t,p) - | None -> (Child,TagSet.singleton (Tag.tag (axis_to_string axis)),p) + | Some(t) -> (axis,t,p) + | None -> (Child,TagSet.singleton (Tag.tag (axis_to_string axis)),p) in match a with | Following -> [ (DescendantOrSelf,t,p); (FollowingSibling,TagSet.star,Expr(True)); @@ -149,23 +149,23 @@ step : [ | _ -> [ a,t,p ] ] - + | [ "." ; p = top_pred -> [(Self,TagSet.node,p)] ] | [ ".." ; p = top_pred -> [(Parent,TagSet.star,p)] ] -| [ "contains"; "(" ; s = STRING ; ")";p=top_pred -> [ +| [ "contains"; "(" ; s = STRING ; ")";p=top_pred -> [ let _ = contains := Some((`CONTAINS,s)) in (Child,TagSet.singleton Tag.pcdata, p)] ] -| [ "equals"; "(" ; s = STRING ; ")";p=top_pred -> [ +| [ "equals"; "(" ; s = STRING ; ")";p=top_pred -> [ let _ = contains := Some((`EQUALS,s)) in (Child,TagSet.singleton Tag.pcdata, p)] ] -| [ "startswith"; "(" ; s = STRING ; ")";p=top_pred -> [ +| [ "startswith"; "(" ; s = STRING ; ")";p=top_pred -> [ let _ = contains := Some((`STARTSWITH,s)) in (Child,TagSet.singleton Tag.pcdata, p)] ] -| [ "endswith"; "(" ; s = STRING ; ")";p=top_pred -> [ +| [ "endswith"; "(" ; s = STRING ; ")";p=top_pred -> [ let _ = contains := Some((`ENDSWITH,s)) in (Child,TagSet.singleton Tag.pcdata, p)] ] | [ test = test; p = top_pred -> [(Child,test, p)] ] -| [ att = ATT ; p = top_pred -> +| [ att = ATT ; p = top_pred -> match att with | "*" -> [(Attribute,TagSet.star,p)] | _ -> [(Attribute, TagSet.singleton (Tag.tag att) ,p )]] @@ -175,8 +175,8 @@ top_pred : [ [ p = OPT [ "["; p=predicate ;"]" -> p ] -> predopt p ] ] ; -axis : [ - [ "self" -> Self | "child" -> Child | "descendant" -> Descendant +axis : [ + [ "self" -> Self | "child" -> Child | "descendant" -> Descendant | "descendant-or-self" -> DescendantOrSelf | "ancestor-or-self" -> AncestorOrSelf | "following-sibling" -> FollowingSibling @@ -188,15 +188,15 @@ axis : [ | "following" -> Following ] - + ]; -test : [ +test : [ [ s = KWD -> test_of_keyword s _loc ] | [ t = TAG -> TagSet.singleton (Tag.tag t) ] ]; -predicate: [ +predicate: [ [ p = predicate; "or"; q = predicate -> Or(p,q) ] | [ p = predicate; "and"; q = predicate -> And(p,q) ] | [ "not" ; p = predicate -> Not p ] @@ -216,7 +216,7 @@ END ;; let parse_string = Gram.parse_string query (Ulexer.Loc.mk "") let parse = Gram.parse_string query (Ulexer.Loc.mk "") -end +end module Compile = struct @@ -251,7 +251,7 @@ let dummy_conf = { st_root = -1; univ_states = []; starstate = None; } - + let _r = function (`Left|`Last) -> `Right @@ -260,7 +260,7 @@ let _r = | `LLeft -> `RRight -let _l = +let _l = function (`Left|`Last) -> `Left | `Right -> `Right | `RRight -> `RRight @@ -276,12 +276,12 @@ let add_trans num htr ((q,ts,_)as tr) = Hashtbl.add htr q (num,[tr]) let vpush x y = (x,[]) :: y -let hpush x y = +let hpush x y = match y with | (z,r)::l -> (z,x::r) ::l | _ -> assert false -let vpop = function +let vpop = function (x,_)::r -> x,r | _ -> assert false @@ -289,68 +289,68 @@ let hpop = function | (x,z::y) ::r -> z,(x,y)::r | _-> assert false -let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num = +let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num = let ex = existential in let axis,test,pred = step in let is_last = dir = `Last in let { st_root = q_root; - st_univ = q_univ; - st_from_root = q_frm_root } = conf + st_univ = q_univ; + st_from_root = q_frm_root } = conf in - let q_dst = Ata.State.make() in - let p_st, p_anc, p_par, p_pre, p_num, p_f = + let q_dst = Ata.State.make() in + let p_st, p_anc, p_par, p_pre, p_num, p_f = compile_pred conf q_src num ctx_path dir pred q_dst in - let new_st,new_dst, new_ctx = + let new_st,new_dst, new_ctx = match axis with | Child | Descendant -> if (TagSet.is_finite test) - then conf.entry_points <- (TagSet.choose test,Ata.StateSet.singleton q_src)::conf.entry_points; + then conf.entry_points <- (TagSet.choose test,Ata.StateSet.singleton q_src)::conf.entry_points; let left,right = if nrec then `LLeft,`RRight else `Left,`Right in let _ = if is_last && axis=Descendant && TagSet.equal test TagSet.star then conf.starstate <- Some(Ata.StateSet.singleton q_src) - in + in let t1,ldst = ?< q_src><(test, is_last && not(ex))>=> p_f *& ( if is_last then Ata.Formula.true_ else (_l left) *+ q_dst), ( if is_last then [] else [q_dst]) in - - let _ = add_trans num conf.tr t1 in + + let _ = add_trans num conf.tr t1 in let _ = if axis=Descendant then add_trans num conf.tr_aux ( ?< q_src><@ ((if ex||nrec then TagSet.diff TagSet.star test - else TagSet.star),false)>=> + else TagSet.star),false)>=> (if TagSet.equal test TagSet.star then `Left else `LLeft) *+ q_src ) - in - let t3 = - ?< q_src><@ ((if ex then TagSet.diff TagSet.any test - else TagSet.any), false)>=> - (if axis=Descendant && (not (TagSet.equal test TagSet.star)) then - `RRight else `Right) *+ q_src in - let _ = add_trans num conf.tr_aux t3 + let t3 = + ?< q_src><@ ((if ex then TagSet.diff TagSet.any test + else TagSet.any), false)>=> + (if axis=Descendant && (not (TagSet.equal test TagSet.star)) then + `RRight else `Right) *+ q_src in - ldst, q_dst, + let _ = add_trans num conf.tr_aux t3 + in + ldst, q_dst, (if axis = FollowingSibling then hpush q_src ctx_path else vpush q_src ctx_path) - - - | Attribute -> + + + | Attribute -> let q_dstreal = Ata.State.make() in (* attributes are always the first child *) - let t1 = ?< q_src><(TagSet.attribute,false)>=> + let t1 = ?< q_src><(TagSet.attribute,false)>=> `Left *+ q_dst in let t2 = ?< q_dst><(test, is_last && not(existential))>=> if is_last then Ata.Formula.true_ else `Left *+ q_dstreal in - let tsa = ?< q_dst><(TagSet.star, false)>=> `Right *+ q_dst + let tsa = ?< q_dst><(TagSet.star, false)>=> `Right *+ q_dst in add_trans num conf.tr t1; add_trans num conf.tr_aux t2; add_trans num conf.tr_aux tsa; - [q_dst;q_dstreal], q_dstreal, + [q_dst;q_dstreal], q_dstreal, ctx_path @@ -362,19 +362,19 @@ let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num new_ctx) and is_rec = function [] -> false - | ((axis,_,_),_)::_ -> + | ((axis,_,_),_)::_ -> match axis with Descendant | Ancestor -> true | _ -> false - -and compile_path ?(existential=false) annot_path config q_src states idx ctx_path = - List.fold_left - (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward,a_isrec) (step,dir) -> + +and compile_path ?(existential=false) annot_path config q_src states idx ctx_path = + List.fold_left + (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward,a_isrec) (step,dir) -> let add_states,new_dst,new_ctx = compile_step ~existential:existential config a_dst dir ctx_path (is_rec a_isrec) step num in let new_states = Ata.StateSet.union (Ata.StateSet.from_list add_states) a_st in - let nanc_st,npar_st,npre_st,new_bw = + let nanc_st,npar_st,npre_st,new_bw = match step with |PrecedingSibling,_,_ -> anc_st,par_st,Ata.StateSet.add a_dst pre_st,true |(Parent|Ancestor|AncestorOrSelf),_,_ -> Ata.StateSet.add a_dst anc_st,par_st,pre_st,true @@ -384,11 +384,11 @@ and compile_path ?(existential=false) annot_path config q_src states idx ctx_pat ) (states, q_src, Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty, ctx_path,idx, false,(List.tl annot_path) ) annot_path - + and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst = let a_st1,anc_st1,par_st1,pre_st1,idx1,f1 = compile_pred conf q_src idx ctx_path dir p1 ddst in - let a_st2,anc_st2,par_st2,pre_st2,idx2,f2 = + let a_st2,anc_st2,par_st2,pre_st2,idx2,f2 = compile_pred conf q_src idx1 ctx_path dir p2 ddst in Ata.StateSet.union a_st1 a_st2, @@ -397,26 +397,26 @@ and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst = Ata.StateSet.union pre_st1 pre_st2, idx2, (f f1 f2) -and compile_pred conf q_src idx ctx_path dir pred qdst = +and compile_pred conf q_src idx ctx_path dir pred qdst = match pred with - | Or(p1,p2) -> + | Or(p1,p2) -> binop_ conf q_src idx ctx_path dir pred p1 p2 (( +| )) qdst - | And(p1,p2) -> + | And(p1,p2) -> binop_ conf q_src idx ctx_path dir pred p1 p2 (( *& )) qdst | Expr e -> compile_expr conf Ata.StateSet.empty q_src idx ctx_path dir e qdst - | Not(p) -> - let a_st,anc_st,par_st,pre_st,idx,f = + | Not(p) -> + let a_st,anc_st,par_st,pre_st,idx,f = compile_pred conf q_src idx ctx_path dir p qdst in a_st,anc_st,par_st,pre_st,idx, Ata.Formula.not_ f and compile_expr conf states q_src idx ctx_path dir e qdst = match e with - | Path (p) -> + | Path (p) -> let q = Ata.State.make () in let annot_path = match p with Relative(r) -> dirannot (List.rev r) | _ -> assert false in - let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward,_ = + let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward,_ = compile_path ~existential:true annot_path conf q states idx ctx_path - in + in let ret_dir = match annot_path with | ((FollowingSibling,_,_),_)::_ -> `Right | _ -> `Left @@ -438,9 +438,9 @@ and dirannot = function | p::l -> (p,`Left) :: (dirannot l) let compile ?(querystring="") path = - let steps = + let steps = match path with - | Absolute(steps) + | Absolute(steps) | Relative(steps) -> steps | AbsoluteDoS(steps) -> steps@[(DescendantOrSelf,TagSet.node,Expr(True))] in @@ -453,40 +453,40 @@ let compile ?(querystring="") path = has_backward = false; tr_parent_loop = Hashtbl.create 5; tr = Hashtbl.create 5; - tr_aux = Hashtbl.create 5; + tr_aux = Hashtbl.create 5; entry_points = []; contains = None; univ_states = []; starstate = None; - } + } in let q0 = Ata.State.make() in - let states = Ata.StateSet.from_list [config.st_univ;config.st_root] + let states = Ata.StateSet.from_list [config.st_univ;config.st_root] in let num = 0 in (* add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_univ config.st_from_root); add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_from_root config.st_univ); add_trans num config.tr_aux (mk_step config.st_no_nil (TagSet.add Tag.pcdata TagSet.star) `Left config.st_univ config.st_univ); *) - let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward,_ = + let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward,_ = compile_path dirsteps config q0 states 0 [(config.st_root,[]) ] in - let fst_tr = - ?< (config.st_root) >< (TagSet.singleton (Tag.tag ""),false) >=> + let fst_tr = + ?< (config.st_root) >< (TagSet.singleton (Tag.tag ""),false) >=> ((if is_rec dirsteps then `LLeft else `Left)*+ q0) *& (if config.has_backward then `LLeft *+ config.st_from_root else Ata.Formula.true_) in add_trans num config.tr fst_tr; if config.has_backward then begin - add_trans num config.tr_aux + add_trans num config.tr_aux (?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft *+ config.st_from_root); - add_trans num config.tr_aux - (?< (config.st_from_root) >< (TagSet.any,false) >=> - `RRight *+ config.st_from_root); - - end; + add_trans num config.tr_aux + (?< (config.st_from_root) >< (TagSet.any,false) >=> + `RRight *+ config.st_from_root); + + end; let phi = Hashtbl.create 37 in - let fadd = fun _ (_,l) -> List.iter (fun (s,t,tr) -> - let lt = try + let fadd = fun _ (_,l) -> List.iter (fun (s,t,tr) -> + let lt = try Hashtbl.find phi s with Not_found -> [] in @@ -495,8 +495,8 @@ let compile ?(querystring="") path = Hashtbl.iter (fadd) config.tr; Hashtbl.iter (fadd) config.tr_aux; Hashtbl.iter (fadd) config.tr_parent_loop; - let final = - let s = anc_st + let final = + let s = anc_st in if has_backward then Ata.StateSet.add config.st_from_root s else s in { Ata.id = Oo.id (object end); Ata.states = Hashtbl.fold (fun q _ acc -> Ata.StateSet.add q acc) phi Ata.StateSet.empty; @@ -505,6 +505,6 @@ let compile ?(querystring="") path = Ata.starstate = config.starstate; Ata.query_string = querystring; },config.entry_points,!contains - - + + end