X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=xPath.ml;h=4d83634b101a08e5952193881cadeebd94bf98f0;hb=cea756c7adc49891004bfe455628010eb7a28bc9;hp=23c5fc6c3efe144ebeec762718c171f952f1b26a;hpb=83aa6cf8a120ea6681402ce42ae56631fca1ddf4;p=SXSI%2Fxpathcomp.git diff --git a/xPath.ml b/xPath.ml index 23c5fc6..4d83634 100644 --- a/xPath.ml +++ b/xPath.ml @@ -6,7 +6,7 @@ (******************************************************************************) INCLUDE "debug.ml";; #load "pa_extend.cmo";; - +let contains = ref None module Ast = struct (* The steps are in reverse order !!!! *) @@ -153,6 +153,12 @@ step : [ | [ "." ; p = top_pred -> [(Self,TagSet.node,p)] ] | [ ".." ; p = top_pred -> [(Parent,TagSet.star,p)] ] +| [ "contains"; "(" ; s = STRING ; ")";p=top_pred -> [ + let _ = contains := Some(s) in (Child,TagSet.singleton Tag.pcdata, p)] + ] +| [ "contains_full"; "(" ; s = STRING ; ")";p=top_pred -> [ + let _ = contains := Some(s) in (Descendant,TagSet.singleton Tag.pcdata, p)] + ] | [ test = test; p = top_pred -> [(Child,test, p)] ] | [ att = ATT ; p = top_pred -> match att with @@ -221,6 +227,9 @@ type config = { st_root : Ata.state; (* state matching the root element (initial tr_parent_loop : (Ata.state,int*(Ata.transition list)) Hashtbl.t; tr : (Ata.state,int*(Ata.transition list)) Hashtbl.t; tr_aux : (Ata.state,int*(Ata.transition list)) Hashtbl.t; + mutable entry_points : (Tag.t*Ptset.t) list; + mutable contains : string option; + mutable univ_states : Ata.state list; } let dummy_conf = { st_root = -1; st_univ = -1; @@ -230,14 +239,24 @@ let dummy_conf = { st_root = -1; tr_parent_loop = Hashtbl.create 0; tr = Hashtbl.create 0; tr_aux = Hashtbl.create 0; + entry_points = []; + contains = None; + univ_states = []; } let _r = function (`Left|`Last) -> `Right | `Right -> `Left -let _l = function (`Left|`Last) -> `Left - | `Right -> `Right + | `RRight -> `LLeft + | `LLeft -> `RRight + + +let _l = + function (`Left|`Last) -> `Left + | `Right -> `Right + | `RRight -> `RRight + | `LLeft -> `LLeft open Ata.Transitions @@ -271,7 +290,7 @@ let or_self conf old_dst q_src q_dst dir test pred mark = (if mark then replace old_dst f else f) *& pred *& (if mark then Ata.true_ else (_l dir) ** q_dst), - `True)::acc) + false)::acc) l l in Hashtbl.replace conf.tr q_src (num,l2) with Not_found -> () @@ -293,7 +312,7 @@ let hpop = function | (x,z::y) ::r -> z,(x,y)::r | _-> assert false -let rec compile_step ?(existential=false) conf q_src dir ctx_path step num = +let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num = let ex = existential in let axis,test,pred = step in let is_last = dir = `Last in @@ -305,54 +324,31 @@ let rec compile_step ?(existential=false) conf q_src dir ctx_path step num = let p_st, p_anc, p_par, p_pre, p_num, p_f = compile_pred conf q_src num ctx_path dir pred q_dst in - let new_st,new_dst, new_ctx = match axis with - | Child | FollowingSibling | Descendant | DescendantOrSelf -> - let axis = - if axis = DescendantOrSelf - then - begin - or_self conf q_src (fst(vpop ctx_path)) q_dst dir test p_f (is_last && not(existential)); - Descendant - end - else axis + + | Child | Descendant -> + let left,right = + if nrec then `LLeft,`RRight + else `Left,`Right in + let t1 = ?< q_src><(test, is_last && not(ex))>=> - p_f *& (if is_last then Ata.true_ else (_l dir) ** q_dst) in + p_f *& ( if false (*is_last*) then Ata.true_ else (_l left) ** q_dst) in let _ = add_trans num conf.tr t1 in let _ = if axis=Descendant then add_trans num conf.tr_aux ( - ?< q_src><@ ((if ex then TagSet.diff TagSet.star test - else TagSet.star),false, - if TagSet.is_finite test - then `Left(fun t -> - if (Tree.Binary.is_node t) - then - let mytag = Tree.Binary.tag t in - TagSet.exists (fun tag -> - tag == mytag || - Tree.Binary.has_tagged_desc t tag - ) - test - else true - ) - - else `True )>=> `Left ** q_src ) + ?< q_src><@ ((if ex||nrec then TagSet.diff TagSet.star test + else TagSet.star),false)>=> `LLeft ** q_src ) in let t3 = ?< q_src><@ ((if ex then TagSet.diff TagSet.any test - else TagSet.any), false, - if axis=Descendant&&TagSet.is_finite test - then `True (*`Right(fun t -> - TagSet.exists (fun tag -> Tree.Binary.has_tagged_foll t tag) - test) *) - else `True )>=> - if ex then ( Ata.atom_ `Left false q_src) *& `Right ** q_src - else `Right ** q_src + else TagSet.any), false)>=> + if ex then right ** q_src + else (if axis=Descendant then `RRight else `Right) ** q_src in let _ = add_trans num conf.tr_aux t3 in @@ -378,7 +374,7 @@ let rec compile_step ?(existential=false) conf q_src dir ctx_path step num = | Ancestor | AncestorOrSelf -> conf.has_backward <- true; let up_states, new_ctx = - List.map (fst) ctx_path, (vpush q_root []) + List.fold_left (fun acc (q,_) -> if q == q_root then acc else q::acc) [] ctx_path, (vpush q_root []) in let _ = if axis = AncestorOrSelf then or_self conf q_src (fst(vpop ctx_path)) q_dst dir test p_f (is_last && not(existential)); @@ -386,7 +382,7 @@ let rec compile_step ?(existential=false) conf q_src dir ctx_path step num = let fc = List.fold_left (fun f s -> ((_l dir)**s +|f)) Ata.false_ up_states in let t1 = ?< q_frm_root><(test,is_last && (not existential) )>=> - (if is_last then Ata.true_ else (_l dir) ** q_dst) *& fc in + ( (*if is_last then Ata.true_ else *) (`LLeft ) ** q_dst) *& fc in add_trans num conf.tr t1; [q_dst ], q_dst, vpush q_frm_root new_ctx @@ -409,12 +405,18 @@ let rec compile_step ?(existential=false) conf q_src dir ctx_path step num = (Ptset.elements (Ptset.union p_st (Ptset.from_list new_st)), new_dst, new_ctx) - +and is_rec = function + [] -> false + | ((axis,_,_),_)::_ -> + match axis with + Descendant | Ancestor -> true + | _ -> false + and compile_path ?(existential=false) annot_path config q_src states idx ctx_path = List.fold_left - (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward) (step,dir) -> + (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward,a_isrec) (step,dir) -> let add_states,new_dst,new_ctx = - compile_step ~existential:existential config a_dst dir ctx_path step num + compile_step ~existential:existential config a_dst dir ctx_path (is_rec a_isrec) step num in let new_states = Ptset.union (Ptset.from_list add_states) a_st in let nanc_st,npar_st,npre_st,new_bw = @@ -423,11 +425,11 @@ and compile_path ?(existential=false) annot_path config q_src states idx ctx_pat |(Parent|Ancestor|AncestorOrSelf),_,_ -> Ptset.add a_dst anc_st,par_st,pre_st,true | _ -> anc_st,par_st,pre_st,has_backward in - new_states,new_dst,nanc_st,npar_st,npre_st,new_ctx, num+1,new_bw + new_states,new_dst,nanc_st,npar_st,npre_st,new_ctx, num+1,new_bw,(match a_isrec with [] -> [] | _::r -> r) ) - (states, q_src, Ptset.empty,Ptset.empty,Ptset.empty, ctx_path,idx, false ) + (states, q_src, Ptset.empty,Ptset.empty,Ptset.empty, ctx_path,idx, false,(List.tl annot_path) ) annot_path - + and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst = let a_st1,anc_st1,par_st1,pre_st1,idx1,f1 = compile_pred conf q_src idx ctx_path dir p1 ddst in @@ -457,7 +459,7 @@ and compile_expr conf states q_src idx ctx_path dir e qdst = | Path (p) -> let q = Ata.mk_state () in let annot_path = match p with Relative(r) -> dirannot (List.rev r) | _ -> assert false in - let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward = + let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward,_ = compile_path ~existential:true annot_path conf q states idx ctx_path in let ret_dir = match annot_path with @@ -467,7 +469,7 @@ and compile_expr conf states q_src idx ctx_path dir e qdst = let _ = match annot_path with | (((Parent|Ancestor|AncestorOrSelf),_,_),_)::_ -> conf.final_state <- Ptset.add qdst conf.final_state | _ -> () - in + in let _ = conf.univ_states <- a_dst::conf.univ_states in (a_st,anc_st,par_st,pre_st,idx, ((ret_dir) ** q)) | True -> states,Ptset.empty,Ptset.empty,Ptset.empty,idx,Ata.true_ | False -> states,Ptset.empty,Ptset.empty,Ptset.empty,idx,Ata.false_ @@ -489,6 +491,7 @@ let compile path = in let steps = List.rev steps in let dirsteps = dirannot steps in + let _ = Ata.mk_state() in let config = { st_root = Ata.mk_state(); st_univ = Ata.mk_state(); final_state = Ptset.empty; @@ -497,6 +500,9 @@ let compile path = tr_parent_loop = Hashtbl.create 5; tr = Hashtbl.create 5; tr_aux = Hashtbl.create 5; + entry_points = []; + contains = None; + univ_states = []; } in let q0 = Ata.mk_state() in @@ -507,21 +513,20 @@ let compile path = add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_from_root config.st_univ); add_trans num config.tr_aux (mk_step config.st_no_nil (TagSet.add Tag.pcdata TagSet.star) `Left config.st_univ config.st_univ); *) - let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward = + let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward,_ = compile_path dirsteps config q0 states 0 [(config.st_root,[]) ] in let fst_tr = - ?< (config.st_root) >< (TagSet.star,false) >=> - (`Left** q0) *& (if config.has_backward then `Left ** config.st_from_root else Ata.true_) + ?< (config.st_root) >< (TagSet.singleton (Tag.tag ""),false) >=> + ((if is_rec dirsteps then `LLeft else `Left)** q0) *& (if config.has_backward then `LLeft ** config.st_from_root else Ata.true_) in add_trans num config.tr fst_tr; - if config.has_backward then begin + if config.has_backward then begin add_trans num config.tr_aux - (?< (config.st_from_root) >< (TagSet.star,false) >=> `Left ** config.st_from_root +| - `Right ** config.st_from_root); + (?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft ** config.st_from_root); add_trans num config.tr_aux - (?< (config.st_from_root) >< (TagSet.cup TagSet.pcdata TagSet.attribute,false) >=> - `Right ** config.st_from_root); + (?< (config.st_from_root) >< (TagSet.any,false) >=> + `RRight ** config.st_from_root); end; let phi = Hashtbl.create 37 in @@ -542,11 +547,10 @@ let compile path = Ata.states = if has_backward then Ptset.add config.st_from_root a_st else a_st; Ata.init = Ptset.singleton config.st_root; Ata.final = Ptset.union anc_st config.final_state; - Ata.universal = Ptset.union anc_st config.final_state; + Ata.universal = Ptset.add a_dst (Ptset.from_list config.univ_states); Ata.phi = phi; - Ata.delta = Hashtbl.create 17; Ata.sigma = Ata.HTagSet.create 17; - },[] + },config.entry_points,!contains end