X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=xPath.ml;h=bef0336a344f1c477ba75173173385fe4fde8a06;hb=f0557f21878be17ddc75b1bc8f4f86da68c8e604;hp=4d83634b101a08e5952193881cadeebd94bf98f0;hpb=7489c542a7b7357a1c2bbc436d1d77c601833d3b;p=SXSI%2Fxpathcomp.git diff --git a/xPath.ml b/xPath.ml index 4d83634..bef0336 100644 --- a/xPath.ml +++ b/xPath.ml @@ -4,7 +4,6 @@ (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) -INCLUDE "debug.ml";; #load "pa_extend.cmo";; let contains = ref None module Ast = @@ -216,25 +215,27 @@ end module Compile = struct open Ast +type transition = Ata.State.t*TagSet.t*Ata.Transition.t -type config = { st_root : Ata.state; (* state matching the root element (initial state) *) - st_univ : Ata.state; (* universal state accepting anything *) - st_from_root : Ata.state; (* state chaining the root and the current position *) - mutable final_state : Ptset.t; +type config = { st_root : Ata.State.t; (* state matching the root element (initial state) *) + st_univ : Ata.State.t; (* universal state accepting anything *) + st_from_root : Ata.State.t; (* state chaining the root and the current position *) + mutable final_state : Ata.StateSet.t; mutable has_backward: bool; (* To store transitions *) (* Key is the from state, (i,l) -> i the number of the step and l the list of trs *) - tr_parent_loop : (Ata.state,int*(Ata.transition list)) Hashtbl.t; - tr : (Ata.state,int*(Ata.transition list)) Hashtbl.t; - tr_aux : (Ata.state,int*(Ata.transition list)) Hashtbl.t; - mutable entry_points : (Tag.t*Ptset.t) list; + tr_parent_loop : (Ata.State.t,int*(transition list)) Hashtbl.t; + tr : (Ata.State.t,int*(transition list)) Hashtbl.t; + tr_aux : (Ata.State.t,int*(transition list)) Hashtbl.t; + mutable entry_points : (Tag.t*Ata.StateSet.t) list; mutable contains : string option; - mutable univ_states : Ata.state list; + mutable univ_states : Ata.State.t list; + mutable starstate : Ata.StateSet.t option; } let dummy_conf = { st_root = -1; st_univ = -1; st_from_root = -1; - final_state = Ptset.empty; + final_state = Ata.StateSet.empty; has_backward = false; tr_parent_loop = Hashtbl.create 0; tr = Hashtbl.create 0; @@ -242,6 +243,7 @@ let dummy_conf = { st_root = -1; entry_points = []; contains = None; univ_states = []; + starstate = None; } @@ -259,45 +261,14 @@ let _l = | `LLeft -> `LLeft -open Ata.Transitions +open Ata.Transition.Infix +open Ata.Formula.Infix -let add_trans num htr ((q,_,_,_,_) as tr) = - try - let (i,ltr) = Hashtbl.find htr q in - if List.exists (Ata.equal_trans tr) ltr - then () - else Hashtbl.replace htr q (i,(tr::ltr)) - with - | Not_found -> Hashtbl.add htr q (num,[tr]) +(* Todo : fix *) +let add_trans num htr ((q,ts,_)as tr) = + Hashtbl.add htr q (num,[tr]) -exception Exit of Ata.state * Ata.transition list -let rec replace s f = - match f.Ata.pos with - | Ata.Atom(_,b,q) when q = s -> if b then Ata.true_ else Ata.false_ - | Ata.Or(f1,f2) -> (replace s f1) +| (replace s f2) - | Ata.And(f1,f2) -> (replace s f1) *& (replace s f2) - | _ -> f - - -let or_self conf old_dst q_src q_dst dir test pred mark = - try - let (num,l) = Hashtbl.find conf.tr q_src in - let l2 = List.fold_left (fun acc (q,t,m,f,_) -> - (q, - TagSet.cap t test, - mark, - (if mark then replace old_dst f else f) - *& pred *& - (if mark then Ata.true_ else (_l dir) ** q_dst), - false)::acc) - l l - in Hashtbl.replace conf.tr q_src (num,l2) - with Not_found -> () - - -let nst = Ata.mk_state -let att_or_str = TagSet.add Tag.pcdata TagSet.attribute let vpush x y = (x,[]) :: y let hpush x y = match y with @@ -320,50 +291,55 @@ let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num st_univ = q_univ; st_from_root = q_frm_root } = conf in - let q_dst = Ata.mk_state() in + let q_dst = Ata.State.make() in let p_st, p_anc, p_par, p_pre, p_num, p_f = compile_pred conf q_src num ctx_path dir pred q_dst in let new_st,new_dst, new_ctx = match axis with - | Child | Descendant -> + if (TagSet.is_finite test) + then conf.entry_points <- (TagSet.choose test,Ata.StateSet.singleton q_src)::conf.entry_points; let left,right = if nrec then `LLeft,`RRight else `Left,`Right in - - let t1 = ?< q_src><(test, is_last && not(ex))>=> - p_f *& ( if false (*is_last*) then Ata.true_ else (_l left) ** q_dst) in + let _ = if is_last && axis=Descendant && TagSet.equal test TagSet.star + then conf.starstate <- Some(Ata.StateSet.singleton q_src) + in + let t1,ldst = ?< q_src><(test, is_last && not(ex))>=> + p_f *& ( if is_last then Ata.Formula.true_ else (_l left) *+ q_dst), + ( if is_last then [] else [q_dst]) + in - let _ = add_trans num conf.tr t1 in - - + let _ = add_trans num conf.tr t1 in let _ = if axis=Descendant then add_trans num conf.tr_aux ( ?< q_src><@ ((if ex||nrec then TagSet.diff TagSet.star test - else TagSet.star),false)>=> `LLeft ** q_src ) + else TagSet.star),false)>=> + (if TagSet.equal test TagSet.star then + `Left else `LLeft) *+ q_src ) in let t3 = ?< q_src><@ ((if ex then TagSet.diff TagSet.any test else TagSet.any), false)>=> - if ex then right ** q_src - else (if axis=Descendant then `RRight else `Right) ** q_src + (if axis=Descendant && (not (TagSet.equal test TagSet.star)) then + `RRight else `Right) *+ q_src in let _ = add_trans num conf.tr_aux t3 in - [q_dst], q_dst, + ldst, q_dst, (if axis = FollowingSibling then hpush q_src ctx_path else vpush q_src ctx_path) | Attribute -> - let q_dstreal = Ata.mk_state() in + let q_dstreal = Ata.State.make() in (* attributes are always the first child *) let t1 = ?< q_src><(TagSet.attribute,false)>=> - `Left ** q_dst in + `Left *+ q_dst in let t2 = ?< q_dst><(test, is_last && not(existential))>=> - if is_last then Ata.true_ else `Left ** q_dstreal in - let tsa = ?< q_dst><(TagSet.star, false)>=> `Right ** q_dst + if is_last then Ata.Formula.true_ else `Left *+ q_dstreal in + let tsa = ?< q_dst><(TagSet.star, false)>=> `Right *+ q_dst in add_trans num conf.tr t1; add_trans num conf.tr_aux t2; @@ -371,38 +347,11 @@ let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num [q_dst;q_dstreal], q_dstreal, ctx_path - | Ancestor | AncestorOrSelf -> - conf.has_backward <- true; - let up_states, new_ctx = - List.fold_left (fun acc (q,_) -> if q == q_root then acc else q::acc) [] ctx_path, (vpush q_root []) - in - let _ = if axis = AncestorOrSelf then - or_self conf q_src (fst(vpop ctx_path)) q_dst dir test p_f (is_last && not(existential)); - in - let fc = List.fold_left (fun f s -> ((_l dir)**s +|f)) Ata.false_ up_states - in - let t1 = ?< q_frm_root><(test,is_last && (not existential) )>=> - ( (*if is_last then Ata.true_ else *) (`LLeft ) ** q_dst) *& fc in - add_trans num conf.tr t1; - [q_dst ], q_dst, vpush q_frm_root new_ctx - - | Parent -> - conf.has_backward <- true; - let q_self,new_ctx = - match ctx_path with - | (a,_)::[] -> a, vpush q_root [] - | (a,_)::r -> a, r - | _ -> assert false - in - let t1 = ?< q_frm_root>< (test,is_last && (not existential)) >=> - (if is_last then Ata.true_ else (_l dir) ** q_dst) *& (_l dir) ** q_self in - add_trans num conf.tr t1; - [ q_dst ], q_dst, vpush q_frm_root new_ctx | _ -> assert false in - (* todo change everything to Ptset *) - (Ptset.elements (Ptset.union p_st (Ptset.from_list new_st)), + (* todo change everything to Ata.StateSet *) + (Ata.StateSet.elements (Ata.StateSet.union p_st (Ata.StateSet.from_list new_st)), new_dst, new_ctx) and is_rec = function @@ -418,16 +367,16 @@ and compile_path ?(existential=false) annot_path config q_src states idx ctx_pat let add_states,new_dst,new_ctx = compile_step ~existential:existential config a_dst dir ctx_path (is_rec a_isrec) step num in - let new_states = Ptset.union (Ptset.from_list add_states) a_st in + let new_states = Ata.StateSet.union (Ata.StateSet.from_list add_states) a_st in let nanc_st,npar_st,npre_st,new_bw = match step with - |PrecedingSibling,_,_ -> anc_st,par_st,Ptset.add a_dst pre_st,true - |(Parent|Ancestor|AncestorOrSelf),_,_ -> Ptset.add a_dst anc_st,par_st,pre_st,true + |PrecedingSibling,_,_ -> anc_st,par_st,Ata.StateSet.add a_dst pre_st,true + |(Parent|Ancestor|AncestorOrSelf),_,_ -> Ata.StateSet.add a_dst anc_st,par_st,pre_st,true | _ -> anc_st,par_st,pre_st,has_backward in new_states,new_dst,nanc_st,npar_st,npre_st,new_ctx, num+1,new_bw,(match a_isrec with [] -> [] | _::r -> r) ) - (states, q_src, Ptset.empty,Ptset.empty,Ptset.empty, ctx_path,idx, false,(List.tl annot_path) ) + (states, q_src, Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty, ctx_path,idx, false,(List.tl annot_path) ) annot_path and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst = @@ -436,10 +385,10 @@ and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst = let a_st2,anc_st2,par_st2,pre_st2,idx2,f2 = compile_pred conf q_src idx1 ctx_path dir p2 ddst in - Ptset.union a_st1 a_st2, - Ptset.union anc_st1 anc_st2, - Ptset.union par_st1 par_st2, - Ptset.union pre_st1 pre_st2, + Ata.StateSet.union a_st1 a_st2, + Ata.StateSet.union anc_st1 anc_st2, + Ata.StateSet.union par_st1 par_st2, + Ata.StateSet.union pre_st1 pre_st2, idx2, (f f1 f2) and compile_pred conf q_src idx ctx_path dir pred qdst = @@ -448,16 +397,16 @@ and compile_pred conf q_src idx ctx_path dir pred qdst = binop_ conf q_src idx ctx_path dir pred p1 p2 (( +| )) qdst | And(p1,p2) -> binop_ conf q_src idx ctx_path dir pred p1 p2 (( *& )) qdst - | Expr e -> compile_expr conf Ptset.empty q_src idx ctx_path dir e qdst + | Expr e -> compile_expr conf Ata.StateSet.empty q_src idx ctx_path dir e qdst | Not(p) -> let a_st,anc_st,par_st,pre_st,idx,f = compile_pred conf q_src idx ctx_path dir p qdst - in a_st,anc_st,par_st,pre_st,idx, Ata.not_ f + in a_st,anc_st,par_st,pre_st,idx, Ata.Formula.not_ f and compile_expr conf states q_src idx ctx_path dir e qdst = match e with | Path (p) -> - let q = Ata.mk_state () in + let q = Ata.State.make () in let annot_path = match p with Relative(r) -> dirannot (List.rev r) | _ -> assert false in let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward,_ = compile_path ~existential:true annot_path conf q states idx ctx_path @@ -467,12 +416,12 @@ and compile_expr conf states q_src idx ctx_path dir e qdst = | _ -> `Left in let _ = match annot_path with - | (((Parent|Ancestor|AncestorOrSelf),_,_),_)::_ -> conf.final_state <- Ptset.add qdst conf.final_state + | (((Parent|Ancestor|AncestorOrSelf),_,_),_)::_ -> conf.final_state <- Ata.StateSet.add qdst conf.final_state | _ -> () in let _ = conf.univ_states <- a_dst::conf.univ_states in - (a_st,anc_st,par_st,pre_st,idx, ((ret_dir) ** q)) - | True -> states,Ptset.empty,Ptset.empty,Ptset.empty,idx,Ata.true_ - | False -> states,Ptset.empty,Ptset.empty,Ptset.empty,idx,Ata.false_ + (a_st,anc_st,par_st,pre_st,idx, ((ret_dir) *+ q)) + | True -> states,Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty,idx,Ata.Formula.true_ + | False -> states,Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty,idx,Ata.Formula.false_ | _ -> assert false @@ -482,7 +431,7 @@ and dirannot = function | p::(((FollowingSibling),_,_)::_ as l) -> (p,`Right)::(dirannot l) | p::l -> (p,`Left) :: (dirannot l) -let compile path = +let compile ?(querystring="") path = let steps = match path with | Absolute(steps) @@ -491,11 +440,10 @@ let compile path = in let steps = List.rev steps in let dirsteps = dirannot steps in - let _ = Ata.mk_state() in - let config = { st_root = Ata.mk_state(); - st_univ = Ata.mk_state(); - final_state = Ptset.empty; - st_from_root = Ata.mk_state(); + let config = { st_root = Ata.State.make(); + st_univ = Ata.State.make(); + final_state = Ata.StateSet.empty; + st_from_root = Ata.State.make(); has_backward = false; tr_parent_loop = Hashtbl.create 5; tr = Hashtbl.create 5; @@ -503,10 +451,11 @@ let compile path = entry_points = []; contains = None; univ_states = []; + starstate = None; } in - let q0 = Ata.mk_state() in - let states = Ptset.from_list [config.st_univ;config.st_root] + let q0 = Ata.State.make() in + let states = Ata.StateSet.from_list [config.st_univ;config.st_root] in let num = 0 in (* add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_univ config.st_from_root); @@ -518,38 +467,37 @@ let compile path = in let fst_tr = ?< (config.st_root) >< (TagSet.singleton (Tag.tag ""),false) >=> - ((if is_rec dirsteps then `LLeft else `Left)** q0) *& (if config.has_backward then `LLeft ** config.st_from_root else Ata.true_) + ((if is_rec dirsteps then `LLeft else `Left)*+ q0) *& (if config.has_backward then `LLeft *+ config.st_from_root else Ata.Formula.true_) in add_trans num config.tr fst_tr; if config.has_backward then begin add_trans num config.tr_aux - (?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft ** config.st_from_root); + (?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft *+ config.st_from_root); add_trans num config.tr_aux (?< (config.st_from_root) >< (TagSet.any,false) >=> - `RRight ** config.st_from_root); + `RRight *+ config.st_from_root); end; let phi = Hashtbl.create 37 in - let fadd = fun _ (_,l) -> List.iter (fun (s,t,m,f,p) -> + let fadd = fun _ (_,l) -> List.iter (fun (s,t,tr) -> let lt = try Hashtbl.find phi s - with Not_found -> [] + with Not_found -> [] in - Hashtbl.replace phi s ((t,(m,f,p))::lt) + Hashtbl.replace phi s ((t,tr)::lt) ) l in Hashtbl.iter (fadd) config.tr; Hashtbl.iter (fadd) config.tr_aux; Hashtbl.iter (fadd) config.tr_parent_loop; let final = - let s = Ptset.union anc_st (Ptset.from_list []) - in if has_backward then Ptset.add config.st_from_root s else s + let s = anc_st + in if has_backward then Ata.StateSet.add config.st_from_root s else s in { Ata.id = Oo.id (object end); - Ata.states = if has_backward then Ptset.add config.st_from_root a_st else a_st; - Ata.init = Ptset.singleton config.st_root; - Ata.final = Ptset.union anc_st config.final_state; - Ata.universal = Ptset.add a_dst (Ptset.from_list config.univ_states); - Ata.phi = phi; - Ata.sigma = Ata.HTagSet.create 17; + Ata.states = Hashtbl.fold (fun q _ acc -> Ata.StateSet.add q acc) phi Ata.StateSet.empty; + Ata.init = Ata.StateSet.singleton config.st_root; + Ata.trans = phi; + Ata.starstate = config.starstate; + Ata.query_string = querystring; },config.entry_points,!contains