(* Copyright NICTA 2008 *)
(* Distributed under the terms of the LGPL (see LICENCE) *)
(******************************************************************************)
-INCLUDE "debug.ml";;
#load "pa_extend.cmo";;
-
+let contains = ref None
module Ast =
struct
(* The steps are in reverse order !!!! *)
| [ "." ; p = top_pred -> [(Self,TagSet.node,p)] ]
| [ ".." ; p = top_pred -> [(Parent,TagSet.star,p)] ]
+| [ "contains"; "(" ; s = STRING ; ")";p=top_pred -> [
+ let _ = contains := Some(s) in (Child,TagSet.singleton Tag.pcdata, p)]
+ ]
+| [ "contains_full"; "(" ; s = STRING ; ")";p=top_pred -> [
+ let _ = contains := Some(s) in (Descendant,TagSet.singleton Tag.pcdata, p)]
+ ]
| [ test = test; p = top_pred -> [(Child,test, p)] ]
| [ att = ATT ; p = top_pred ->
match att with
module Compile = struct
open Ast
+type transition = Ata.State.t*TagSet.t*Ata.Transition.t
-type config = { st_root : Ata.state; (* state matching the root element (initial state) *)
- st_univ : Ata.state; (* universal state accepting anything *)
- st_from_root : Ata.state; (* state chaining the root and the current position *)
- mutable final_state : Ptset.t;
+type config = { st_root : Ata.State.t; (* state matching the root element (initial state) *)
+ st_univ : Ata.State.t; (* universal state accepting anything *)
+ st_from_root : Ata.State.t; (* state chaining the root and the current position *)
+ mutable final_state : Ata.StateSet.t;
mutable has_backward: bool;
(* To store transitions *)
(* Key is the from state, (i,l) -> i the number of the step and l the list of trs *)
- tr_parent_loop : (Ata.state,int*(Ata.transition list)) Hashtbl.t;
- tr : (Ata.state,int*(Ata.transition list)) Hashtbl.t;
- tr_aux : (Ata.state,int*(Ata.transition list)) Hashtbl.t;
+ tr_parent_loop : (Ata.State.t,int*(transition list)) Hashtbl.t;
+ tr : (Ata.State.t,int*(transition list)) Hashtbl.t;
+ tr_aux : (Ata.State.t,int*(transition list)) Hashtbl.t;
+ mutable entry_points : (Tag.t*Ata.StateSet.t) list;
+ mutable contains : string option;
+ mutable univ_states : Ata.State.t list;
+ mutable starstate : Ata.StateSet.t option;
}
let dummy_conf = { st_root = -1;
st_univ = -1;
st_from_root = -1;
- final_state = Ptset.empty;
+ final_state = Ata.StateSet.empty;
has_backward = false;
tr_parent_loop = Hashtbl.create 0;
tr = Hashtbl.create 0;
tr_aux = Hashtbl.create 0;
+ entry_points = [];
+ contains = None;
+ univ_states = [];
+ starstate = None;
}
let _r =
function (`Left|`Last) -> `Right
| `Right -> `Left
-let _l = function (`Left|`Last) -> `Left
- | `Right -> `Right
+ | `RRight -> `LLeft
+ | `LLeft -> `RRight
-open Ata.Transitions
+let _l =
+ function (`Left|`Last) -> `Left
+ | `Right -> `Right
+ | `RRight -> `RRight
+ | `LLeft -> `LLeft
-let add_trans num htr ((q,_,_,_,_) as tr) =
- try
- let (i,ltr) = Hashtbl.find htr q in
- if List.exists (Ata.equal_trans tr) ltr
- then ()
- else Hashtbl.replace htr q (i,(tr::ltr))
- with
- | Not_found -> Hashtbl.add htr q (num,[tr])
+open Ata.Transition.Infix
+open Ata.Formula.Infix
-exception Exit of Ata.state * Ata.transition list
-let rec replace s f =
- match f.Ata.pos with
- | Ata.Atom(_,b,q) when q = s -> if b then Ata.true_ else Ata.false_
- | Ata.Or(f1,f2) -> (replace s f1) +| (replace s f2)
- | Ata.And(f1,f2) -> (replace s f1) *& (replace s f2)
- | _ -> f
-
-let or_self conf old_dst q_src q_dst dir test pred mark =
- try
- let (num,l) = Hashtbl.find conf.tr q_src in
- let l2 = List.fold_left (fun acc (q,t,m,f,_) ->
- (q,
- TagSet.cap t test,
- mark,
- (if mark then replace old_dst f else f)
- *& pred *&
- (if mark then Ata.true_ else (_l dir) ** q_dst),
- `True)::acc)
- l l
- in Hashtbl.replace conf.tr q_src (num,l2)
- with Not_found -> ()
-
-
-let nst = Ata.mk_state
-let att_or_str = TagSet.add Tag.pcdata TagSet.attribute
+(* Todo : fix *)
+let add_trans num htr ((q,ts,_)as tr) =
+ Hashtbl.add htr q (num,[tr])
+
let vpush x y = (x,[]) :: y
let hpush x y =
match y with
| (x,z::y) ::r -> z,(x,y)::r
| _-> assert false
-let rec compile_step ?(existential=false) conf q_src dir ctx_path step num =
+let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num =
let ex = existential in
let axis,test,pred = step in
let is_last = dir = `Last in
st_univ = q_univ;
st_from_root = q_frm_root } = conf
in
- let q_dst = Ata.mk_state() in
+ let q_dst = Ata.State.make() in
let p_st, p_anc, p_par, p_pre, p_num, p_f =
compile_pred conf q_src num ctx_path dir pred q_dst
in
-
let new_st,new_dst, new_ctx =
match axis with
- | Child | FollowingSibling | Descendant | DescendantOrSelf ->
- let axis =
- if axis = DescendantOrSelf
- then
- begin
- or_self conf q_src (fst(vpop ctx_path)) q_dst dir test p_f (is_last && not(existential));
- Descendant
- end
- else axis
+ | Child | Descendant ->
+ if (TagSet.is_finite test)
+ then conf.entry_points <- (TagSet.choose test,Ata.StateSet.singleton q_src)::conf.entry_points;
+ let left,right =
+ if nrec then `LLeft,`RRight
+ else `Left,`Right
+ in
+ let _ = if is_last && axis=Descendant && TagSet.equal test TagSet.star
+ then conf.starstate <- Some(Ata.StateSet.singleton q_src)
+ in
+ let t1,ldst = ?< q_src><(test, is_last && not(ex))>=>
+ p_f *& ( if is_last then Ata.Formula.true_ else (_l left) *+ q_dst),
+ ( if is_last then [] else [q_dst])
in
- let t1 = ?< q_src><(test, is_last && not(ex))>=>
- p_f *& (if is_last then Ata.true_ else (_l dir) ** q_dst) in
- let _ = add_trans num conf.tr t1 in
-
-
+ let _ = add_trans num conf.tr t1 in
let _ = if axis=Descendant then
add_trans num conf.tr_aux (
- ?< q_src><@ ((if ex then TagSet.diff TagSet.star test
- else TagSet.star),false,
- if TagSet.is_finite test
- then `Left(fun t ->
- if (Tree.Binary.is_node t)
- then
- let mytag = Tree.Binary.tag t in
- TagSet.exists (fun tag ->
- tag == mytag ||
- Tree.Binary.has_tagged_desc t tag
- )
- test
- else true
- )
-
- else `True )>=> `Left ** q_src )
+ ?< q_src><@ ((if ex||nrec then TagSet.diff TagSet.star test
+ else TagSet.star),false)>=>
+ (if TagSet.equal test TagSet.star then
+ `Left else `LLeft) *+ q_src )
in
let t3 =
?< q_src><@ ((if ex then TagSet.diff TagSet.any test
- else TagSet.any), false,
- if axis=Descendant&&TagSet.is_finite test
- then `True (*`Right(fun t ->
- TagSet.exists (fun tag -> Tree.Binary.has_tagged_foll t tag)
- test) *)
- else `True )>=>
- if ex then ( Ata.atom_ `Left false q_src) *& `Right ** q_src
- else `Right ** q_src
+ else TagSet.any), false)>=>
+ (if axis=Descendant && (not (TagSet.equal test TagSet.star)) then
+ `RRight else `Right) *+ q_src
in
let _ = add_trans num conf.tr_aux t3
in
- [q_dst], q_dst,
+ ldst, q_dst,
(if axis = FollowingSibling then hpush q_src ctx_path else vpush q_src ctx_path)
| Attribute ->
- let q_dstreal = Ata.mk_state() in
+ let q_dstreal = Ata.State.make() in
(* attributes are always the first child *)
let t1 = ?< q_src><(TagSet.attribute,false)>=>
- `Left ** q_dst in
+ `Left *+ q_dst in
let t2 = ?< q_dst><(test, is_last && not(existential))>=>
- if is_last then Ata.true_ else `Left ** q_dstreal in
- let tsa = ?< q_dst><(TagSet.star, false)>=> `Right ** q_dst
+ if is_last then Ata.Formula.true_ else `Left *+ q_dstreal in
+ let tsa = ?< q_dst><(TagSet.star, false)>=> `Right *+ q_dst
in
add_trans num conf.tr t1;
add_trans num conf.tr_aux t2;
[q_dst;q_dstreal], q_dstreal,
ctx_path
- | Ancestor | AncestorOrSelf ->
- conf.has_backward <- true;
- let up_states, new_ctx =
- List.map (fst) ctx_path, (vpush q_root [])
- in
- let _ = if axis = AncestorOrSelf then
- or_self conf q_src (fst(vpop ctx_path)) q_dst dir test p_f (is_last && not(existential));
- in
- let fc = List.fold_left (fun f s -> ((_l dir)**s +|f)) Ata.false_ up_states
- in
- let t1 = ?< q_frm_root><(test,is_last && (not existential) )>=>
- (if is_last then Ata.true_ else (_l dir) ** q_dst) *& fc in
- add_trans num conf.tr t1;
- [q_dst ], q_dst, vpush q_frm_root new_ctx
-
- | Parent ->
- conf.has_backward <- true;
- let q_self,new_ctx =
- match ctx_path with
- | (a,_)::[] -> a, vpush q_root []
- | (a,_)::r -> a, r
- | _ -> assert false
- in
- let t1 = ?< q_frm_root>< (test,is_last && (not existential)) >=>
- (if is_last then Ata.true_ else (_l dir) ** q_dst) *& (_l dir) ** q_self in
- add_trans num conf.tr t1;
- [ q_dst ], q_dst, vpush q_frm_root new_ctx
| _ -> assert false
in
- (* todo change everything to Ptset *)
- (Ptset.elements (Ptset.union p_st (Ptset.from_list new_st)),
+ (* todo change everything to Ata.StateSet *)
+ (Ata.StateSet.elements (Ata.StateSet.union p_st (Ata.StateSet.from_list new_st)),
new_dst,
new_ctx)
-
+and is_rec = function
+ [] -> false
+ | ((axis,_,_),_)::_ ->
+ match axis with
+ Descendant | Ancestor -> true
+ | _ -> false
+
and compile_path ?(existential=false) annot_path config q_src states idx ctx_path =
List.fold_left
- (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward) (step,dir) ->
+ (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward,a_isrec) (step,dir) ->
let add_states,new_dst,new_ctx =
- compile_step ~existential:existential config a_dst dir ctx_path step num
+ compile_step ~existential:existential config a_dst dir ctx_path (is_rec a_isrec) step num
in
- let new_states = Ptset.union (Ptset.from_list add_states) a_st in
+ let new_states = Ata.StateSet.union (Ata.StateSet.from_list add_states) a_st in
let nanc_st,npar_st,npre_st,new_bw =
match step with
- |PrecedingSibling,_,_ -> anc_st,par_st,Ptset.add a_dst pre_st,true
- |(Parent|Ancestor|AncestorOrSelf),_,_ -> Ptset.add a_dst anc_st,par_st,pre_st,true
+ |PrecedingSibling,_,_ -> anc_st,par_st,Ata.StateSet.add a_dst pre_st,true
+ |(Parent|Ancestor|AncestorOrSelf),_,_ -> Ata.StateSet.add a_dst anc_st,par_st,pre_st,true
| _ -> anc_st,par_st,pre_st,has_backward
in
- new_states,new_dst,nanc_st,npar_st,npre_st,new_ctx, num+1,new_bw
+ new_states,new_dst,nanc_st,npar_st,npre_st,new_ctx, num+1,new_bw,(match a_isrec with [] -> [] | _::r -> r)
)
- (states, q_src, Ptset.empty,Ptset.empty,Ptset.empty, ctx_path,idx, false )
+ (states, q_src, Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty, ctx_path,idx, false,(List.tl annot_path) )
annot_path
-
+
and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst =
let a_st1,anc_st1,par_st1,pre_st1,idx1,f1 =
compile_pred conf q_src idx ctx_path dir p1 ddst in
let a_st2,anc_st2,par_st2,pre_st2,idx2,f2 =
compile_pred conf q_src idx1 ctx_path dir p2 ddst
in
- Ptset.union a_st1 a_st2,
- Ptset.union anc_st1 anc_st2,
- Ptset.union par_st1 par_st2,
- Ptset.union pre_st1 pre_st2,
+ Ata.StateSet.union a_st1 a_st2,
+ Ata.StateSet.union anc_st1 anc_st2,
+ Ata.StateSet.union par_st1 par_st2,
+ Ata.StateSet.union pre_st1 pre_st2,
idx2, (f f1 f2)
and compile_pred conf q_src idx ctx_path dir pred qdst =
binop_ conf q_src idx ctx_path dir pred p1 p2 (( +| )) qdst
| And(p1,p2) ->
binop_ conf q_src idx ctx_path dir pred p1 p2 (( *& )) qdst
- | Expr e -> compile_expr conf Ptset.empty q_src idx ctx_path dir e qdst
+ | Expr e -> compile_expr conf Ata.StateSet.empty q_src idx ctx_path dir e qdst
| Not(p) ->
let a_st,anc_st,par_st,pre_st,idx,f =
compile_pred conf q_src idx ctx_path dir p qdst
- in a_st,anc_st,par_st,pre_st,idx, Ata.not_ f
+ in a_st,anc_st,par_st,pre_st,idx, Ata.Formula.not_ f
and compile_expr conf states q_src idx ctx_path dir e qdst =
match e with
| Path (p) ->
- let q = Ata.mk_state () in
+ let q = Ata.State.make () in
let annot_path = match p with Relative(r) -> dirannot (List.rev r) | _ -> assert false in
- let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward =
+ let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward,_ =
compile_path ~existential:true annot_path conf q states idx ctx_path
in
let ret_dir = match annot_path with
| _ -> `Left
in
let _ = match annot_path with
- | (((Parent|Ancestor|AncestorOrSelf),_,_),_)::_ -> conf.final_state <- Ptset.add qdst conf.final_state
+ | (((Parent|Ancestor|AncestorOrSelf),_,_),_)::_ -> conf.final_state <- Ata.StateSet.add qdst conf.final_state
| _ -> ()
- in
- (a_st,anc_st,par_st,pre_st,idx, ((ret_dir) ** q))
- | True -> states,Ptset.empty,Ptset.empty,Ptset.empty,idx,Ata.true_
- | False -> states,Ptset.empty,Ptset.empty,Ptset.empty,idx,Ata.false_
+ in let _ = conf.univ_states <- a_dst::conf.univ_states in
+ (a_st,anc_st,par_st,pre_st,idx, ((ret_dir) *+ q))
+ | True -> states,Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty,idx,Ata.Formula.true_
+ | False -> states,Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty,idx,Ata.Formula.false_
| _ -> assert false
| p::(((FollowingSibling),_,_)::_ as l) -> (p,`Right)::(dirannot l)
| p::l -> (p,`Left) :: (dirannot l)
-let compile path =
+let compile ?(querystring="") path =
let steps =
match path with
| Absolute(steps)
in
let steps = List.rev steps in
let dirsteps = dirannot steps in
- let config = { st_root = Ata.mk_state();
- st_univ = Ata.mk_state();
- final_state = Ptset.empty;
- st_from_root = Ata.mk_state();
+ let config = { st_root = Ata.State.make();
+ st_univ = Ata.State.make();
+ final_state = Ata.StateSet.empty;
+ st_from_root = Ata.State.make();
has_backward = false;
tr_parent_loop = Hashtbl.create 5;
tr = Hashtbl.create 5;
tr_aux = Hashtbl.create 5;
+ entry_points = [];
+ contains = None;
+ univ_states = [];
+ starstate = None;
}
in
- let q0 = Ata.mk_state() in
- let states = Ptset.from_list [config.st_univ;config.st_root]
+ let q0 = Ata.State.make() in
+ let states = Ata.StateSet.from_list [config.st_univ;config.st_root]
in
let num = 0 in
(* add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_univ config.st_from_root);
add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_from_root config.st_univ);
add_trans num config.tr_aux (mk_step config.st_no_nil (TagSet.add Tag.pcdata TagSet.star) `Left config.st_univ config.st_univ);
*)
- let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward =
+ let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward,_ =
compile_path dirsteps config q0 states 0 [(config.st_root,[]) ]
in
let fst_tr =
- ?< (config.st_root) >< (TagSet.star,false) >=>
- (`Left** q0) *& (if config.has_backward then `Left ** config.st_from_root else Ata.true_)
+ ?< (config.st_root) >< (TagSet.singleton (Tag.tag ""),false) >=>
+ ((if is_rec dirsteps then `LLeft else `Left)*+ q0) *& (if config.has_backward then `LLeft *+ config.st_from_root else Ata.Formula.true_)
in
add_trans num config.tr fst_tr;
- if config.has_backward then begin
+ if config.has_backward then begin
add_trans num config.tr_aux
- (?< (config.st_from_root) >< (TagSet.star,false) >=> `Left ** config.st_from_root +|
- `Right ** config.st_from_root);
+ (?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft *+ config.st_from_root);
add_trans num config.tr_aux
- (?< (config.st_from_root) >< (TagSet.cup TagSet.pcdata TagSet.attribute,false) >=>
- `Right ** config.st_from_root);
+ (?< (config.st_from_root) >< (TagSet.any,false) >=>
+ `RRight *+ config.st_from_root);
end;
let phi = Hashtbl.create 37 in
- let fadd = fun _ (_,l) -> List.iter (fun (s,t,m,f,p) ->
+ let fadd = fun _ (_,l) -> List.iter (fun (s,t,tr) ->
let lt = try
Hashtbl.find phi s
- with Not_found -> []
+ with Not_found -> []
in
- Hashtbl.replace phi s ((t,(m,f,p))::lt)
+ Hashtbl.replace phi s ((t,tr)::lt)
) l in
Hashtbl.iter (fadd) config.tr;
Hashtbl.iter (fadd) config.tr_aux;
Hashtbl.iter (fadd) config.tr_parent_loop;
let final =
- let s = Ptset.union anc_st (Ptset.from_list [])
- in if has_backward then Ptset.add config.st_from_root s else s
+ let s = anc_st
+ in if has_backward then Ata.StateSet.add config.st_from_root s else s
in { Ata.id = Oo.id (object end);
- Ata.states = if has_backward then Ptset.add config.st_from_root a_st else a_st;
- Ata.init = Ptset.singleton config.st_root;
- Ata.final = Ptset.union anc_st config.final_state;
- Ata.universal = Ptset.union anc_st config.final_state;
- Ata.phi = phi;
- Ata.delta = Hashtbl.create 17;
- Ata.sigma = Ata.HTagSet.create 17;
- },[]
+ Ata.states = Hashtbl.fold (fun q _ acc -> Ata.StateSet.add q acc) phi Ata.StateSet.empty;
+ Ata.init = Ata.StateSet.singleton config.st_root;
+ Ata.trans = phi;
+ Ata.starstate = config.starstate;
+ Ata.query_string = querystring;
+ },config.entry_points,!contains
end