(* Copyright NICTA 2008 *)
(* Distributed under the terms of the LGPL (see LICENCE) *)
(******************************************************************************)
-INCLUDE "debug.ml";;
#load "pa_extend.cmo";;
let contains = ref None
module Ast =
| [ "." ; p = top_pred -> [(Self,TagSet.node,p)] ]
| [ ".." ; p = top_pred -> [(Parent,TagSet.star,p)] ]
| [ "contains"; "(" ; s = STRING ; ")";p=top_pred -> [
- let _ = contains := Some(s) in (Child,TagSet.singleton Tag.pcdata, p)]
+ let _ = contains := Some((`CONTAINS,s)) in (Child,TagSet.singleton Tag.pcdata, p)]
]
-| [ "contains_full"; "(" ; s = STRING ; ")";p=top_pred -> [
- let _ = contains := Some(s) in (Descendant,TagSet.singleton Tag.pcdata, p)]
+| [ "equals"; "(" ; s = STRING ; ")";p=top_pred -> [
+ let _ = contains := Some((`EQUALS,s)) in (Child,TagSet.singleton Tag.pcdata, p)]
+ ]
+| [ "startswith"; "(" ; s = STRING ; ")";p=top_pred -> [
+ let _ = contains := Some((`STARTSWITH,s)) in (Child,TagSet.singleton Tag.pcdata, p)]
+ ]
+| [ "endswith"; "(" ; s = STRING ; ")";p=top_pred -> [
+ let _ = contains := Some((`ENDSWITH,s)) in (Child,TagSet.singleton Tag.pcdata, p)]
]
| [ test = test; p = top_pred -> [(Child,test, p)] ]
| [ att = ATT ; p = top_pred ->
module Compile = struct
open Ast
+type transition = Ata.State.t*TagSet.t*Ata.Transition.t
-type config = { st_root : Ata.state; (* state matching the root element (initial state) *)
- st_univ : Ata.state; (* universal state accepting anything *)
- st_from_root : Ata.state; (* state chaining the root and the current position *)
- mutable final_state : Ptset.t;
+type config = { st_root : Ata.State.t; (* state matching the root element (initial state) *)
+ st_univ : Ata.State.t; (* universal state accepting anything *)
+ st_from_root : Ata.State.t; (* state chaining the root and the current position *)
+ mutable final_state : Ata.StateSet.t;
mutable has_backward: bool;
(* To store transitions *)
(* Key is the from state, (i,l) -> i the number of the step and l the list of trs *)
- tr_parent_loop : (Ata.state,int*(Ata.transition list)) Hashtbl.t;
- tr : (Ata.state,int*(Ata.transition list)) Hashtbl.t;
- tr_aux : (Ata.state,int*(Ata.transition list)) Hashtbl.t;
- mutable entry_points : (Tag.t*Ptset.t) list;
+ tr_parent_loop : (Ata.State.t,int*(transition list)) Hashtbl.t;
+ tr : (Ata.State.t,int*(transition list)) Hashtbl.t;
+ tr_aux : (Ata.State.t,int*(transition list)) Hashtbl.t;
+ mutable entry_points : (Tag.t*Ata.StateSet.t) list;
mutable contains : string option;
+ mutable univ_states : Ata.State.t list;
+ mutable starstate : Ata.StateSet.t option;
}
let dummy_conf = { st_root = -1;
st_univ = -1;
st_from_root = -1;
- final_state = Ptset.empty;
+ final_state = Ata.StateSet.empty;
has_backward = false;
tr_parent_loop = Hashtbl.create 0;
tr = Hashtbl.create 0;
tr_aux = Hashtbl.create 0;
entry_points = [];
contains = None;
+ univ_states = [];
+ starstate = None;
}
| `LLeft -> `LLeft
-open Ata.Transitions
+open Ata.Transition.Infix
+open Ata.Formula.Infix
-let add_trans num htr ((q,_,_,_,_) as tr) =
- try
- let (i,ltr) = Hashtbl.find htr q in
- if List.exists (Ata.equal_trans tr) ltr
- then ()
- else Hashtbl.replace htr q (i,(tr::ltr))
- with
- | Not_found -> Hashtbl.add htr q (num,[tr])
+(* Todo : fix *)
+let add_trans num htr ((q,ts,_)as tr) =
+ Hashtbl.add htr q (num,[tr])
-exception Exit of Ata.state * Ata.transition list
-let rec replace s f =
- match f.Ata.pos with
- | Ata.Atom(_,b,q) when q = s -> if b then Ata.true_ else Ata.false_
- | Ata.Or(f1,f2) -> (replace s f1) +| (replace s f2)
- | Ata.And(f1,f2) -> (replace s f1) *& (replace s f2)
- | _ -> f
-
-
-let or_self conf old_dst q_src q_dst dir test pred mark =
- try
- let (num,l) = Hashtbl.find conf.tr q_src in
- let l2 = List.fold_left (fun acc (q,t,m,f,_) ->
- (q,
- TagSet.cap t test,
- mark,
- (if mark then replace old_dst f else f)
- *& pred *&
- (if mark then Ata.true_ else (_l dir) ** q_dst),
- `True)::acc)
- l l
- in Hashtbl.replace conf.tr q_src (num,l2)
- with Not_found -> ()
-
-
-let nst = Ata.mk_state
-let att_or_str = TagSet.add Tag.pcdata TagSet.attribute
let vpush x y = (x,[]) :: y
let hpush x y =
match y with
st_univ = q_univ;
st_from_root = q_frm_root } = conf
in
- let q_dst = Ata.mk_state() in
+ let q_dst = Ata.State.make() in
let p_st, p_anc, p_par, p_pre, p_num, p_f =
compile_pred conf q_src num ctx_path dir pred q_dst
in
let new_st,new_dst, new_ctx =
match axis with
-
| Child | Descendant ->
+ if (TagSet.is_finite test)
+ then conf.entry_points <- (TagSet.choose test,Ata.StateSet.singleton q_src)::conf.entry_points;
let left,right =
if nrec then `LLeft,`RRight
else `Left,`Right
in
-
- let t1 = ?< q_src><(test, is_last && not(ex))>=>
- p_f *& ( if false (*is_last*) then Ata.true_ else (_l left) ** q_dst) in
+ let _ = if is_last && axis=Descendant && TagSet.equal test TagSet.star
+ then conf.starstate <- Some(Ata.StateSet.singleton q_src)
+ in
+ let t1,ldst = ?< q_src><(test, is_last && not(ex))>=>
+ p_f *& ( if is_last then Ata.Formula.true_ else (_l left) *+ q_dst),
+ ( if is_last then [] else [q_dst])
+ in
- let _ = add_trans num conf.tr t1 in
-
-
+ let _ = add_trans num conf.tr t1 in
let _ = if axis=Descendant then
add_trans num conf.tr_aux (
?< q_src><@ ((if ex||nrec then TagSet.diff TagSet.star test
- else TagSet.star),false,
- `True )>=> `LLeft ** q_src )
+ else TagSet.star),false)>=>
+ (if TagSet.equal test TagSet.star then
+ `Left else `LLeft) *+ q_src )
in
let t3 =
?< q_src><@ ((if ex then TagSet.diff TagSet.any test
- else TagSet.any), false, `True )>=>
- if ex then ( Ata.atom_ `Left false q_src) *& right ** q_src
- else (if axis=Descendant then `RRight else `Right) ** q_src
+ else TagSet.any), false)>=>
+ (if axis=Descendant && (not (TagSet.equal test TagSet.star)) then
+ `RRight else `Right) *+ q_src
in
let _ = add_trans num conf.tr_aux t3
in
- [q_dst], q_dst,
+ ldst, q_dst,
(if axis = FollowingSibling then hpush q_src ctx_path else vpush q_src ctx_path)
| Attribute ->
- let q_dstreal = Ata.mk_state() in
+ let q_dstreal = Ata.State.make() in
(* attributes are always the first child *)
let t1 = ?< q_src><(TagSet.attribute,false)>=>
- `Left ** q_dst in
+ `Left *+ q_dst in
let t2 = ?< q_dst><(test, is_last && not(existential))>=>
- if is_last then Ata.true_ else `Left ** q_dstreal in
- let tsa = ?< q_dst><(TagSet.star, false)>=> `Right ** q_dst
+ if is_last then Ata.Formula.true_ else `Left *+ q_dstreal in
+ let tsa = ?< q_dst><(TagSet.star, false)>=> `Right *+ q_dst
in
add_trans num conf.tr t1;
add_trans num conf.tr_aux t2;
[q_dst;q_dstreal], q_dstreal,
ctx_path
- | Ancestor | AncestorOrSelf ->
- conf.has_backward <- true;
- let up_states, new_ctx =
- List.fold_left (fun acc (q,_) -> if q == q_root then acc else q::acc) [] ctx_path, (vpush q_root [])
- in
- let _ = if axis = AncestorOrSelf then
- or_self conf q_src (fst(vpop ctx_path)) q_dst dir test p_f (is_last && not(existential));
- in
- let fc = List.fold_left (fun f s -> ((_l dir)**s +|f)) Ata.false_ up_states
- in
- let t1 = ?< q_frm_root><(test,is_last && (not existential) )>=>
- ( (*if is_last then Ata.true_ else *) (`LLeft ) ** q_dst) *& fc in
- add_trans num conf.tr t1;
- [q_dst ], q_dst, vpush q_frm_root new_ctx
-
- | Parent ->
- conf.has_backward <- true;
- let q_self,new_ctx =
- match ctx_path with
- | (a,_)::[] -> a, vpush q_root []
- | (a,_)::r -> a, r
- | _ -> assert false
- in
- let t1 = ?< q_frm_root>< (test,is_last && (not existential)) >=>
- (if is_last then Ata.true_ else (_l dir) ** q_dst) *& (_l dir) ** q_self in
- add_trans num conf.tr t1;
- [ q_dst ], q_dst, vpush q_frm_root new_ctx
| _ -> assert false
in
- (* todo change everything to Ptset *)
- (Ptset.elements (Ptset.union p_st (Ptset.from_list new_st)),
+ (* todo change everything to Ata.StateSet *)
+ (Ata.StateSet.elements (Ata.StateSet.union p_st (Ata.StateSet.from_list new_st)),
new_dst,
new_ctx)
and is_rec = function
let add_states,new_dst,new_ctx =
compile_step ~existential:existential config a_dst dir ctx_path (is_rec a_isrec) step num
in
- let new_states = Ptset.union (Ptset.from_list add_states) a_st in
+ let new_states = Ata.StateSet.union (Ata.StateSet.from_list add_states) a_st in
let nanc_st,npar_st,npre_st,new_bw =
match step with
- |PrecedingSibling,_,_ -> anc_st,par_st,Ptset.add a_dst pre_st,true
- |(Parent|Ancestor|AncestorOrSelf),_,_ -> Ptset.add a_dst anc_st,par_st,pre_st,true
+ |PrecedingSibling,_,_ -> anc_st,par_st,Ata.StateSet.add a_dst pre_st,true
+ |(Parent|Ancestor|AncestorOrSelf),_,_ -> Ata.StateSet.add a_dst anc_st,par_st,pre_st,true
| _ -> anc_st,par_st,pre_st,has_backward
in
new_states,new_dst,nanc_st,npar_st,npre_st,new_ctx, num+1,new_bw,(match a_isrec with [] -> [] | _::r -> r)
)
- (states, q_src, Ptset.empty,Ptset.empty,Ptset.empty, ctx_path,idx, false,(List.tl annot_path) )
+ (states, q_src, Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty, ctx_path,idx, false,(List.tl annot_path) )
annot_path
and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst =
let a_st2,anc_st2,par_st2,pre_st2,idx2,f2 =
compile_pred conf q_src idx1 ctx_path dir p2 ddst
in
- Ptset.union a_st1 a_st2,
- Ptset.union anc_st1 anc_st2,
- Ptset.union par_st1 par_st2,
- Ptset.union pre_st1 pre_st2,
+ Ata.StateSet.union a_st1 a_st2,
+ Ata.StateSet.union anc_st1 anc_st2,
+ Ata.StateSet.union par_st1 par_st2,
+ Ata.StateSet.union pre_st1 pre_st2,
idx2, (f f1 f2)
and compile_pred conf q_src idx ctx_path dir pred qdst =
binop_ conf q_src idx ctx_path dir pred p1 p2 (( +| )) qdst
| And(p1,p2) ->
binop_ conf q_src idx ctx_path dir pred p1 p2 (( *& )) qdst
- | Expr e -> compile_expr conf Ptset.empty q_src idx ctx_path dir e qdst
+ | Expr e -> compile_expr conf Ata.StateSet.empty q_src idx ctx_path dir e qdst
| Not(p) ->
let a_st,anc_st,par_st,pre_st,idx,f =
compile_pred conf q_src idx ctx_path dir p qdst
- in a_st,anc_st,par_st,pre_st,idx, Ata.not_ f
+ in a_st,anc_st,par_st,pre_st,idx, Ata.Formula.not_ f
and compile_expr conf states q_src idx ctx_path dir e qdst =
match e with
| Path (p) ->
- let q = Ata.mk_state () in
+ let q = Ata.State.make () in
let annot_path = match p with Relative(r) -> dirannot (List.rev r) | _ -> assert false in
let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward,_ =
compile_path ~existential:true annot_path conf q states idx ctx_path
| _ -> `Left
in
let _ = match annot_path with
- | (((Parent|Ancestor|AncestorOrSelf),_,_),_)::_ -> conf.final_state <- Ptset.add qdst conf.final_state
+ | (((Parent|Ancestor|AncestorOrSelf),_,_),_)::_ -> conf.final_state <- Ata.StateSet.add qdst conf.final_state
| _ -> ()
- in
- (a_st,anc_st,par_st,pre_st,idx, ((ret_dir) ** q))
- | True -> states,Ptset.empty,Ptset.empty,Ptset.empty,idx,Ata.true_
- | False -> states,Ptset.empty,Ptset.empty,Ptset.empty,idx,Ata.false_
+ in let _ = conf.univ_states <- a_dst::conf.univ_states in
+ (a_st,anc_st,par_st,pre_st,idx, ((ret_dir) *+ q))
+ | True -> states,Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty,idx,Ata.Formula.true_
+ | False -> states,Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty,idx,Ata.Formula.false_
| _ -> assert false
| p::(((FollowingSibling),_,_)::_ as l) -> (p,`Right)::(dirannot l)
| p::l -> (p,`Left) :: (dirannot l)
-let compile path =
+let compile ?(querystring="") path =
let steps =
match path with
| Absolute(steps)
in
let steps = List.rev steps in
let dirsteps = dirannot steps in
- let _ = Ata.mk_state() in
- let config = { st_root = Ata.mk_state();
- st_univ = Ata.mk_state();
- final_state = Ptset.empty;
- st_from_root = Ata.mk_state();
+ let config = { st_root = Ata.State.make();
+ st_univ = Ata.State.make();
+ final_state = Ata.StateSet.empty;
+ st_from_root = Ata.State.make();
has_backward = false;
tr_parent_loop = Hashtbl.create 5;
tr = Hashtbl.create 5;
tr_aux = Hashtbl.create 5;
entry_points = [];
- contains = None
+ contains = None;
+ univ_states = [];
+ starstate = None;
}
in
- let q0 = Ata.mk_state() in
- let states = Ptset.from_list [config.st_univ;config.st_root]
+ let q0 = Ata.State.make() in
+ let states = Ata.StateSet.from_list [config.st_univ;config.st_root]
in
let num = 0 in
(* add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_univ config.st_from_root);
in
let fst_tr =
?< (config.st_root) >< (TagSet.singleton (Tag.tag ""),false) >=>
- ((if is_rec dirsteps then `LLeft else `Left)** q0) *& (if config.has_backward then `LLeft ** config.st_from_root else Ata.true_)
+ ((if is_rec dirsteps then `LLeft else `Left)*+ q0) *& (if config.has_backward then `LLeft *+ config.st_from_root else Ata.Formula.true_)
in
add_trans num config.tr fst_tr;
if config.has_backward then begin
add_trans num config.tr_aux
- (?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft ** config.st_from_root);
+ (?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft *+ config.st_from_root);
add_trans num config.tr_aux
(?< (config.st_from_root) >< (TagSet.any,false) >=>
- `RRight ** config.st_from_root);
+ `RRight *+ config.st_from_root);
end;
let phi = Hashtbl.create 37 in
- let fadd = fun _ (_,l) -> List.iter (fun (s,t,m,f,p) ->
+ let fadd = fun _ (_,l) -> List.iter (fun (s,t,tr) ->
let lt = try
Hashtbl.find phi s
- with Not_found -> []
+ with Not_found -> []
in
- Hashtbl.replace phi s ((t,(m,f,p))::lt)
+ Hashtbl.replace phi s ((t,tr)::lt)
) l in
Hashtbl.iter (fadd) config.tr;
Hashtbl.iter (fadd) config.tr_aux;
Hashtbl.iter (fadd) config.tr_parent_loop;
let final =
- let s = Ptset.union anc_st (Ptset.from_list [])
- in if has_backward then Ptset.add config.st_from_root s else s
+ let s = anc_st
+ in if has_backward then Ata.StateSet.add config.st_from_root s else s
in { Ata.id = Oo.id (object end);
- Ata.states = if has_backward then Ptset.add config.st_from_root a_st else a_st;
- Ata.init = Ptset.singleton config.st_root;
- Ata.final = Ptset.union anc_st config.final_state;
- Ata.universal = Ptset.singleton a_dst;
- Ata.phi = phi;
- Ata.delta = Hashtbl.create 17;
- Ata.sigma = Ata.HTagSet.create 17;
+ Ata.states = Hashtbl.fold (fun q _ acc -> Ata.StateSet.add q acc) phi Ata.StateSet.empty;
+ Ata.init = Ata.StateSet.singleton config.st_root;
+ Ata.trans = phi;
+ Ata.starstate = config.starstate;
+ Ata.query_string = querystring;
},config.entry_points,!contains