(* Copyright NICTA 2008 *)
(* Distributed under the terms of the LGPL (see LICENCE) *)
(******************************************************************************)
-#load "pa_extend.cmo";;
+#load "pa_extend.cmo";;
let contains = ref None
module Ast =
struct
and step = axis*test*predicate
and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling
| Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following
-
+
and test = TagSet.t
-
+
and predicate = Or of predicate*predicate
| And of predicate*predicate
- | Not of predicate
+ | Not of predicate
| Expr of expression
and expression = Path of path
| Function of string*expression list
| String of string
| True | False
type t = path
-
-
-
+
+
+
let pp fmt = Format.fprintf fmt
let print_list printer fmt sep l =
match l with
[] -> ()
| [e] -> printer fmt e
| e::es -> printer fmt e; List.iter (fun x -> pp fmt sep;printer fmt x) es
-
-
- let rec print fmt p =
- let l = match p with
- | Absolute l -> pp fmt "/"; l
- | AbsoluteDoS l -> pp fmt "/";
+
+
+ let rec print fmt p =
+ let l = match p with
+ | Absolute l -> pp fmt "/"; l
+ | AbsoluteDoS l -> pp fmt "/";
print_step fmt (DescendantOrSelf,TagSet.node,Expr True);
pp fmt "/"; l
- | Relative l -> l
+ | Relative l -> l
in
print_list print_step fmt "/" (List.rev l)
and print_step fmt (axis,test,predicate) =
print_axis fmt axis;pp fmt "::";print_test fmt test;
pp fmt "["; print_predicate fmt predicate; pp fmt "]"
- and print_axis fmt a = pp fmt "%s" (match a with
+ and print_axis fmt a = pp fmt "%s" (match a with
Self -> "self"
| Child -> "child"
| Descendant -> "descendant"
| Parent -> "parent"
| _ -> assert false
)
- and print_test fmt ts =
- try
- pp fmt "%s" (List.assoc ts
+ and print_test fmt ts =
+ try
+ pp fmt "%s" (List.assoc ts
[ (TagSet.pcdata,"text()"); (TagSet.node,"node()");
(TagSet.star),"*"])
with
Not_found -> pp fmt "%s"
- (if TagSet.is_finite ts
+ (if TagSet.is_finite ts
then Tag.to_string (TagSet.choose ts)
else "<INFINITE>")
-
+
and print_predicate fmt = function
| Or(p,q) -> print_predicate fmt p; pp fmt " or "; print_predicate fmt q
| And(p,q) -> print_predicate fmt p; pp fmt " and "; print_predicate fmt q
| Not p -> pp fmt "not "; print_predicate fmt p
| Expr e -> print_expression fmt e
-
+
and print_expression fmt = function
| Path p -> print fmt p
| Function (f,l) -> pp fmt "%s(" f;print_list print_expression fmt "," l;pp fmt ")"
| Int i -> pp fmt "%i" i
| String s -> pp fmt "\"%s\"" s
| t -> pp fmt "%b" (t== True)
-
+
end
-module Parser =
+module Parser =
struct
- open Ast
+ open Ast
open Ulexer
let predopt = function None -> Expr True | Some p -> p
module Gram = Camlp4.Struct.Grammar.Static.Make(Ulexer)
let query = Gram.Entry.mk "query"
-
+
exception Error of Gram.Loc.t*string
- let test_of_keyword t loc =
+ let test_of_keyword t loc =
match t with
| "text()" -> TagSet.pcdata
| "node()" -> TagSet.node
query : [ [ p = path; `EOI -> p ]]
;
-
- path : [
+
+ path : [
[ "//" ; l = slist -> AbsoluteDoS l ]
| [ "/" ; l = slist -> Absolute l ]
| [ l = slist -> Relative l ]
[ axis = axis ; o = OPT ["::" ; t = test -> t ] ; p = top_pred ->
let a,t,p =
match o with
- | Some(t) -> (axis,t,p)
- | None -> (Child,TagSet.singleton (Tag.tag (axis_to_string axis)),p)
+ | Some(t) -> (axis,t,p)
+ | None -> (Child,TagSet.singleton (Tag.tag (axis_to_string axis)),p)
in match a with
| Following -> [ (DescendantOrSelf,t,p);
(FollowingSibling,TagSet.star,Expr(True));
| _ -> [ a,t,p ]
]
-
+
| [ "." ; p = top_pred -> [(Self,TagSet.node,p)] ]
| [ ".." ; p = top_pred -> [(Parent,TagSet.star,p)] ]
-| [ "contains"; "(" ; s = STRING ; ")";p=top_pred -> [
+| [ "contains"; "(" ; s = STRING ; ")";p=top_pred -> [
let _ = contains := Some((`CONTAINS,s)) in (Child,TagSet.singleton Tag.pcdata, p)]
]
-| [ "equals"; "(" ; s = STRING ; ")";p=top_pred -> [
+| [ "equals"; "(" ; s = STRING ; ")";p=top_pred -> [
let _ = contains := Some((`EQUALS,s)) in (Child,TagSet.singleton Tag.pcdata, p)]
]
-| [ "startswith"; "(" ; s = STRING ; ")";p=top_pred -> [
+| [ "startswith"; "(" ; s = STRING ; ")";p=top_pred -> [
let _ = contains := Some((`STARTSWITH,s)) in (Child,TagSet.singleton Tag.pcdata, p)]
]
-| [ "endswith"; "(" ; s = STRING ; ")";p=top_pred -> [
+| [ "endswith"; "(" ; s = STRING ; ")";p=top_pred -> [
let _ = contains := Some((`ENDSWITH,s)) in (Child,TagSet.singleton Tag.pcdata, p)]
]
| [ test = test; p = top_pred -> [(Child,test, p)] ]
-| [ att = ATT ; p = top_pred ->
+| [ att = ATT ; p = top_pred ->
match att with
| "*" -> [(Attribute,TagSet.star,p)]
| _ -> [(Attribute, TagSet.singleton (Tag.tag att) ,p )]]
[ p = OPT [ "["; p=predicate ;"]" -> p ] -> predopt p ]
]
;
-axis : [
- [ "self" -> Self | "child" -> Child | "descendant" -> Descendant
+axis : [
+ [ "self" -> Self | "child" -> Child | "descendant" -> Descendant
| "descendant-or-self" -> DescendantOrSelf
| "ancestor-or-self" -> AncestorOrSelf
| "following-sibling" -> FollowingSibling
| "following" -> Following
]
-
+
];
-test : [
+test : [
[ s = KWD -> test_of_keyword s _loc ]
| [ t = TAG -> TagSet.singleton (Tag.tag t) ]
];
-predicate: [
+predicate: [
[ p = predicate; "or"; q = predicate -> Or(p,q) ]
| [ p = predicate; "and"; q = predicate -> And(p,q) ]
| [ "not" ; p = predicate -> Not p ]
;;
let parse_string = Gram.parse_string query (Ulexer.Loc.mk "<string>")
let parse = Gram.parse_string query (Ulexer.Loc.mk "<string>")
-end
+end
module Compile = struct
univ_states = [];
starstate = None;
}
-
+
let _r =
function (`Left|`Last) -> `Right
| `LLeft -> `RRight
-let _l =
+let _l =
function (`Left|`Last) -> `Left
| `Right -> `Right
| `RRight -> `RRight
Hashtbl.add htr q (num,[tr])
let vpush x y = (x,[]) :: y
-let hpush x y =
+let hpush x y =
match y with
| (z,r)::l -> (z,x::r) ::l
| _ -> assert false
-let vpop = function
+let vpop = function
(x,_)::r -> x,r
| _ -> assert false
| (x,z::y) ::r -> z,(x,y)::r
| _-> assert false
-let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num =
+let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num =
let ex = existential in
let axis,test,pred = step in
let is_last = dir = `Last in
let { st_root = q_root;
- st_univ = q_univ;
- st_from_root = q_frm_root } = conf
+ st_univ = q_univ;
+ st_from_root = q_frm_root } = conf
in
- let q_dst = Ata.State.make() in
- let p_st, p_anc, p_par, p_pre, p_num, p_f =
+ let q_dst = Ata.State.make() in
+ let p_st, p_anc, p_par, p_pre, p_num, p_f =
compile_pred conf q_src num ctx_path dir pred q_dst
in
- let new_st,new_dst, new_ctx =
+ let new_st,new_dst, new_ctx =
match axis with
| Child | Descendant ->
if (TagSet.is_finite test)
- then conf.entry_points <- (TagSet.choose test,Ata.StateSet.singleton q_src)::conf.entry_points;
+ then conf.entry_points <- (TagSet.choose test,Ata.StateSet.singleton q_src)::conf.entry_points;
let left,right =
if nrec then `LLeft,`RRight
else `Left,`Right
in
let _ = if is_last && axis=Descendant && TagSet.equal test TagSet.star
then conf.starstate <- Some(Ata.StateSet.singleton q_src)
- in
+ in
let t1,ldst = ?< q_src><(test, is_last && not(ex))>=>
p_f *& ( if is_last then Ata.Formula.true_ else (_l left) *+ q_dst),
( if is_last then [] else [q_dst])
in
-
- let _ = add_trans num conf.tr t1 in
+
+ let _ = add_trans num conf.tr t1 in
let _ = if axis=Descendant then
add_trans num conf.tr_aux (
?< q_src><@ ((if ex||nrec then TagSet.diff TagSet.star test
- else TagSet.star),false)>=>
+ else TagSet.star),false)>=>
(if TagSet.equal test TagSet.star then
`Left else `LLeft) *+ q_src )
- in
- let t3 =
- ?< q_src><@ ((if ex then TagSet.diff TagSet.any test
- else TagSet.any), false)>=>
- (if axis=Descendant && (not (TagSet.equal test TagSet.star)) then
- `RRight else `Right) *+ q_src
in
- let _ = add_trans num conf.tr_aux t3
+ let t3 =
+ ?< q_src><@ ((if ex then TagSet.diff TagSet.any test
+ else TagSet.any), false)>=>
+ (if axis=Descendant && (not (TagSet.equal test TagSet.star)) then
+ `RRight else `Right) *+ q_src
in
- ldst, q_dst,
+ let _ = add_trans num conf.tr_aux t3
+ in
+ ldst, q_dst,
(if axis = FollowingSibling then hpush q_src ctx_path else vpush q_src ctx_path)
-
-
- | Attribute ->
+
+
+ | Attribute ->
let q_dstreal = Ata.State.make() in
(* attributes are always the first child *)
- let t1 = ?< q_src><(TagSet.attribute,false)>=>
+ let t1 = ?< q_src><(TagSet.attribute,false)>=>
`Left *+ q_dst in
let t2 = ?< q_dst><(test, is_last && not(existential))>=>
if is_last then Ata.Formula.true_ else `Left *+ q_dstreal in
- let tsa = ?< q_dst><(TagSet.star, false)>=> `Right *+ q_dst
+ let tsa = ?< q_dst><(TagSet.star, false)>=> `Right *+ q_dst
in
add_trans num conf.tr t1;
add_trans num conf.tr_aux t2;
add_trans num conf.tr_aux tsa;
- [q_dst;q_dstreal], q_dstreal,
+ [q_dst;q_dstreal], q_dstreal,
ctx_path
new_ctx)
and is_rec = function
[] -> false
- | ((axis,_,_),_)::_ ->
+ | ((axis,_,_),_)::_ ->
match axis with
Descendant | Ancestor -> true
| _ -> false
-
-and compile_path ?(existential=false) annot_path config q_src states idx ctx_path =
- List.fold_left
- (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward,a_isrec) (step,dir) ->
+
+and compile_path ?(existential=false) annot_path config q_src states idx ctx_path =
+ List.fold_left
+ (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward,a_isrec) (step,dir) ->
let add_states,new_dst,new_ctx =
compile_step ~existential:existential config a_dst dir ctx_path (is_rec a_isrec) step num
in
let new_states = Ata.StateSet.union (Ata.StateSet.from_list add_states) a_st in
- let nanc_st,npar_st,npre_st,new_bw =
+ let nanc_st,npar_st,npre_st,new_bw =
match step with
|PrecedingSibling,_,_ -> anc_st,par_st,Ata.StateSet.add a_dst pre_st,true
|(Parent|Ancestor|AncestorOrSelf),_,_ -> Ata.StateSet.add a_dst anc_st,par_st,pre_st,true
)
(states, q_src, Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty, ctx_path,idx, false,(List.tl annot_path) )
annot_path
-
+
and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst =
let a_st1,anc_st1,par_st1,pre_st1,idx1,f1 =
compile_pred conf q_src idx ctx_path dir p1 ddst in
- let a_st2,anc_st2,par_st2,pre_st2,idx2,f2 =
+ let a_st2,anc_st2,par_st2,pre_st2,idx2,f2 =
compile_pred conf q_src idx1 ctx_path dir p2 ddst
in
Ata.StateSet.union a_st1 a_st2,
Ata.StateSet.union pre_st1 pre_st2,
idx2, (f f1 f2)
-and compile_pred conf q_src idx ctx_path dir pred qdst =
+and compile_pred conf q_src idx ctx_path dir pred qdst =
match pred with
- | Or(p1,p2) ->
+ | Or(p1,p2) ->
binop_ conf q_src idx ctx_path dir pred p1 p2 (( +| )) qdst
- | And(p1,p2) ->
+ | And(p1,p2) ->
binop_ conf q_src idx ctx_path dir pred p1 p2 (( *& )) qdst
| Expr e -> compile_expr conf Ata.StateSet.empty q_src idx ctx_path dir e qdst
- | Not(p) ->
- let a_st,anc_st,par_st,pre_st,idx,f =
+ | Not(p) ->
+ let a_st,anc_st,par_st,pre_st,idx,f =
compile_pred conf q_src idx ctx_path dir p qdst
in a_st,anc_st,par_st,pre_st,idx, Ata.Formula.not_ f
and compile_expr conf states q_src idx ctx_path dir e qdst =
match e with
- | Path (p) ->
+ | Path (p) ->
let q = Ata.State.make () in
let annot_path = match p with Relative(r) -> dirannot (List.rev r) | _ -> assert false in
- let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward,_ =
+ let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward,_ =
compile_path ~existential:true annot_path conf q states idx ctx_path
- in
+ in
let ret_dir = match annot_path with
| ((FollowingSibling,_,_),_)::_ -> `Right
| _ -> `Left
| p::l -> (p,`Left) :: (dirannot l)
let compile ?(querystring="") path =
- let steps =
+ let steps =
match path with
- | Absolute(steps)
+ | Absolute(steps)
| Relative(steps) -> steps
| AbsoluteDoS(steps) -> steps@[(DescendantOrSelf,TagSet.node,Expr(True))]
in
has_backward = false;
tr_parent_loop = Hashtbl.create 5;
tr = Hashtbl.create 5;
- tr_aux = Hashtbl.create 5;
+ tr_aux = Hashtbl.create 5;
entry_points = [];
contains = None;
univ_states = [];
starstate = None;
- }
+ }
in
let q0 = Ata.State.make() in
- let states = Ata.StateSet.from_list [config.st_univ;config.st_root]
+ let states = Ata.StateSet.from_list [config.st_univ;config.st_root]
in
let num = 0 in
(* add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_univ config.st_from_root);
add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_from_root config.st_univ);
add_trans num config.tr_aux (mk_step config.st_no_nil (TagSet.add Tag.pcdata TagSet.star) `Left config.st_univ config.st_univ);
*)
- let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward,_ =
+ let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward,_ =
compile_path dirsteps config q0 states 0 [(config.st_root,[]) ]
in
- let fst_tr =
- ?< (config.st_root) >< (TagSet.singleton (Tag.tag ""),false) >=>
+ let fst_tr =
+ ?< (config.st_root) >< (TagSet.singleton (Tag.tag ""),false) >=>
((if is_rec dirsteps then `LLeft else `Left)*+ q0) *& (if config.has_backward then `LLeft *+ config.st_from_root else Ata.Formula.true_)
in
add_trans num config.tr fst_tr;
if config.has_backward then begin
- add_trans num config.tr_aux
+ add_trans num config.tr_aux
(?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft *+ config.st_from_root);
- add_trans num config.tr_aux
- (?< (config.st_from_root) >< (TagSet.any,false) >=>
- `RRight *+ config.st_from_root);
-
- end;
+ add_trans num config.tr_aux
+ (?< (config.st_from_root) >< (TagSet.any,false) >=>
+ `RRight *+ config.st_from_root);
+
+ end;
let phi = Hashtbl.create 37 in
- let fadd = fun _ (_,l) -> List.iter (fun (s,t,tr) ->
- let lt = try
+ let fadd = fun _ (_,l) -> List.iter (fun (s,t,tr) ->
+ let lt = try
Hashtbl.find phi s
with Not_found -> []
in
Hashtbl.iter (fadd) config.tr;
Hashtbl.iter (fadd) config.tr_aux;
Hashtbl.iter (fadd) config.tr_parent_loop;
- let final =
- let s = anc_st
+ let final =
+ let s = anc_st
in if has_backward then Ata.StateSet.add config.st_from_root s else s
in { Ata.id = Oo.id (object end);
Ata.states = Hashtbl.fold (fun q _ acc -> Ata.StateSet.add q acc) phi Ata.StateSet.empty;
Ata.starstate = config.starstate;
Ata.query_string = querystring;
},config.entry_points,!contains
-
-
+
+
end