X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=xPath.ml;h=1c2c127d60a4692590b0bf9a81f0244dd2dd16f0;hb=ac8c1ac563a2c089f789eed5a03ff5b84a2c4fe0;hp=661863d677524d227f5c65dc8ea58feb8b1ee879;hpb=280fbebb046069cea454507fa7933b4330bff1eb;p=SXSI%2Fxpathcomp.git diff --git a/xPath.ml b/xPath.ml index 661863d..1c2c127 100644 --- a/xPath.ml +++ b/xPath.ml @@ -4,103 +4,106 @@ (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) - - -INCLUDE "debug.ml";; -#load "pa_extend.cmo";; - - +#load "pa_extend.cmo";; +let contains = ref None module Ast = struct - -type path = Absolute of step list | AbsoluteDoS of step list| Relative of step list -and step = axis*test*predicate -and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling - | Parent - -and test = TagSet.Xml.t - -and predicate = Or of predicate*predicate - | And of predicate*predicate - | Not of predicate - | Expr of expression -and expression = Path of path - | Function of string*expression list - | Int of int - | String of string - | True | False -type t = path - - -let pp fmt = Format.fprintf fmt -let print_list printer fmt sep l = - match l with - [] -> () - | [e] -> printer fmt e - | e::es -> printer fmt e; List.iter (fun x -> pp fmt sep;printer fmt x) es - - -let rec print fmt p = - let l = match p with - | Absolute l -> pp fmt "/"; l - | AbsoluteDoS l -> pp fmt "/"; - print_step fmt (DescendantOrSelf,TagSet.Xml.node,Expr True); - pp fmt "/"; l - | Relative l -> l - in - print_list print_step fmt "/" (List.rev l) -and print_step fmt (axis,test,predicate) = + (* The steps are in reverse order !!!! *) + type path = Absolute of step list | AbsoluteDoS of step list| Relative of step list + and step = axis*test*predicate + and axis = Self | Attribute | Child | Descendant | DescendantOrSelf | FollowingSibling + | Parent | Ancestor | AncestorOrSelf | PrecedingSibling | Preceding | Following + + and test = TagSet.t + + and predicate = Or of predicate*predicate + | And of predicate*predicate + | Not of predicate + | Expr of expression + and expression = Path of path + | Function of string*expression list + | Int of int + | String of string + | True | False + type t = path + + + + + let pp fmt = Format.fprintf fmt + let print_list printer fmt sep l = + match l with + [] -> () + | [e] -> printer fmt e + | e::es -> printer fmt e; List.iter (fun x -> pp fmt sep;printer fmt x) es + + + let rec print fmt p = + let l = match p with + | Absolute l -> pp fmt "/"; l + | AbsoluteDoS l -> pp fmt "/"; + print_step fmt (DescendantOrSelf,TagSet.node,Expr True); + pp fmt "/"; l + | Relative l -> l + in + print_list print_step fmt "/" (List.rev l) + and print_step fmt (axis,test,predicate) = print_axis fmt axis;pp fmt "::";print_test fmt test; - pp fmt "["; print_predicate fmt predicate; pp fmt "]" -and print_axis fmt a = pp fmt "%s" (match a with - Self -> "self" - | Child -> "child" - | Descendant -> "descendant" - | DescendantOrSelf -> "descendant-or-self" - | FollowingSibling -> "following-sibling" - | Attribute -> "attribute" - | Parent -> "parent") -and print_test fmt ts = - try - pp fmt "%s" (List.assoc ts - [ (TagSet.Xml.pcdata,"text()"); (TagSet.Xml.node,"node()"); - (TagSet.Xml.star),"*"]) - with - Not_found -> pp fmt "%s" - (if TagSet.Xml.is_finite ts - then Tag.to_string (TagSet.Xml.choose ts) - else "") - -and print_predicate fmt = function - | Or(p,q) -> print_predicate fmt p; pp fmt " or "; print_predicate fmt q - | And(p,q) -> print_predicate fmt p; pp fmt " and "; print_predicate fmt q - | Not p -> pp fmt "not "; print_predicate fmt p - | Expr e -> print_expression fmt e - -and print_expression fmt = function - | Path p -> print fmt p - | Function (f,l) -> pp fmt "%s(" f;print_list print_expression fmt "," l;pp fmt ")" - | Int i -> pp fmt "%i" i - | String s -> pp fmt "\"%s\"" s - | t -> pp fmt "%b" (t== True) - + pp fmt "["; print_predicate fmt predicate; pp fmt "]" + and print_axis fmt a = pp fmt "%s" (match a with + Self -> "self" + | Child -> "child" + | Descendant -> "descendant" + | DescendantOrSelf -> "descendant-or-self" + | FollowingSibling -> "following-sibling" + | Attribute -> "attribute" + | Ancestor -> "ancestor" + | AncestorOrSelf -> "ancestor-or-self" + | PrecedingSibling -> "preceding-sibling" + | Parent -> "parent" + | _ -> assert false + ) + and print_test fmt ts = + try + pp fmt "%s" (List.assoc ts + [ (TagSet.pcdata,"text()"); (TagSet.node,"node()"); + (TagSet.star),"*"]) + with + Not_found -> pp fmt "%s" + (if TagSet.is_finite ts + then Tag.to_string (TagSet.choose ts) + else "") + + and print_predicate fmt = function + | Or(p,q) -> print_predicate fmt p; pp fmt " or "; print_predicate fmt q + | And(p,q) -> print_predicate fmt p; pp fmt " and "; print_predicate fmt q + | Not p -> pp fmt "not "; print_predicate fmt p + | Expr e -> print_expression fmt e + + and print_expression fmt = function + | Path p -> print fmt p + | Function (f,l) -> pp fmt "%s(" f;print_list print_expression fmt "," l;pp fmt ")" + | Int i -> pp fmt "%i" i + | String s -> pp fmt "\"%s\"" s + | t -> pp fmt "%b" (t== True) + end -module Parser = +module Parser = struct - open Ast + open Ast open Ulexer let predopt = function None -> Expr True | Some p -> p module Gram = Camlp4.Struct.Grammar.Static.Make(Ulexer) let query = Gram.Entry.mk "query" - + exception Error of Gram.Loc.t*string - let test_of_keyword t loc = + let test_of_keyword t loc = match t with - | "text()" -> TagSet.Xml.pcdata - | "node()" -> TagSet.Xml.node - | "*" -> TagSet.Xml.star - | "and" | "not" | "or" -> TagSet.Xml.singleton (Tag.tag t) + | "text()" -> TagSet.pcdata + | "node()" -> TagSet.node + | "*" -> TagSet.star + | "and" | "not" | "or" -> TagSet.singleton (Tag.tag t) | _ -> raise (Error(loc,"Invalid test name "^t )) let axis_to_string a = let r = Format.str_formatter in @@ -111,8 +114,8 @@ GLOBAL: query; query : [ [ p = path; `EOI -> p ]] ; - - path : [ + + path : [ [ "//" ; l = slist -> AbsoluteDoS l ] | [ "/" ; l = slist -> Absolute l ] | [ l = slist -> Relative l ] @@ -120,9 +123,9 @@ GLOBAL: query; ; slist: [ - [ l = slist ;"/"; s = step -> s::l ] -| [ l = slist ; "//"; s = step -> s::(DescendantOrSelf,TagSet.Xml.node,Expr True)::l] -| [ s = step -> [ s ] ] + [ l = slist ;"/"; s = step -> s@l ] +| [ l = slist ; "//"; s = step -> s@[(DescendantOrSelf, TagSet.node,Expr True)]@l] +| [ s = step -> s ] ]; step : [ @@ -131,39 +134,69 @@ step : [ if OPT is None then this is a child::descendant if not, this is a real axis name *) [ axis = axis ; o = OPT ["::" ; t = test -> t ] ; p = top_pred -> - match o with - | Some(t) -> (axis,t,p) - | None -> (Child,TagSet.Xml.singleton (Tag.tag (axis_to_string axis)),p) ] - -| [ "." ; p = top_pred -> (Self,TagSet.Xml.node,p) ] -| [ test = test; p = top_pred -> (Child,test, p) ] -| [ att = ATT ; p = top_pred -> + let a,t,p = + match o with + | Some(t) -> (axis,t,p) + | None -> (Child,TagSet.singleton (Tag.tag (axis_to_string axis)),p) + in match a with + | Following -> [ (DescendantOrSelf,t,p); + (FollowingSibling,TagSet.star,Expr(True)); + (Ancestor,TagSet.star,Expr(True)) ] + + | Preceding -> [ (DescendantOrSelf,t,p); + (PrecedingSibling,TagSet.star,Expr(True)); + (Ancestor,TagSet.star,Expr(True)) ] + | _ -> [ a,t,p ] + +] + +| [ "." ; p = top_pred -> [(Self,TagSet.node,p)] ] +| [ ".." ; p = top_pred -> [(Parent,TagSet.star,p)] ] +| [ "contains"; "(" ; s = STRING ; ")";p=top_pred -> [ + let _ = contains := Some((`CONTAINS,s)) in (Child,TagSet.singleton Tag.pcdata, p)] + ] +| [ "equals"; "(" ; s = STRING ; ")";p=top_pred -> [ + let _ = contains := Some((`EQUALS,s)) in (Child,TagSet.singleton Tag.pcdata, p)] + ] +| [ "startswith"; "(" ; s = STRING ; ")";p=top_pred -> [ + let _ = contains := Some((`STARTSWITH,s)) in (Child,TagSet.singleton Tag.pcdata, p)] + ] +| [ "endswith"; "(" ; s = STRING ; ")";p=top_pred -> [ + let _ = contains := Some((`ENDSWITH,s)) in (Child,TagSet.singleton Tag.pcdata, p)] + ] +| [ test = test; p = top_pred -> [(Child,test, p)] ] +| [ att = ATT ; p = top_pred -> match att with - | "*" -> (Attribute,TagSet.Xml.star,p) - | _ -> (Attribute, TagSet.Xml.singleton (Tag.tag att) ,p )] + | "*" -> [(Attribute,TagSet.star,p)] + | _ -> [(Attribute, TagSet.singleton (Tag.tag att) ,p )]] ] ; top_pred : [ [ p = OPT [ "["; p=predicate ;"]" -> p ] -> predopt p ] ] ; -axis : [ - [ "self" -> Self | "child" -> Child | "descendant" -> Descendant +axis : [ + [ "self" -> Self | "child" -> Child | "descendant" -> Descendant | "descendant-or-self" -> DescendantOrSelf + | "ancestor-or-self" -> AncestorOrSelf | "following-sibling" -> FollowingSibling | "attribute" -> Attribute | "parent" -> Parent + | "ancestor" -> Ancestor + | "preceding-sibling" -> PrecedingSibling + | "preceding" -> Preceding + | "following" -> Following ] - + ]; -test : [ +test : [ [ s = KWD -> test_of_keyword s _loc ] -| [ t = TAG -> TagSet.Xml.singleton (Tag.tag t) ] +| [ t = TAG -> TagSet.singleton (Tag.tag t) ] ]; -predicate: [ +predicate: [ [ p = predicate; "or"; q = predicate -> Or(p,q) ] | [ p = predicate; "and"; q = predicate -> And(p,q) ] | [ "not" ; p = predicate -> Not p ] @@ -183,231 +216,295 @@ END ;; let parse_string = Gram.parse_string query (Ulexer.Loc.mk "") let parse = Gram.parse_string query (Ulexer.Loc.mk "") -end - -module Functions = struct - - type value = [ `NodeSet of Automaton.BST.t - | `Int of int | `String of string - | `Bool of bool | `True | `False ] - type expr = [ value | `Call of (string*(expr list)) - | `Auto of Automaton.t ] - - - let count = function [`NodeSet(s) ] -> `Int(Automaton.BST.cardinal s) - | _ -> failwith "count" - - - let equal = function [ `Int i; `Int j ] -> `Bool (i == j) - |_ -> failwith "equal" - - let globals : (string*(value list -> value)) list = [ +end - ("count",count); - ("equal",equal); -] - let text t = Tree.Binary.string (Tree.Binary.left t) - - let rec eval_expr tree (e:expr) : value = match e with - | `Call (f,args) -> (List.assoc f globals) (List.map (eval_expr tree) args) - | `Auto(a) -> `NodeSet(ignore (Automaton.dump Format.err_formatter a; - Tree.Binary.print_xml_fast stderr tree; - Printf.eprintf "\n=======================\n%!"; - Automaton.TopDown.run a tree); - Printf.eprintf "Results : %i\n%!" - (Automaton.BST.cardinal a.Automaton.result); - Automaton.BST.iter (fun t -> Tree.Binary.print_xml_fast stderr t; - Printf.eprintf "^^^^^^^^^^^^^^^^^^^^^^^^\n%!") - a.Automaton.result; - a.Automaton.result) - | #value as x -> x - - let truth_value = - function `NodeSet s -> not (Automaton.BST.is_empty s) - |`Bool(b) -> b - | _ -> failwith "truth_value" - -end module Compile = struct - open Ast - open Automaton - - - type direction = Left | Right | Final - let (==>) a (b,c,d) = Transition.Label(a,b,c,d) - let (@@) b (c,d) = (b,c,d) - - let star = TagSet.Xml.star - let any = TagSet.Xml.any - let notstar = TagSet.Xml.add Tag.pcdata (TagSet.Xml.add Tag.attribute TagSet.Xml.empty) - let swap dir a b = match dir with - | Left | Final -> (a,b) - | Right -> (b,a) - - let split_dest q l = - let rec aux ((qacc,nqacc) as acc) = function - | [] -> acc - | t::r -> - aux (if State.equal (Transition.dest1 t) q - || State.equal (Transition.dest2 t) q - then t::qacc , nqacc - else qacc , (t::nqacc)) r - in - aux ([],[]) l - - - let mk_tag_t dir s ts q1 q2 = (s==> ts @@ (swap dir q1 q2));; - let mk_self_trs ts acc l = - List.fold_left - (fun acc t -> - let s = Transition.source t in - let d1 = Transition.dest1 t in - let d2 = Transition.dest2 t in - let tself = (s ==> ts @@ (d1,d2)) in - (Transition.cap t tself)::acc ) (acc) l - - let mk_pred_trs f acc l = - List.fold_left - (fun acc t -> - let s = Transition.source t in - let d1 = Transition.dest1 t in - let d2 = Transition.dest2 t in - let tself = Transition.External(s,f,d1,d2) in - (Transition.cap t tself)::acc ) (acc) l - - let mk_dself_trs q' ts acc l = - List.fold_left - (fun acc t -> - let t',s,d2 = match t with - | Transition.Label(s,ts,_,d2) -> Transition.Label(s,ts,q',d2),s,d2 - | Transition.External (s,f,_,d2) -> Transition.External(s,f,q',d2),s,d2 +open Ast +type transition = Ata.State.t*TagSet.t*Ata.Transition.t + +type config = { st_root : Ata.State.t; (* state matching the root element (initial state) *) + st_univ : Ata.State.t; (* universal state accepting anything *) + st_from_root : Ata.State.t; (* state chaining the root and the current position *) + mutable final_state : Ata.StateSet.t; + mutable has_backward: bool; + (* To store transitions *) + (* Key is the from state, (i,l) -> i the number of the step and l the list of trs *) + tr_parent_loop : (Ata.State.t,int*(transition list)) Hashtbl.t; + tr : (Ata.State.t,int*(transition list)) Hashtbl.t; + tr_aux : (Ata.State.t,int*(transition list)) Hashtbl.t; + mutable entry_points : (Tag.t*Ata.StateSet.t) list; + mutable contains : string option; + mutable univ_states : Ata.State.t list; + mutable starstate : Ata.StateSet.t option; + } +let dummy_conf = { st_root = -1; + st_univ = -1; + st_from_root = -1; + final_state = Ata.StateSet.empty; + has_backward = false; + tr_parent_loop = Hashtbl.create 0; + tr = Hashtbl.create 0; + tr_aux = Hashtbl.create 0; + entry_points = []; + contains = None; + univ_states = []; + starstate = None; + } + + +let _r = + function (`Left|`Last) -> `Right + | `Right -> `Left + | `RRight -> `LLeft + | `LLeft -> `RRight + + +let _l = + function (`Left|`Last) -> `Left + | `Right -> `Right + | `RRight -> `RRight + | `LLeft -> `LLeft + + +open Ata.Transition.Infix +open Ata.Formula.Infix + + +(* Todo : fix *) +let add_trans num htr ((q,ts,_)as tr) = + Hashtbl.add htr q (num,[tr]) + +let vpush x y = (x,[]) :: y +let hpush x y = + match y with + | (z,r)::l -> (z,x::r) ::l + | _ -> assert false + +let vpop = function + (x,_)::r -> x,r + | _ -> assert false + +let hpop = function + | (x,z::y) ::r -> z,(x,y)::r + | _-> assert false + +let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num = + let ex = existential in + let axis,test,pred = step in + let is_last = dir = `Last in + let { st_root = q_root; + st_univ = q_univ; + st_from_root = q_frm_root } = conf + in + let q_dst = Ata.State.make() in + let p_st, p_anc, p_par, p_pre, p_num, p_f = + compile_pred conf q_src num ctx_path dir pred q_dst + in + let new_st,new_dst, new_ctx = + match axis with + | Child | Descendant -> + if (TagSet.is_finite test) + then conf.entry_points <- (TagSet.choose test,Ata.StateSet.singleton q_src)::conf.entry_points; + let left,right = + if nrec then `LLeft,`RRight + else `Left,`Right + in + let _ = if is_last && axis=Descendant && TagSet.equal test TagSet.star + then conf.starstate <- Some(Ata.StateSet.singleton q_src) + in + let t1,ldst = ?< q_src><(test, is_last && not(ex))>=> + p_f *& ( if is_last then Ata.Formula.true_ else (_l left) *+ q_dst), + ( if is_last then [] else [q_dst]) + in + + let _ = add_trans num conf.tr t1 in + let _ = if axis=Descendant then + add_trans num conf.tr_aux ( + ?< q_src><@ ((if ex||nrec then TagSet.diff TagSet.star test + else TagSet.star),false)>=> + (if TagSet.equal test TagSet.star then + `Left else `LLeft) *+ q_src ) + in + let t3 = + ?< q_src><@ ((if ex then TagSet.diff TagSet.any test + else TagSet.any), false)>=> + (if axis=Descendant && (not (TagSet.equal test TagSet.star)) then + `RRight else `Right) *+ q_src + in + let _ = add_trans num conf.tr_aux t3 + in + ldst, q_dst, + (if axis = FollowingSibling then hpush q_src ctx_path else vpush q_src ctx_path) + + + | Attribute -> + let q_dstreal = Ata.State.make() in + (* attributes are always the first child *) + let t1 = ?< q_src><(TagSet.attribute,false)>=> + `Left *+ q_dst in + let t2 = ?< q_dst><(test, is_last && not(existential))>=> + if is_last then Ata.Formula.true_ else `Left *+ q_dstreal in + let tsa = ?< q_dst><(TagSet.star, false)>=> `Right *+ q_dst + in + add_trans num conf.tr t1; + add_trans num conf.tr_aux t2; + add_trans num conf.tr_aux tsa; + [q_dst;q_dstreal], q_dstreal, + ctx_path + + + | _ -> assert false + in + (* todo change everything to Ata.StateSet *) + (Ata.StateSet.elements (Ata.StateSet.union p_st (Ata.StateSet.from_list new_st)), + new_dst, + new_ctx) +and is_rec = function + [] -> false + | ((axis,_,_),_)::_ -> + match axis with + Descendant | Ancestor -> true + | _ -> false + +and compile_path ?(existential=false) annot_path config q_src states idx ctx_path = + List.fold_left + (fun (a_st,a_dst,anc_st,par_st,pre_st,ctx_path,num,has_backward,a_isrec) (step,dir) -> + let add_states,new_dst,new_ctx = + compile_step ~existential:existential config a_dst dir ctx_path (is_rec a_isrec) step num in - let tself = (s ==> ts @@ (q',d2)) in - (Transition.cap t' tself)::acc ) (acc) l - - let from_list = List.fold_left (fun acc e -> SSet.add e acc) SSet.empty - - let dir = function (FollowingSibling,_,_) -> Right - | _ -> Left - - let rev_map_dir p = - let rec map_dir (d,acc) = function - | [] -> acc - | s::r -> map_dir ((dir s),(s,d)::acc) r - in let l = match p with - | Absolute p | Relative p -> map_dir (Final,[]) p - | AbsoluteDoS p -> - let l = (map_dir (Final,[]) p) - in ((DescendantOrSelf,TagSet.Xml.node,Expr True),dir (fst(List.hd l)))::l - in ((Child,TagSet.Xml.node,Expr True),dir (fst(List.hd l)))::l - - - let rec compile_step q dir trs final initial ignore (axis,test,pred) = - let q' = State.mk() in - let trs,final,initial = match axis,test with - | Self,ts -> - let tchange,tkeep = split_dest q trs in - let trs' = mk_self_trs ts tkeep tchange in - (trs',q::final,initial) - - | Child,ts -> - (mk_tag_t dir q ts q' ignore) ::( q==> any @@ (ignore,q))::trs, q'::final,initial - - | Descendant,ts -> - (mk_tag_t dir q ts q' ignore) :: - (q==> star @@ (q,q))::(q==> notstar @@ (ignore,q))::trs,q'::final,initial - - | DescendantOrSelf,ts -> - let tchange,tkeep = split_dest q trs in - let trs' = mk_dself_trs q' ts trs tchange in - (mk_tag_t dir q ts q' ignore) :: - (q==> star @@ (q,q))::(q==> notstar @@ (ignore,q))::trs',q'::final,initial - - | FollowingSibling,ts -> - (mk_tag_t dir q ts q' ignore) :: (q ==> any @@ (ignore,q))::trs,q'::final,initial - - (* q' is not returned and thus not added to the set of final states. - It's ok since we should never be in a final state on a node - <@> *) - - | Attribute,ts -> let q'' = State.mk() in - (mk_tag_t Left q (TagSet.Xml.attribute) q' ignore):: - (mk_tag_t Left q' (ts) q'' ignore)::( q==> any @@ (ignore,q))::trs, q''::q'::final,initial - - | Parent,ts -> let q'' = List.hd initial in - (mk_tag_t Left q' (star) q q'):: - ( q'' ==> ts @@ (q',q'')):: - ( q'' ==> star @@ (q'',q'')):: - ( q'' ==> notstar @@ (ignore,q''))::trs,q'::q''::final,q''::initial - - in - let q_out = List.hd final in - let tchange,tkeep = split_dest q_out trs in - let trs' = compile_pred q_out tkeep tchange pred in - (trs',final,initial) - - and compile_pred q_out tkeep tchange p = - let rec pred_rec = function - - | Or(p1,p2) -> cup (pred_rec p1) (pred_rec p2) - | And(p1,p2) -> cap (pred_rec p1) (pred_rec p2) - | Not(p) -> neg (pred_rec p) - | Expr e -> match compile_expr e with - | `True -> `Label (TagSet.Xml.any) - | `False -> `Label (TagSet.Xml.empty) - | e -> `Fun (fun t -> let r = Functions.truth_value (Functions.eval_expr t e) - in Printf.eprintf "Truth value is %b\n%!" r;r) - - in match pred_rec p with - `Fun f -> mk_pred_trs f tkeep tchange - | `Label ts -> mk_self_trs ts tkeep tchange - - and compile_expr = function - True -> `True - | False -> `False - | Path p -> `Auto(compile p) - | Int i -> `Int i - | String s -> `String s - | Function (f,elist) -> `Call(f,List.map compile_expr elist) - - and cup a b = match a,b with - | `Label l1 , `Label l2 -> `Label(TagSet.Xml.cup l1 l2) - | `Fun f1 , `Fun f2 -> `Fun (fun x -> (f1 x)||(f2 x)) - | `Fun f , `Label l | `Label l, `Fun f -> - `Fun (fun x -> - (TagSet.Xml.mem (Tree.Binary.tag x) l) - || (f x)) - - and cap a b = match a,b with - | `Label l1, `Label l2 -> `Label (TagSet.Xml.cap l1 l2) - | `Fun f1,`Fun f2 -> `Fun (fun x -> (f1 x)&&(f2 x)) - | `Fun f,`Label l | `Label l,`Fun f -> - `Fun (fun x -> - (TagSet.Xml.mem (Tree.Binary.tag x) l) - && f x) - and neg = function - `Label l -> `Label(TagSet.Xml.neg l) - | `Fun f -> `Fun (fun x -> not (f x)) - - and compile p = - let p = rev_map_dir p in - let ignore = State.mk() - in - let q0 = State.mk() in - let transitions = Transition.empty () in - let tlist,qlist,initacc = List.fold_left - (fun (tlist,qlist,initacc) (s,dir) -> - let q = List.hd qlist in - compile_step q dir tlist qlist initacc ignore s ) ([],[q0;ignore],[q0]) p - in - List.iter (Transition.add transitions) tlist; - let qmark = List.hd qlist in - { Automaton.mk() with - initial = from_list initacc; - final = from_list qlist; - transitions = transitions; - marking = from_list [qmark]; - ignore = from_list [qmark;ignore]; - } + let new_states = Ata.StateSet.union (Ata.StateSet.from_list add_states) a_st in + let nanc_st,npar_st,npre_st,new_bw = + match step with + |PrecedingSibling,_,_ -> anc_st,par_st,Ata.StateSet.add a_dst pre_st,true + |(Parent|Ancestor|AncestorOrSelf),_,_ -> Ata.StateSet.add a_dst anc_st,par_st,pre_st,true + | _ -> anc_st,par_st,pre_st,has_backward + in + new_states,new_dst,nanc_st,npar_st,npre_st,new_ctx, num+1,new_bw,(match a_isrec with [] -> [] | _::r -> r) + ) + (states, q_src, Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty, ctx_path,idx, false,(List.tl annot_path) ) + annot_path + +and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst = + let a_st1,anc_st1,par_st1,pre_st1,idx1,f1 = + compile_pred conf q_src idx ctx_path dir p1 ddst in + let a_st2,anc_st2,par_st2,pre_st2,idx2,f2 = + compile_pred conf q_src idx1 ctx_path dir p2 ddst + in + Ata.StateSet.union a_st1 a_st2, + Ata.StateSet.union anc_st1 anc_st2, + Ata.StateSet.union par_st1 par_st2, + Ata.StateSet.union pre_st1 pre_st2, + idx2, (f f1 f2) + +and compile_pred conf q_src idx ctx_path dir pred qdst = + match pred with + | Or(p1,p2) -> + binop_ conf q_src idx ctx_path dir pred p1 p2 (( +| )) qdst + | And(p1,p2) -> + binop_ conf q_src idx ctx_path dir pred p1 p2 (( *& )) qdst + | Expr e -> compile_expr conf Ata.StateSet.empty q_src idx ctx_path dir e qdst + | Not(p) -> + let a_st,anc_st,par_st,pre_st,idx,f = + compile_pred conf q_src idx ctx_path dir p qdst + in a_st,anc_st,par_st,pre_st,idx, Ata.Formula.not_ f + +and compile_expr conf states q_src idx ctx_path dir e qdst = + match e with + | Path (p) -> + let q = Ata.State.make () in + let annot_path = match p with Relative(r) -> dirannot (List.rev r) | _ -> assert false in + let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward,_ = + compile_path ~existential:true annot_path conf q states idx ctx_path + in + let ret_dir = match annot_path with + | ((FollowingSibling,_,_),_)::_ -> `Right + | _ -> `Left + in + let _ = match annot_path with + | (((Parent|Ancestor|AncestorOrSelf),_,_),_)::_ -> conf.final_state <- Ata.StateSet.add qdst conf.final_state + | _ -> () + in let _ = conf.univ_states <- a_dst::conf.univ_states in + (a_st,anc_st,par_st,pre_st,idx, ((ret_dir) *+ q)) + | True -> states,Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty,idx,Ata.Formula.true_ + | False -> states,Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty,idx,Ata.Formula.false_ + | _ -> assert false + + +and dirannot = function + [] -> [] + | [p] -> [p,`Last] + | p::(((FollowingSibling),_,_)::_ as l) -> (p,`Right)::(dirannot l) + | p::l -> (p,`Left) :: (dirannot l) + +let compile ?(querystring="") path = + let steps = + match path with + | Absolute(steps) + | Relative(steps) -> steps + | AbsoluteDoS(steps) -> steps@[(DescendantOrSelf,TagSet.node,Expr(True))] + in + let steps = List.rev steps in + let dirsteps = dirannot steps in + let config = { st_root = Ata.State.make(); + st_univ = Ata.State.make(); + final_state = Ata.StateSet.empty; + st_from_root = Ata.State.make(); + has_backward = false; + tr_parent_loop = Hashtbl.create 5; + tr = Hashtbl.create 5; + tr_aux = Hashtbl.create 5; + entry_points = []; + contains = None; + univ_states = []; + starstate = None; + } + in + let q0 = Ata.State.make() in + let states = Ata.StateSet.from_list [config.st_univ;config.st_root] + in + let num = 0 in + (* add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_univ config.st_from_root); + add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_from_root config.st_univ); + add_trans num config.tr_aux (mk_step config.st_no_nil (TagSet.add Tag.pcdata TagSet.star) `Left config.st_univ config.st_univ); + *) + let a_st,a_dst,anc_st,par_st,pre_st,_,_,has_backward,_ = + compile_path dirsteps config q0 states 0 [(config.st_root,[]) ] + in + let fst_tr = + ?< (config.st_root) >< (TagSet.singleton (Tag.tag ""),false) >=> + ((if is_rec dirsteps then `LLeft else `Left)*+ q0) *& (if config.has_backward then `LLeft *+ config.st_from_root else Ata.Formula.true_) + in + add_trans num config.tr fst_tr; + if config.has_backward then begin + add_trans num config.tr_aux + (?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft *+ config.st_from_root); + add_trans num config.tr_aux + (?< (config.st_from_root) >< (TagSet.any,false) >=> + `RRight *+ config.st_from_root); + + end; + let phi = Hashtbl.create 37 in + let fadd = fun _ (_,l) -> List.iter (fun (s,t,tr) -> + let lt = try + Hashtbl.find phi s + with Not_found -> [] + in + Hashtbl.replace phi s ((t,tr)::lt) + ) l in + Hashtbl.iter (fadd) config.tr; + Hashtbl.iter (fadd) config.tr_aux; + Hashtbl.iter (fadd) config.tr_parent_loop; + let final = + let s = anc_st + in if has_backward then Ata.StateSet.add config.st_from_root s else s + in { Ata.id = Oo.id (object end); + Ata.states = Hashtbl.fold (fun q _ acc -> Ata.StateSet.add q acc) phi Ata.StateSet.empty; + Ata.init = Ata.StateSet.singleton config.st_root; + Ata.trans = phi; + Ata.starstate = config.starstate; + Ata.query_string = querystring; + },config.entry_points,!contains + + end