From: kim Date: Mon, 16 Feb 2009 00:07:48 +0000 (+0000) Subject: Merged from branch stable-succint-refactor X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;h=5b4679e20761058f1e04c123da52631c0dd265cc;p=SXSI%2Fxpathcomp.git Merged from branch stable-succint-refactor git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@179 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/Makefile b/Makefile index ffc996c..43e5142 100644 --- a/Makefile +++ b/Makefile @@ -42,10 +42,9 @@ CXX = g++ -DDEBUG OCAMLOPT = ocamlopt -g -cc "$(CXX)" SYNT_DEBUG = -ppopt -DDEBUG else -CXX = g++ -OCAMLOPT = ocamlopt -cc "$(CXX)" -noassert -inline 100 +CXX = g++ +OCAMLOPT = ocamlopt -cc "$(CXX)" -ccopt -O3 -noassert -inline 100 endif - ifeq ($(PROFILE), true) SYNT_PROF = $(SYNT_DEBUG) -ppopt -DPROFILE endif diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index e3b5f44..e20a749 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -211,6 +211,25 @@ extern "C" CAMLprim value caml_xml_tree_is_leaf(value tree, value id){ CAMLreturn(Val_bool (XMLTREE(tree)->IsLeaf(TREENODEVAL(id)))); } +extern "C" CAMLprim value caml_xml_tree_tagged_desc(value tree, value id, value tag){ + CAMLparam3(tree,id,tag); + CAMLreturn(Val_int (XMLTREE(tree)->TaggedDesc(TREENODEVAL(id),(TagType) Int_val(tag)))); +} + +extern "C" CAMLprim value caml_xml_tree_tagged_next(value tree, value id, value tag){ + CAMLparam3(tree,id,tag); + CAMLreturn(Val_int (XMLTREE(tree)->TaggedNext(TREENODEVAL(id),(TagType) Int_val(tag)))); +} + + + + +extern "C" CAMLprim value caml_xml_tree_tagged_foll(value tree, value id, value tag){ + CAMLparam3(tree,id,tag); + CAMLreturn(Val_int (XMLTREE(tree)->TaggedFoll(TREENODEVAL(id),(TagType) Int_val(tag)))); +} + + extern "C" CAMLprim value caml_xml_tree_next_sibling(value tree, value id){ CAMLparam2(tree,id); CAMLreturn(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id)))); @@ -255,6 +274,12 @@ extern "C" CAMLprim value caml_xml_tree_tag_id(value tree,value id){ CAMLreturn (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(id)))); } +extern "C" CAMLprim value caml_xml_tree_subtree_tags(value tree,value id,value tag){ + CAMLparam3(tree,id,tag); + CAMLreturn (Val_int(XMLTREE(tree)->SubtreeTags(TREENODEVAL(id),Int_val(tag)))); +} + + extern "C" CAMLprim value caml_xml_tree_register_tag(value tree,value str){ CAMLparam2(tree,str); CAMLlocal1(id); diff --git a/ata.ml b/ata.ml index c12be8a..aec548f 100644 --- a/ata.ml +++ b/ata.ml @@ -2,7 +2,11 @@ INCLUDE "debug.ml" module Tree = Tree.Binary -let gen_id() = Oo.id (object end) +let gen_id = + let id = ref (-1) in + fun () -> incr id;!id + + module State = struct type t = int @@ -13,17 +17,24 @@ let mk_state = State.mk type state = State.t -type predicate = Ptset.t*Ptset.t -> Tree.t -> [ `True | `False | `Maybe ] +type predicate = [ `Left of (Tree.t -> bool) | `Right of (Tree.t -> bool) | + `True + ] +let eval_pred t = + function `True -> true + | `Left f | `Right f -> f t + type formula_expr = | False | True - | Or of formula * formula - | And of formula * formula - | Atom of ([ `Left | `Right ]*bool*state*predicate option) + | Or of formula * formula + | And of formula * formula + | Atom of ([ `Left | `Right ]*bool*state) and formula = { fid: int; - pos : formula_expr; - neg : formula; - st : Ptset.t*Ptset.t; + pos : formula_expr; + neg : formula; + st : Ptset.t*Ptset.t; + size: int; } @@ -35,14 +46,14 @@ struct | True -> 1 | And(f1,f2) -> 2+17*f1.fid + 37*f2.fid | Or(f1,f2) -> 3+101*f1.fid + 253*f2.fid - | Atom(d,b,s,_) -> 5+(if d=`Left then 11 else 19)*(if b then 23 else 31)*s + | Atom(d,b,s) -> 5+(if d=`Left then 11 else 19)*(if b then 23 else 31)*s let hash t = (hash t.pos) land max_int let equal f1 f2 = match f1.pos,f2.pos with | False,False | True,True -> true - | Atom(d1,b1,s1,_), Atom(d2,b2,s2,_) when (d1 = d2) && (b1=b2) &&(s1=s2) -> true + | Atom(d1,b1,s1), Atom(d2,b2,s2) when (d1 = d2) && (b1=b2) &&(s1=s2) -> true | Or(g1,g2),Or(h1,h2) | And(g1,g2),And(h1,h2) -> g1.fid == h1.fid && g2.fid == h2.fid | _ -> false @@ -52,8 +63,8 @@ module WH = Weak.Make(FormNode) let f_pool = WH.create 107 let true_,false_ = - let rec t = { fid = 1; pos = True; neg = f ; st = Ptset.empty,Ptset.empty} - and f = { fid = 0; pos = False; neg = t; st = Ptset.empty,Ptset.empty } + let rec t = { fid = 1; pos = True; neg = f ; st = Ptset.empty,Ptset.empty; size =1; } + and f = { fid = 0; pos = False; neg = t; st = Ptset.empty,Ptset.empty; size = 1; } in WH.add f_pool f; WH.add f_pool t; @@ -63,27 +74,29 @@ let is_true f = f.fid == 1 let is_false f = f.fid == 0 -let cons pos neg s1 s2 = +let cons pos neg s1 s2 size1 size2 = let rec pnode = { fid = gen_id (); pos = pos; neg = nnode; - st = s1; } + st = s1; + size = size1;} and nnode = { fid = gen_id (); pos = neg; neg = pnode; st = s2; + size = size2; } in (WH.merge f_pool pnode),(WH.merge f_pool nnode) -let atom_ ?(pred=None) d p s = +let atom_ d p s = let si = Ptset.singleton s in let ss = match d with | `Left -> si,Ptset.empty | `Right -> Ptset.empty,si - in fst (cons (Atom(d,p,s,pred)) (Atom(d,not p,s,pred)) ss ss ) + in fst (cons (Atom(d,p,s)) (Atom(d,not p,s)) ss ss 1 1) let merge_states f1 f2 = let sp = @@ -95,47 +108,63 @@ let merge_states f1 f2 = in sp,sn +let full_or_ f1 f2 = + let f1,f2 = if f1.fid < f2.fid then f2,f1 else f1,f2 in + let sp,sn = merge_states f1 f2 in + let psize = f1.size + f2.size in + let nsize = f1.neg.size + f2.neg.size in + fst (cons (Or(f1,f2)) (And(f1.neg,f2.neg)) sp sn psize nsize ) + let or_ f1 f2 = + let f1,f2 = if f1.fid < f2.fid then f2,f1 else f1,f2 in if is_true f1 || is_true f2 then true_ else if is_false f1 && is_false f2 then false_ else if is_false f1 then f2 else if is_false f2 then f1 else + let psize = f1.size + f2.size in + let nsize = f1.neg.size + f2.neg.size in let sp,sn = merge_states f1 f2 in - fst (cons (Or(f1,f2)) (And(f1.neg,f2.neg)) sp sn) + fst (cons (Or(f1,f2)) (And(f1.neg,f2.neg)) sp sn psize nsize) let and_ f1 f2 = + let f1,f2 = if f1.fid < f2.fid then f2,f1 else f1,f2 in if is_true f1 && is_true f2 then true_ else if is_false f1 || is_false f2 then false_ else if is_true f1 then f2 else if is_true f2 then f1 else + let psize = f1.size + f2.size in + let nsize = f1.neg.size + f2.neg.size in let sp,sn = merge_states f1 f2 in - fst (cons (And(f1,f2)) (Or(f1.neg,f2.neg)) sp sn) + fst (cons (And(f1,f2)) (Or(f1.neg,f2.neg)) sp sn psize nsize) let not_ f = f.neg -type property = [ `None | `Existential ] -let get_prop h s = - try - Hashtbl.find h s - with - Not_found -> `None - + +module HTagSetKey = +struct + type t = Ptset.t*Tag.t + let int_hash key = key lsl 31 lor (key lsl 8) + let equal (s1,s2) (t1,t2) = Tag.equal s2 t2 && Ptset.equal s1 t1 + let hash (s,t) = int_hash (Ptset.hash s) lxor ( int_hash (Tag.hash t)) +end +module HTagSet = Hashtbl.Make(HTagSetKey) + type t = { id : int; - states : Ptset.t; + mutable states : Ptset.t; init : Ptset.t; - final : Ptset.t; + mutable final : Ptset.t; universal : Ptset.t; (* Transitions of the Alternating automaton *) - (* (tags,q) -> (marking,formula) *) - phi : ((TagSet.t*state),(bool*formula)) Hashtbl.t; - delta : (TagSet.t,(Ptset.t*bool*Ptset.t*Ptset.t)) Hashtbl.t; - properties : (state,property) Hashtbl.t; + phi : (state,(TagSet.t*(bool*formula*predicate)) list) Hashtbl.t; + delta : (state*Tag.t, (bool*formula*predicate)) Hashtbl.t; +(* delta : (state,(bool*formula*predicate) TagMap.t) Hashtbl.t; *) + sigma : (bool*formula*(predicate list*predicate list)*bool) HTagSet.t; } module Pair (X : Set.OrderedType) (Y : Set.OrderedType) = @@ -161,7 +190,7 @@ type t = { Format.fprintf ppf " }" let rec pr_frm ppf f = match f.pos with | True -> Format.fprintf ppf "⊤" - | False -> Format.fprintf ppf "⊤" + | False -> Format.fprintf ppf "⊥" | And(f1,f2) -> Format.fprintf ppf "("; (pr_frm ppf f1); @@ -172,18 +201,17 @@ type t = { (pr_frm ppf f1); Format.fprintf ppf " ∨ "; (pr_frm ppf f2); - | Atom(dir,b,s,p) -> Format.fprintf ppf "%s%s[%i]%s" + | Atom(dir,b,s) -> Format.fprintf ppf "%s%s[%i]" (if b then "" else "¬") - (if dir = `Left then "↓₁" else "↓₂")s - (match p with None -> "" | _ -> " ") + (if dir = `Left then "↓₁" else "↓₂") s let dnf_hash = Hashtbl.create 17 let rec dnf_aux f = match f.pos with | False -> PL.empty | True -> PL.singleton (Ptset.empty,Ptset.empty) - | Atom(`Left,_,s,_) -> PL.singleton (Ptset.singleton s,Ptset.empty) - | Atom(`Right,_,s,_) -> PL.singleton (Ptset.empty,Ptset.singleton s) + | Atom(`Left,_,s) -> PL.singleton (Ptset.singleton s,Ptset.empty) + | Atom(`Right,_,s) -> PL.singleton (Ptset.empty,Ptset.singleton s) | Or(f1,f2) -> PL.union (dnf f1) (dnf f2) | And(f1,f2) -> let pl1 = dnf f1 @@ -206,47 +234,22 @@ type t = { let d = dnf_aux f in Hashtbl.add dnf_hash f.fid d;d - + + let can_top_down f = + let nf = dnf f in + if (PL.cardinal nf > 3)then None + else match PL.elements nf with + | [(s1,s2); (t1,t2); (u1,u2)] when + Ptset.is_empty s1 && Ptset.is_empty s2 && Ptset.is_empty t1 && Ptset.is_empty u2 + -> Some(true,t2,u1) + | [(t1,t2); (u1,u2)] when Ptset.is_empty t1 && Ptset.is_empty u2 + -> Some(false,t2,u1) + | _ -> None + + let equal_form f1 f2 = (f1.fid == f2.fid) || (FormNode.equal f1 f2) || (PL.equal (dnf f1) (dnf f2)) - - let alt_trans_to_nfa ?(accu=[]) ts s mark f = - (* todo memoize *) - let f' = dnf f in - PL.fold (fun (s1,s2) acc -> (ts,s,mark,s1,s2)::acc) f' accu - - let possible_trans ?(accu=[]) a q tag = - (* todo change the data structure to avoid creating (,) *) - let ata_trans = - Hashtbl.fold (fun (ts,s) (m,f) acc -> - if (q==s) && (TagSet.mem tag ts) - then (ts,s,m,f)::acc - else acc) a.phi [] - in - if ata_trans != [] - then begin - List.iter (fun (ts,s,m,f) -> - (* The following builds too many transitions in the nfa - let ts' = TagSet.remove tag ts - in - Hashtbl.remove a.phi (ts,s); - if not (TagSet.is_empty ts') - then Hashtbl.add a.phi (ts',s) (m,f) - *) - Hashtbl.remove a.phi (ts,s) - ) ata_trans; - (* let tstag = TagSet.tag tag in *) - let nfa_trs = List.fold_left (fun acc (ts,s,m,f) -> - alt_trans_to_nfa ~accu:acc ts s m f) [] ata_trans - in - List.iter (fun (ts,s,m,s1,s2) -> - Hashtbl.add a.delta ts ((Ptset.singleton s),m,s1,s2)) nfa_trs - end; - Hashtbl.fold (fun ts (s,m,s1,s2) acc -> - if (Ptset.mem q s) && (TagSet.mem tag ts) - then (m,s1,s2)::acc else acc) a.delta accu - let dump ppf a = Format.fprintf ppf "Automaton (%i) :\n" a.id; Format.fprintf ppf "States : "; pr_st ppf (Ptset.elements a.states); @@ -254,43 +257,42 @@ type t = { Format.fprintf ppf "\nFinal states : "; pr_st ppf (Ptset.elements a.final); Format.fprintf ppf "\nUniversal states : "; pr_st ppf (Ptset.elements a.universal); Format.fprintf ppf "\nAlternating transitions :\n------------------------------\n"; - let l = Hashtbl.fold (fun k t acc -> (k,t)::acc) a.phi [] in + let l = Hashtbl.fold (fun k t acc -> + (List.map (fun (t,(m,f,p)) -> (t,k),(m,f,p)) t)@ acc) a.phi [] in let l = List.sort (fun ((tsx,x),_) ((tsy,y),_) -> if x-y == 0 then TagSet.compare tsx tsy else x-y) l in - List.iter (fun ((ts,q),(b,f)) -> + List.iter (fun ((ts,q),(b,f,_)) -> let s = - try - Tag.to_string (TagSet.choose ts) - with - | _ -> "*" + if TagSet.is_finite ts + then "{" ^ (TagSet.fold (fun t a -> a ^ " " ^ (Tag.to_string t)) ts "") ^"}" + else let cts = TagSet.neg ts in + if TagSet.is_empty cts then "*" else + (TagSet.fold (fun t a -> a ^ " " ^ (Tag.to_string t)) cts "*\\{" + )^ "}" in Format.fprintf ppf "(%s,%i) %s " s q (if b then "=>" else "->"); pr_frm ppf f; Format.fprintf ppf "\n")l; Format.fprintf ppf "NFA transitions :\n------------------------------\n"; - Hashtbl.iter (fun (ts) (q,b,s1,s2) -> - - let s = - try - Tag.to_string (TagSet.choose ts) - with - | _ -> "*" - in - pr_st ppf (Ptset.elements q); - Format.fprintf ppf ",%s %s " s (if b then "=>" else "->"); - Format.fprintf ppf "("; - pr_st ppf (Ptset.elements s1); - Format.fprintf ppf ","; - pr_st ppf (Ptset.elements s2); - Format.fprintf ppf ")\n" ) a.delta; + HTagSet.iter (fun (qs,t) (b,f,_,_) -> + pr_st ppf (Ptset.elements qs); + Format.fprintf ppf ",%s %s " (Tag.to_string t) (if b then "=>" else "->"); + pr_frm ppf f; + Format.fprintf ppf "(fid=%i) left=" f.fid; + let l,r = f.st in pr_st ppf (Ptset.elements l); + Format.fprintf ppf ", right="; + pr_st ppf (Ptset.elements r); + Format.fprintf ppf "\n"; + ) a.sigma; Format.fprintf ppf "=======================================\n" module Transitions = struct - type t = state*TagSet.t*bool*formula + type t = state*TagSet.t*bool*formula*predicate let ( ?< ) x = x - let ( >< ) state label = state,label - let ( >=> ) (state,(label,mark)) form = (state,label,mark,form) + let ( >< ) state (l,b) = state,(l,b,`True) + let ( ><@ ) state (l,b,p) = state,(l,b,p) + let ( >=> ) (state,(label,mark,pred)) form = (state,label,mark,form,pred) let ( +| ) f1 f2 = or_ f1 f2 let ( *& ) f1 f2 = and_ f1 f2 let ( ** ) d s = atom_ d true s @@ -299,11 +301,44 @@ type t = { end type transition = Transitions.t - let equal_trans (q1,t1,m1,f1) (q2,t2,m2,f2) = + let equal_trans (q1,t1,m1,f1,_) (q2,t2,m2,f2,_) = (q1 == q2) && (TagSet.equal t1 t2) && (m1 == m2) && (equal_form f1 f2) - module TS : Set.S with type elt = Tree.t = Set.Make(Tree) - let res = ref TS.empty + module TS = + struct + type node = Nil | Cons of Tree.t * node | Concat of node*node + and t = { node : node; size : int } + let node n s = { node=n; size = s } + + let empty = node Nil 0 + + let cons e t = node (Cons(e,t.node)) (t.size+1) + let concat t1 t2 = node (Concat (t1.node,t2.node)) (t1.size+t2.size) + let append e t = concat t (cons e empty) + + let to_list_rev t = + let rec aux acc l rest = + match l with + | Nil -> begin + match rest with + | Nil -> acc + | Cons(e,t) -> aux (e::acc) t Nil + | Concat(t1,t2) -> aux acc t1 t2 + end + | Cons(e,r) -> aux (e::acc) r rest + | Concat(t1,t2) -> aux acc t1 (Concat(t2,rest)) + in + aux [] t.node Nil + let length = function { size = s } -> s + + let iter f { node = n } = + let rec loop = function + | Nil -> () + | Cons(e,n) -> let _ = f e in loop n + | Concat(n1,n2) -> let _ = loop n1 in loop n2 + in loop n + + end module BottomUpNew = struct @@ -366,178 +401,205 @@ END - let hfeval = Hashtbl.create 17 - let miss = ref 0 + module HFEval = Hashtbl.Make( + struct + type t = int*Ptset.t*Ptset.t + let equal (a,b,c) (d,e,f) = + a==d && (Ptset.equal b e) && (Ptset.equal c f) + let hash (a,b,c) = + a+17*(Ptset.hash b) + 31*(Ptset.hash c) + end) + + let hfeval = HFEval.create 4097 + + + let eval_form_bool f s1 s2 = + let rec eval f = match f.pos with + | Atom(`Left,b,q) -> if b == (Ptset.mem q s1) then (true,true,false) else false,false,false + | Atom(`Right,b,q) -> if b == (Ptset.mem q s2) then (true,false,true) else false,false,false + (* test some inlining *) + | True -> true,true,true + | False -> false,false,false + | _ -> + try + HFEval.find hfeval (f.fid,s1,s2) + with + | Not_found -> let r = + match f.pos with + | Or(f1,f2) -> + let b1,rl1,rr1 = eval f1 + in + if b1 && rl1 && rr1 then (true,true,true) + else + let b2,rl2,rr2 = eval f2 + in + let rl1,rr1 = if b1 then rl1,rr1 else false,false + and rl2,rr2 = if b2 then rl2,rr2 else false,false + in (b1 || b2, rl1||rl2,rr1||rr2) + | And(f1,f2) -> + let b1,rl1,rr1 = eval f1 in + if b1 && rl1 && rr1 then (true,true,true) + else if b1 + then let b2,rl2,rr2 = eval f2 in + if b2 then (true,rl1||rl2,rr1||rr2) + else (false,false,false) + else (false,false,false) + | _ -> assert false + in + HFEval.add hfeval (f.fid,s1,s2) r; + r + in eval f + + + module HFEvalDir = Hashtbl.Make( + struct + type t = int*Ptset.t*[`Left | `Right ] + let equal (a,b,c) (d,e,f) = + a==d && (Ptset.equal b e) && (c = f) + let hash_dir = function `Left -> 7919 + | `Right -> 3517 + + let hash (a,b,c) = + a+17*(Ptset.hash b) + 31*(hash_dir c) + end) + + let hfeval_dir = HFEvalDir.create 4097 + + + let eval_dir dir f s = + let rec eval f = match f.pos with + | Atom(d,b,q) when d = dir -> if b == (Ptset.mem q s) then true_ else false_ + | Atom(_,b,q) -> f + (* test some inlining *) + | True -> true_ + | False -> false_ + | _ -> + try + HFEvalDir.find hfeval_dir (f.fid,s,dir) + with + | Not_found -> + let r = + match f.pos with + | Or(f1,f2) -> + let f1 = eval f1 + in + if is_true f1 then true_ + else if is_false f1 then eval f2 + else or_ f1 f2 + | And(f1,f2) -> + let f1 = eval f1 in + if is_false f1 then false_ + else if is_true f1 then eval f2 + else and_ f1 f2 + | _ -> assert false + in + HFEvalDir.add hfeval_dir (f.fid,s,dir) r; + r + + in eval f + + + + let fstate_pool = Hashtbl.create 11 + + let merge_pred a b = match a,b with + | Some(f1), Some(f2) -> Some(fun x -> f1 x || f2 x) + | None,None -> None + | None,Some(_) -> b + | Some(_),None -> a + + let acc_pred p l1 l2 = match p with + | `Left _ -> p::l1,l2 + | `Right _ -> l1,p::l2 + | _ -> l1,l2 + + + let merge_trans t a tag q acc = + List.fold_left (fun (accf,accm,acchtrue) (ts,(m,f,pred)) -> + if TagSet.mem tag ts + then + let tmpf,hastrue = + if is_true f then + let newfinal = + try Hashtbl.find fstate_pool f.fid with + | Not_found -> let s = mk_state() in + a.states <- Ptset.add s a.states; + a.final <- Ptset.add s a.final; + Hashtbl.add fstate_pool f.fid s;s + in + (atom_ `Left true newfinal),true + else f,false in + (or_ tmpf accf,accm||m,acchtrue||hastrue) + else (accf,accm,acchtrue) + ) acc (Hashtbl.find a.phi q) + + let miss = ref 0 let call = ref 0 - let rec findlist s1 s2 = function - | [] -> raise Not_found - | ((ss1,ss2),r)::_ when - (not (Ptset.is_empty s1)) && (Ptset.subset s1 ss1) && - (not (Ptset.is_empty s2)) && (Ptset.subset s2 ss2) -> r - | _::r -> findlist s1 s2 r - - let eval_form f s1 s2 res1 res2 = - - let rec eval_aux f = match f.pos with - | Atom(`Left,b,q,_) -> if b == (Ptset.mem q s1) then (true,res1) else false,TS.empty - | Atom(`Right,b,q,_) -> if b == (Ptset.mem q s2) then (true,res2) else false,TS.empty - | True -> true,(TS.union res1 res2) - | False -> false,TS.empty - | Or(f1,f2) -> - let b1,r1 = eval_aux f1 - and b2,r2 = eval_aux f2 - in - let r1 = if b1 then r1 else TS.empty - and r2 = if b2 then r2 else TS.empty - in (b1 || b2, TS.union r1 r2) - - | And(f1,f2) -> - let b1,r1 = eval_aux f1 - and b2,r2 = eval_aux f2 - in - if b1 && b2 then (true, TS.union r1 r2) - else (false,TS.empty) - - in incr call;eval_aux f + let get_trans t a tag r = + try + let mark,f,predl,has_true = + HTagSet.find a.sigma (r,tag) + in f.st,f,mark,has_true,r,predl + with + Not_found -> + let f,mark,has_true,accq = + Ptset.fold (fun q (accf,accm,acchtrue,accq) -> + let naccf,naccm,nacctrue = + merge_trans t a tag q (accf,accm,acchtrue ) + in + if is_false naccf then (naccf,naccm,nacctrue,accq) + else (naccf,naccm,nacctrue,Ptset.add q accq) + ) + r (false_,false,false,Ptset.empty) + in + HTagSet.add a.sigma (accq,tag) (mark,f,([],[]),has_true); + f.st,f,mark,has_true,accq,([],[]) + + + let check_pred l t = true (*l = [] || + List.exists (function p -> + match p with + `Left f | `Right f -> f t + | _ -> assert false) l + *) - (* If true, then the formule may evaluate to true in the future, - if false it will always return false, i.e. necessary conditions are not - satisfied - *) - - let val3 = function true -> `True - | false -> `False - - let or3 a b = match a,b with - | `True,_ | _,`True -> `True - | `False,`False -> `False - | _ -> `Maybe - - let and3 a b = match a,b with - | `True,`True -> `True - | `False,_ | _,`False -> `False - | _ -> `Maybe - let not3 = function - | `True -> `False - | `False -> `True - | `Maybe -> `Maybe - - let true3 = function true -> `Maybe - | false -> `False - - let may_eval (s1,s2) f t = - let rec aux f = match f.pos with - | True -> `True - | False -> `False - | Or(f1,f2) -> or3 (aux f1) (aux f2) - | And(f1,f2) -> and3 (aux f1) (aux f2) - | Atom(dir,b,q,predo) -> - and3 (true3 ((Ptset.mem q (match dir with - | `Left -> s1 - | `Right -> s2)) == b)) - (match predo with - | Some pred -> (pred (s1,s2) t) - | None -> `True) - - in aux f - - let rec accepting_among a t r = - let r = Ptset.diff r a.final in - let rest = Ptset.inter a.final r in - if Ptset.is_empty r then r,TS.empty else - if (not (Tree.is_node t)) + let rec accepting_among2 a t r acc = + let orig = r in + let rest = Ptset.inter r a.final in + let r = Ptset.diff r rest in + if Ptset.is_empty r then rest,acc else + if (not (Tree.is_node t)) then - let _ = D(Hashtbl.add traces (-10) (TNil(r,Ptset.inter a.final r))) - in - Ptset.inter a.final r,TS.empty + orig,acc else - let tag = Tree.tag t - and t1 = Tree.first_child t - and t2 = Tree.next_sibling t + let tag = Tree.tag t in + let t1 = Tree.first_child t + and t2 = Tree.next_sibling t in + let (r1,r2),formula,mark,has_true,r,_ = get_trans t a tag r + in + let s1,res1 = accepting_among2 a t1 r1 acc in - let r1,r2,trs = - Hashtbl.fold (fun (ts,q) ((m,f)as tr) ((ar1,ar2,lt)as acc) -> - if (TagSet.mem tag ts) && Ptset.mem q r - then begin - (* Format.fprintf Format.err_formatter "Tree with tag %s qualifies for transition : (%s,%i)%s" - (Tag.to_string tag) - (try - Tag.to_string (TagSet.choose ts) - with - | _ -> "*" ) - q - (if m then "=>" else "->"); - pr_frm Format.err_formatter f; - Format.fprintf Format.err_formatter "\n"; *) - let ls,rs = f.st in - Ptset.union ls ar1,Ptset.union rs ar2,(q,tr)::lt - end - else acc - ) a.phi (Ptset.empty,Ptset.empty,[]) - in - let rtrue,rfalse,rmay,trs,selnodes = - List.fold_left (fun (at,af,am,atrs,selnodes) (q,(m,f)) -> - let ppf = Format.err_formatter in - match (*may_eval (r1,r2) f t *) `Maybe with - | `True -> - (* Format.fprintf ppf "Will skip (%i) %s " q (if m then "=>" else "->"); - pr_frm ppf f; - Format.fprintf ppf ", always true \n"; *) - (Ptset.add q at),af,am,atrs,TS.add t selnodes - | `False -> - (*Format.fprintf ppf "Will skip (%i) %s " q (if m then "=>" else "->"); - pr_frm ppf f; - Format.fprintf ppf ", always false \n"; *) - at,(Ptset.add q af),am,atrs,selnodes - - | `Maybe -> -(* Format.fprintf ppf "Must take (%i) %s " q (if m then "=>" else "->"); - pr_frm ppf f; - Format.fprintf ppf "\n"; *) - at,af,(Ptset.add q am),(q,(m,f))::atrs,selnodes) - (Ptset.empty,Ptset.empty,Ptset.empty,[],TS.empty) trs - in - let rr1,rr2,trs = - List.fold_left (fun ((ar1,ar2,trs)as acc) ((q,(_,f)as tr)) -> - if Ptset.mem q rmay - then let ls,rs = f.st in - Ptset.union ls ar1,Ptset.union rs ar2,tr::trs - else acc) (Ptset.empty,Ptset.empty,[]) trs - in - let s1,res1 = accepting_among a t1 rr1 - and s2,res2 = accepting_among a t2 rr2 - in - let res,set,mark,trs = List.fold_left (fun ((sel_nodes,res,amark,acctr) as acc) (q,(mark,f)) -> - let b,resnodes = eval_form f s1 s2 res1 res2 in - (* if b then begin - pr_st Format.err_formatter (Ptset.elements s1); - Format.fprintf Format.err_formatter ","; - pr_st Format.err_formatter (Ptset.elements s2); - Format.fprintf Format.err_formatter " satisfies "; - pr_frm Format.err_formatter f; - Format.fprintf Format.err_formatter " for input tree %s\n" (Tag.to_string tag); - end; *) - if b - then - (TS.union - (if mark then TS.add t resnodes else resnodes) - sel_nodes) - ,Ptset.add q res,amark||mark,(q,mark,f)::acctr - else acc - ) (TS.empty,rtrue,false,[]) trs - in - - let set = Ptset.union a.final set in - let _ = D(Hashtbl.add traces (Tree.id t) (TNode(r,set,mark,trs))) in - set,res - - + let formula = eval_dir `Left formula s1 in + if is_false formula then rest,acc + else + if is_true formula then (* tail call equivalent to a top down *) + accepting_among2 a t2 orig (if mark then TS.append t res1 else res1) + else + let s2,res2 = accepting_among2 a t2 r2 res1 + in + let formula = eval_dir `Right formula s2 + in + if is_false formula then rest,res1 + else + orig,(if mark then TS.append t (res2) + else res2) + + let run a t = - let st,res = accepting_among a t a.init in + let st,res = accepting_among2 a t a.init TS.empty in let b = Ptset.is_empty (st) in - let _ = D(dump_trace t) in - if b then [] - else (TS.elements res) - + if b then TS.empty + else + res end diff --git a/ata.mli b/ata.mli index f5e2c4f..e446c8a 100644 --- a/ata.mli +++ b/ata.mli @@ -1,17 +1,29 @@ +(* module Ptset : sig + include Set.S with type elt = int + val from_list : elt list -> t + end +*) + type state = int val mk_state : unit -> state -type predicate = Ptset.t*Ptset.t -> Tree.Binary.t -> [ `True | `False | `Maybe ] +type predicate = [ `Left of (Tree.Binary.t -> bool) | `Right of (Tree.Binary.t -> bool) | + `True + ] + + +val eval_pred : Tree.Binary.t -> predicate -> bool + type formula_expr = False | True | Or of formula * formula | And of formula * formula - | Atom of ([ `Left | `Right ] * bool * state * predicate option) -and formula = { fid : int; pos : formula_expr; neg : formula; st : Ptset.t*Ptset.t;} + | Atom of ([ `Left | `Right ] * bool * state) +and formula = { fid : int; pos : formula_expr; neg : formula; st : Ptset.t*Ptset.t; size: int;} val true_ : formula val false_ : formula -val atom_ : ?pred:predicate option -> [`Left | `Right ] -> bool -> state -> formula +val atom_ : [`Left | `Right ] -> bool -> state -> formula val and_ : formula -> formula -> formula val or_ : formula -> formula -> formula val not_ : formula -> formula @@ -19,26 +31,29 @@ val equal_form : formula -> formula -> bool val pr_frm : Format.formatter -> formula -> unit -type property = [ `None | `Existential ] +module HTagSet : Hashtbl.S with type key = Ptset.t*Tag.t type t = { id : int; - states : Ptset.t; + mutable states : Ptset.t; init : Ptset.t; - final : Ptset.t; + mutable final : Ptset.t; universal : Ptset.t; - phi : (TagSet.t * state, bool * formula) Hashtbl.t; - delta : (TagSet.t, Ptset.t * bool * Ptset.t * Ptset.t) Hashtbl.t; - properties : (state,property) Hashtbl.t; + phi : (state,(TagSet.t*(bool*formula*predicate)) list) Hashtbl.t; + delta : (state*Tag.t, (bool*formula*predicate)) Hashtbl.t; +(* delta : (state,(bool*formula*predicate) TagMap.t) Hashtbl.t; *) + sigma : (bool*formula*(predicate list*predicate list)*bool) HTagSet.t; + } val dump : Format.formatter -> t -> unit module Transitions : sig -type t = state*TagSet.t*bool*formula +type t = state*TagSet.t*bool*formula*predicate (* Doing this avoid the parenthesis *) val ( ?< ) : state -> state -val ( >< ) : state -> TagSet.t*bool -> state*(TagSet.t*bool) -val ( >=> ) : state*(TagSet.t*bool) -> formula -> t +val ( >< ) : state -> TagSet.t*bool -> state*(TagSet.t*bool*predicate) +val ( ><@ ) : state -> TagSet.t*bool*predicate -> state*(TagSet.t*bool*predicate) +val ( >=> ) : state*(TagSet.t*bool*predicate) -> formula -> t val ( +| ) : formula -> formula -> formula val ( *& ) : formula -> formula -> formula val ( ** ) : [`Left | `Right ] -> state -> formula @@ -47,10 +62,18 @@ end type transition = Transitions.t val equal_trans : transition -> transition -> bool +module TS : sig + type t + val empty : t + val cons : Tree.Binary.t -> t -> t + val append : Tree.Binary.t -> t -> t + val concat : t -> t -> t + val to_list_rev : t -> Tree.Binary.t list + val length : t -> int + val iter : (Tree.Binary.t -> unit) -> t -> unit +end module BottomUpNew : sig - val miss : int ref - val call : int ref - val run : t -> Tree.Binary.t -> Tree.Binary.t list + val run : t -> Tree.Binary.t -> TS.t end diff --git a/main.ml b/main.ml index 8b2bc46..50dd048 100644 --- a/main.ml +++ b/main.ml @@ -22,6 +22,84 @@ let time f x = let total_time () = List.fold_left (+.) 0. !l;; +let test_slashslash tree k = + let test = + match k with "*" -> TagSet.remove (Tag.tag "") TagSet.star + | s -> TagSet.singleton (Tag.tag k) + in + let attorstring = TagSet.cup TagSet.pcdata TagSet.attribute in + let rec aux t acc = + if Tree.Binary.is_node t + then + let tag = Tree.Binary.tag t in + let l = Tree.Binary.first_child t + and r = Tree.Binary.next_sibling t + in + let acc = + if TagSet.mem tag test + then + TS.append t acc + else + acc + in + let rl = if TagSet.mem tag attorstring then acc else aux l acc + in aux r rl + else + acc + in + let _ = Printf.eprintf "Testing optimal //%s ... " k in + let r = time (aux tree ) TS.empty in + Printf.eprintf "Result set is %i nodes\n%!" (TS.length r) + + +let test_jump tree k = + let ttag = Tag.tag k in + + let rec loop acc tree = + if Tree.Binary.is_node tree + then + let acc = TS.cons tree acc in + loop acc (Tree.Binary.tagged_next tree ttag) + else + acc + + in + let _ = Printf.eprintf "Testing jumping for tag %s ... " k in + let r = time (loop TS.empty ) (Tree.Binary.tagged_next tree ttag) in + Printf.eprintf "Result set is %i nodes\n%!" (TS.length r) + + + +let test_traversal tree k = + let ttag = Tag.tag k in + let iid t = if Tree.Binary.is_node t then Tree.Binary.id t else -1 in + let rec aux t = + if Tree.Binary.is_node t + then + let tag = Tree.Binary.tag t in + let l = Tree.Binary.first_child t + and r = Tree.Binary.next_sibling t + in + let _ = Printf.eprintf "Tree with id %i and tag=%s, tagged_desc %s is %i tagged_foll is %i, tagged_next is %i\n%!" + (Tree.Binary.id t) (Tag.to_string tag) (k) + (iid (Tree.Binary.tagged_desc t ttag)) + (iid (Tree.Binary.tagged_foll t ttag)) + (iid (Tree.Binary.tagged_next t ttag)) + in + aux l; + aux r; + + else + () + in + aux tree + +let test_count_subtree tree k = + let ttag = Tag.tag k in + let _ = Printf.eprintf "Counting subtrees with tag %s ... %!" k in + let r = time(Tree.Binary.subtree_tags tree) ttag in + Printf.eprintf "%i nodes \n%!" r + let main v query output = let _ = Tag.init (Tree.Binary.tag_pool v) in Printf.eprintf "Parsing query : "; @@ -30,13 +108,21 @@ let main v query output = XPath.Parser.parse_string query with Ulexer.Loc.Exc_located ((x,y),e) -> Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);exit 1 - in - Printf.eprintf "Compiling query : "; + in + XPath.Ast.print Format.err_formatter query; + Format.fprintf Format.err_formatter "\n%!"; + Printf.eprintf "Compiling query : "; let auto = time XPath.Compile.compile query in - XPath.Ast.print Format.err_formatter query; + + let _ = Ata.dump Format.err_formatter auto ; + Format.fprintf Format.err_formatter "\n%!" + in + let _ = test_count_subtree v "keyword" in + let _ = test_jump v "keyword" in + Printf.eprintf "Execution time : "; - let result = time (BottomUpNew.run auto) v in - Printf.eprintf "Number of nodes in the result set : %i\n" (List.length result); + let result = time (BottomUpNew.run auto) v in + Printf.eprintf "Number of nodes in the result set : %i\n" (TS.length result); begin match output with | None -> () @@ -46,10 +132,12 @@ let main v query output = time( fun () -> let oc = open_out f in output_string oc "\n"; - List.iter (fun t -> Tree.Binary.print_xml_fast oc t; + TS.iter (fun t -> Tree.Binary.print_xml_fast oc t; output_char oc '\n') result) (); end; - Printf.eprintf "Total time : %fms\n Coherence : %i\n%!" (total_time()) + (* let _ = Ata.dump Format.err_formatter auto in + Format.fprintf Format.err_formatter "\n%!"; *) + Printf.eprintf "Total time : %fms\n%!" (total_time()) ;; @@ -81,8 +169,7 @@ in IFDEF DEBUG THEN Printf.eprintf "\n=================================================\nDEBUGGING\n%!"; -Format.eprintf "\nAutomaton is:\n%!"; -Ata.dump Format.err_formatter auto; + Tree.DEBUGTREE.print_stats Format.err_formatter;; Gc.full_major() ENDIF diff --git a/ptset.ml b/ptset.ml index 5c029f7..091d4a8 100644 --- a/ptset.ml +++ b/ptset.ml @@ -38,8 +38,14 @@ module Node = | _ -> false end -module WH = Weak.Make(Node) - +module WH =Weak.Make(Node) +(* struct + include Hashtbl.Make(Node) + let merge h v = + if mem h v then v + else (add h v v;v) +end +*) let pool = WH.create 4093 (* Neat trick thanks to Alain Frisch ! *) @@ -63,14 +69,14 @@ let rec norm n = (* WH.merge pool *) -let branch (p,m,l,r) = norm (Branch(p,m,l,r)) +let branch p m l r = norm (Branch(p,m,l,r)) let leaf k = norm (Leaf k) (* To enforce the invariant that a branch contains two non empty sub-trees *) let branch_ne = function | (_,_,e,t) when is_empty e -> t | (_,_,t,e) when is_empty e -> t - | (p,m,t0,t1) -> branch (p,m,t0,t1) + | (p,m,t0,t1) -> branch p m t0 t1 (********** from here on, only use the smart constructors *************) @@ -113,10 +119,10 @@ let rec min_elt n = match n.node with let hbit = Array.init 256 naive_highest_bit let highest_bit_32 x = - let n = x lsr 24 in if n != 0 then hbit.(n) lsl 24 - else let n = x lsr 16 in if n != 0 then hbit.(n) lsl 16 - else let n = x lsr 8 in if n != 0 then hbit.(n) lsl 8 - else hbit.(x) + let n = x lsr 24 in if n != 0 then Array.unsafe_get hbit n lsl 24 + else let n = x lsr 16 in if n != 0 then Array.unsafe_get hbit n lsl 16 + else let n = x lsr 8 in if n != 0 then Array.unsafe_get hbit n lsl 8 + else Array.unsafe_get hbit x let highest_bit_64 x = let n = x lsr 32 in if n != 0 then (highest_bit_32 n) lsl 32 @@ -129,27 +135,27 @@ let rec min_elt n = match n.node with let branching_bit p0 p1 = highest_bit (p0 lxor p1) - let join (p0,t0,p1,t1) = + let join p0 t0 p1 t1 = let m = branching_bit p0 p1 in if zero_bit p0 m then - branch (mask p0 m, m, t0, t1) + branch (mask p0 m) m t0 t1 else - branch (mask p0 m, m, t1, t0) + branch (mask p0 m) m t1 t0 let match_prefix k p m = (mask k m) == p let add k t = let rec ins n = match n.node with | Empty -> leaf k - | Leaf j -> if j == k then n else join (k, leaf k, j, n) + | Leaf j -> if j == k then n else join k (leaf k) j n | Branch (p,m,t0,t1) -> if match_prefix k p m then if zero_bit k m then - branch (p, m, ins t0, t1) + branch p m (ins t0) t1 else - branch (p, m, t0, ins t1) + branch p m t0 (ins t1) else - join (k, leaf k, p, n) + join k (leaf k) p n in ins t @@ -170,12 +176,12 @@ let rec min_elt n = match n.node with (* should run in O(1) thanks to Hash consing *) - let equal = (=) + let equal a b = a==b || a.id == b.id - let compare = compare + let compare a b = if a == b then 0 else a.id - b.id - let rec merge (s,t) = + let rec merge s t = if (equal s t) (* This is cheap thanks to hash-consing *) then s else @@ -186,23 +192,22 @@ let rec min_elt n = match n.node with | _, Leaf k -> add k s | Branch (p,m,s0,s1), Branch (q,n,t0,t1) -> if m == n && match_prefix q p m then - branch (p, m, merge (s0,t0), merge (s1,t1)) + branch p m (merge s0 t0) (merge s1 t1) else if m > n && match_prefix q p m then if zero_bit q m then - branch (p, m, merge (s0,t), s1) + branch p m (merge s0 t) s1 else - branch (p, m, s0, merge (s1,t)) - else if m < n && match_prefix p q n then - + branch p m s0 (merge s1 t) + else if m < n && match_prefix p q n then if zero_bit p n then - branch (q, n, merge (s,t0), t1) + branch q n (merge s t0) t1 else - branch (q, n, t0, merge (s,t1)) + branch q n t0 (merge s t1) else (* The prefixes disagree. *) - join (p, s, q, t) + join p s q t - let union s t = merge (s,t) + let rec subset s1 s2 = (equal s1 s2) || match (s1.node,s2.node) with @@ -220,9 +225,12 @@ let rec min_elt n = match n.node with subset l1 r2 && subset r1 r2 else false + + let union s t = + merge s t let rec inter s1 s2 = - if (equal s1 s2) + if equal s1 s2 then s1 else match (s1.node,s2.node) with @@ -232,7 +240,7 @@ let rec min_elt n = match n.node with | _, Leaf k2 -> if mem k2 s1 then s2 else empty | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> if m1 == m2 && p1 == p2 then - merge (inter l1 l2, inter r1 r2) + merge (inter l1 l2) (inter r1 r2) else if m1 > m2 && match_prefix p2 p1 m1 then inter (if zero_bit p2 m1 then l1 else r1) s2 else if m1 < m2 && match_prefix p1 p2 m2 then @@ -241,7 +249,7 @@ let rec min_elt n = match n.node with empty let rec diff s1 s2 = - if (equal s1 s2) + if equal s1 s2 then empty else match (s1.node,s2.node) with @@ -251,12 +259,12 @@ let rec min_elt n = match n.node with | _, Leaf k2 -> remove k2 s1 | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> if m1 == m2 && p1 == p2 then - merge (diff l1 l2, diff r1 r2) + merge (diff l1 l2) (diff r1 r2) else if m1 > m2 && match_prefix p2 p1 m1 then if zero_bit p2 m1 then - merge (diff l1 s2, r1) + merge (diff l1 s2) r1 else - merge (l1, diff r1 s2) + merge l1 (diff r1 s2) else if m1 < m2 && match_prefix p1 p2 m2 then if zero_bit p1 m2 then diff s1 l2 else diff s1 r2 else diff --git a/tagSet.ml b/tagSet.ml index 8495f6c..503df80 100644 --- a/tagSet.ml +++ b/tagSet.ml @@ -1,3 +1,9 @@ +(* module Ptset = +struct + include Set.Make (struct type t = int let compare = (-) end) + let hash = Hashtbl.hash +end + *) include FiniteCofinite.Make(Ptset) let tag t = singleton t diff --git a/tests/test.xml b/tests/test.xml index 1358477..a266087 100644 --- a/tests/test.xml +++ b/tests/test.xml @@ -1,7 +1,15 @@ - - YTYYYYYYYYYY + + adadadad + + + + + + + + diff --git a/tree.ml b/tree.ml index 2218c28..d0cd550 100644 --- a/tree.ml +++ b/tree.ml @@ -39,6 +39,12 @@ sig val contains_old : t -> string -> bool val dump : t -> unit val get_string : t -> string_content -> string + val has_tagged_desc : t -> Tag.t -> bool + val has_tagged_foll : t -> Tag.t -> bool + val tagged_desc : t -> Tag.t -> t + val tagged_foll : t -> Tag.t -> t + val tagged_next : t -> Tag.t -> t + val subtree_tags : t -> Tag.t -> int end module XML = @@ -122,6 +128,10 @@ struct external text_xml_id : t -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" external node_xml_id : t -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id" external is_ancestor : t -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" + external tagged_desc : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_desc" + external tagged_foll : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_foll" + external tagged_next : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_next" + external subtree_tags : t -> [`Tree ] node -> Tag.t -> int = "caml_xml_tree_subtree_tags" let print_skel t = let rec aux id = @@ -189,6 +199,7 @@ struct end let is_node = function { node=Node(_) } -> true | _ -> false + let get_string t (i:string_content) = Text.get_text t.doc i open Tree let node_of_t t = { doc= t; @@ -317,7 +328,47 @@ struct | Node(SC(i,_)) -> Text.equal i id | _ -> false - + + let tagged_foll t tag = + if tag = Tag.attribute || tag = Tag.pcdata then failwith "tagged_foll" + else match t with + | { doc=d; node=Node(NC n) } -> { t with node = norm (tagged_foll d n tag) } + | { doc=d; node=Node(SC (_,n)) } when is_nil n -> { t with node= Nil } + | { doc=d; node=Node(SC (_,n)) } -> + let nnode = + if tag_id d n == tag then n + else + let n' = tagged_desc d n tag in + if is_nil n' then tagged_foll d n tag + else n' + in {t with node= norm nnode} + | _ -> { t with node=Nil } + + + let tagged_desc t tag = + if tag = Tag.attribute || tag = Tag.pcdata then failwith "tagged_desc" + else match t with + | { doc=d; node=Node(NC n) } -> { t with node = norm (tagged_desc d n tag) } + | _ -> { t with node=Nil } + + + let tagged_next t tag = + if tag = Tag.attribute || tag = Tag.pcdata then failwith "tagged_next" + else match t with + | { doc=d; node=Node(NC n) } -> { t with node = norm (tagged_next d n tag) } + | { doc=d; node=Node(SC (_,n)) } -> { t with node = norm (tagged_next d n tag) } + | _ -> { t with node=Nil } + + let subtree_tags t tag = + match t with + { doc = d; node = Node(NC n) } -> subtree_tags d n tag + | _ -> 0 + + + + let has_tagged_foll t tag = is_node (tagged_foll t tag) + let has_tagged_desc t tag = is_node (tagged_desc t tag) + let contains t s = Array.fold_left (fun a i -> DocIdSet.add i a) DocIdSet.empty (Text.contains t.doc s) diff --git a/tree.mli b/tree.mli index 43e8ff7..a2ad7f3 100644 --- a/tree.mli +++ b/tree.mli @@ -37,6 +37,12 @@ sig val contains_old : t -> string -> bool val dump : t -> unit val get_string : t -> string_content -> string + val has_tagged_desc : t -> Tag.t -> bool + val has_tagged_foll : t -> Tag.t -> bool + val tagged_desc : t -> Tag.t -> t + val tagged_foll : t -> Tag.t -> t + val tagged_next : t -> Tag.t -> t + val subtree_tags : t -> Tag.t -> int end module Binary : BINARY diff --git a/xPath.ml b/xPath.ml index ecfd4bd..2c520a1 100644 --- a/xPath.ml +++ b/xPath.ml @@ -243,7 +243,7 @@ let _l = function (`Left|`Last) -> `Left open Ata.Transitions -let add_trans num htr ((q,_,_,_) as tr) = +let add_trans num htr ((q,_,_,_,_) as tr) = try let (i,ltr) = Hashtbl.find htr q in if List.exists (Ata.equal_trans tr) ltr @@ -255,7 +255,7 @@ let add_trans num htr ((q,_,_,_) as tr) = exception Exit of Ata.state * Ata.transition list let rec replace s f = match f.Ata.pos with - | Ata.Atom(_,b,q,_) when q = s -> if b then Ata.true_ else Ata.false_ + | Ata.Atom(_,b,q) when q = s -> if b then Ata.true_ else Ata.false_ | Ata.Or(f1,f2) -> (replace s f1) +| (replace s f2) | Ata.And(f1,f2) -> (replace s f1) *& (replace s f2) | _ -> f @@ -264,11 +264,14 @@ let rec replace s f = let or_self conf old_dst q_src q_dst dir test pred mark = try let (num,l) = Hashtbl.find conf.tr q_src in - let l2 = List.fold_left (fun acc (q,t,m,f) -> - (q,TagSet.cap t test,mark, + let l2 = List.fold_left (fun acc (q,t,m,f,_) -> + (q, + TagSet.cap t test, + mark, (if mark then replace old_dst f else f) *& pred *& - (if mark then Ata.true_ else (_l dir) ** q_dst))::acc) + (if mark then Ata.true_ else (_l dir) ** q_dst), + `True)::acc) l l in Hashtbl.replace conf.tr q_src (num,l2) with Not_found -> () @@ -308,25 +311,53 @@ let rec compile_step ?(existential=false) conf q_src dir ctx_path step num = | Child | FollowingSibling | Descendant | DescendantOrSelf -> let axis = if axis = DescendantOrSelf - then begin - or_self conf q_src (fst(vpop ctx_path)) q_dst dir test p_f (is_last && not(existential)); - Descendant end + then + begin + or_self conf q_src (fst(vpop ctx_path)) q_dst dir test p_f (is_last && not(existential)); + Descendant + end else axis in - let t1 = ?< q_src><(test, is_last && not(existential))>=> + let t1 = ?< q_src><(test, is_last && not(ex))>=> p_f *& (if is_last then Ata.true_ else (_l dir) ** q_dst) in - let t2 = ?< q_src><(TagSet.star, false)>=> - (if axis=Descendant then `Left ** q_src +|`Right ** q_src - else `Right ** q_src) in - let tsa = ?< q_src><(att_or_str, false)>=> `Right ** q_src + + let _ = add_trans num conf.tr t1 in + + + let _ = if axis=Descendant then + add_trans num conf.tr_aux ( + ?< q_src><@ ((if ex then TagSet.diff TagSet.star test + else TagSet.star),false, + if TagSet.is_finite test + then `Left(fun t -> + if (Tree.Binary.is_node t) + then + let mytag = Tree.Binary.tag t in + TagSet.exists (fun tag -> + tag == mytag || + Tree.Binary.has_tagged_desc t tag + ) + test + else true + ) + + else `True )>=> `Left ** q_src ) + in + let t3 = + ?< q_src><@ ((if ex then TagSet.diff TagSet.any test + else TagSet.any), false, + if axis=Descendant&&TagSet.is_finite test + then `True (*`Right(fun t -> + TagSet.exists (fun tag -> Tree.Binary.has_tagged_foll t tag) + test) *) + else `True )>=> `Right ** q_src + in + let _ = add_trans num conf.tr_aux t3 in - add_trans num conf.tr t1; - add_trans num conf.tr_aux t2; - add_trans num conf.tr_aux tsa; [q_dst], q_dst, (if axis = FollowingSibling then hpush q_src ctx_path else vpush q_src ctx_path) - + | Attribute -> let q_dstreal = Ata.mk_state() in (* attributes are always the first child *) @@ -482,31 +513,37 @@ let compile path = (`Left** q0) *& (if config.has_backward then `Left ** config.st_from_root else Ata.true_) in add_trans num config.tr fst_tr; - if config.has_backward then begin + if config.has_backward then begin add_trans num config.tr_aux (?< (config.st_from_root) >< (TagSet.star,false) >=> `Left ** config.st_from_root +| `Right ** config.st_from_root); add_trans num config.tr_aux (?< (config.st_from_root) >< (TagSet.cup TagSet.pcdata TagSet.attribute,false) >=> - `Right ** config.st_from_root); + `Right ** config.st_from_root); - end; + end; let phi = Hashtbl.create 37 in - let fadd = fun _ (_,l) -> List.iter (fun (s,t,m,f) -> Hashtbl.add phi (t,s) (m,f)) l in + let fadd = fun _ (_,l) -> List.iter (fun (s,t,m,f,p) -> + let lt = try + Hashtbl.find phi s + with Not_found -> [] + in + Hashtbl.replace phi s ((t,(m,f,p))::lt) + ) l in Hashtbl.iter (fadd) config.tr; Hashtbl.iter (fadd) config.tr_aux; Hashtbl.iter (fadd) config.tr_parent_loop; let final = - let s = Ptset.union anc_st (Ptset.from_list [a_dst;config.st_univ]) - in if has_backward then s else Ptset.add config.st_from_root s + let s = Ptset.union anc_st (Ptset.from_list []) + in if has_backward then Ptset.add config.st_from_root s else s in { Ata.id = Oo.id (object end); - Ata.states = a_st; + Ata.states = if has_backward then Ptset.add config.st_from_root a_st else a_st; Ata.init = Ptset.singleton config.st_root; Ata.final = Ptset.union anc_st config.final_state; Ata.universal = Ptset.union anc_st config.final_state; Ata.phi = phi; Ata.delta = Hashtbl.create 17; - Ata.properties = Hashtbl.create 0; + Ata.sigma = Ata.HTagSet.create 17; }