From 09870a49122b3d7048422818dbb0a038513b4d14 Mon Sep 17 00:00:00 2001 From: kim Date: Sun, 26 Apr 2009 09:53:41 +0000 Subject: [PATCH] Half way through refactoring git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@353 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- Makefile | 4 +- ata.ml | 999 +++++++++++++++++++++++-------------------------- ata.mli | 135 ++++--- debug.ml | 14 +- depend | 37 +- main.ml | 14 +- ptset.ml | 442 +++------------------- ptset.mli | 108 +++--- tag.ml | 5 +- tagSet.ml | 4 +- tagSet.mli | 2 +- tests/base.xml | 1 - tests/test.xml | 7 +- tree.ml | 102 ++--- tree.mli | 17 +- utils.ml | 26 +- xPath.ml | 201 ++++------ xPath.mli | 2 +- 18 files changed, 811 insertions(+), 1309 deletions(-) diff --git a/Makefile b/Makefile index 956e51d..5c2a200 100644 --- a/Makefile +++ b/Makefile @@ -3,8 +3,8 @@ DEBUG=false PROFILE=true VERBOSE=false -BASESRC=custom.ml ptset.ml finiteCofinite.ml tag.ml tagSet.ml options.ml tree.ml ata.ml -BASEMLI=sigs.mli ptset.mli finiteCofinite.mli tag.mli tagSet.mli options.mli tree.mli ata.mli +BASESRC=custom.ml memoizer.ml hcons.ml ptset.ml finiteCofinite.ml tag.ml tagSet.ml options.ml tree.ml ata.ml +BASEMLI=sigs.mli memoizer.mli hcons.mli ptset.mli finiteCofinite.mli tag.mli tagSet.mli options.mli tree.mli ata.mli MLSRCS = memory.ml $(BASESRC) ulexer.ml xPath.ml main.ml MLISRCS = memory.mli $(BASEMLI) ulexer.mli xPath.mli BASEOBJS= $(BASESRC:.ml=.cmx) diff --git a/ata.ml b/ata.ml index bb38863..6cdd9ef 100644 --- a/ata.ml +++ b/ata.ml @@ -7,423 +7,375 @@ let miss_trans = ref 0 let cpt_eval = ref 0 let miss_eval = ref 0 -let gen_id = - let id = ref (-1) in - fun () -> incr id;!id - -let h_union = Hashtbl.create 4097 - -let pt_cup s1 s2 = - (* special case, since this is a union we want hash(s1,s2) = hash(s2,s1) *) - let x = Ptset.hash s1 - and y = Ptset.hash s2 in - let h = if x < y then HASHINT2(x,y) else HASHINT2(y,x) in - try - Hashtbl.find h_union h - with - | Not_found -> let s = Ptset.union s1 s2 - in - Hashtbl.add h_union h s;s - -module State = struct +(* Todo : move elsewhere *) +external vb : bool -> int = "%identity" +module State : +sig + include Sigs.T with type t = int + val make : unit -> t +end = +struct type t = int - let mk = gen_id + let make = + let id = ref (-1) in + fun () -> incr id;!id + let compare = (-) + let equal = (==) + external hash : t -> int = "%identity" + let print fmt x = Format.fprintf fmt "%i" x + let dump fmt x = print fmt x + let check x = + if x < 0 then failwith (Printf.sprintf "State: Assertion %i < 0 failed" x) +end +module StateSet = struct + include Ptset.Int + let print ppf s = + Format.pp_print_string ppf "{ "; + iter (fun i -> Format.fprintf ppf "%i " i) s; + Format.pp_print_string ppf "}"; + Format.pp_print_flush ppf () end -let mk_state = State.mk + +module Formula = +struct + type 'hcons expr = + | False | True + | Or of 'hcons * 'hcons + | And of 'hcons * 'hcons + | Atom of ([ `Left | `Right | `LLeft | `RRight ]*bool*State.t) + type 'hcons node = { + pos : 'hcons expr; + mutable neg : 'hcons; + st : (StateSet.t*StateSet.t*StateSet.t)*(StateSet.t*StateSet.t*StateSet.t); + size: int; (* Todo check if this is needed *) + } + + external hash_const_variant : [> ] -> int = "%identity" + module rec HNode : Hcons.S with type data = Node.t = Hcons.Make (Node) + and Node : Hashtbl.HashedType with type t = HNode.t node = + struct + type t = HNode.t node + let equal x y = x.size == y.size && + match x.pos,y.pos with + | False,False + | True,True -> true + | Or(xf1,xf2),Or(yf1,yf2) + | And(xf1,xf2),And(yf1,yf2) -> (HNode.equal xf1 yf1) && (HNode.equal xf2 yf2) + | Atom(d1,p1,s1), Atom(d2,p2,s2) -> d1 == d2 && (p1==p2) && s1 == s2 + | _ -> false + let hash f = + match f.pos with + | False -> 0 + | True -> 1 + | Or (f1,f2) -> HASHINT3(PRIME2,HNode.hash f1,HNode.hash f2) + | And (f1,f2) -> HASHINT3(PRIME3,HNode.hash f1,HNode.hash f2) + | Atom(d,p,s) -> HASHINT4(PRIME4,hash_const_variant d,vb p,s) + end -type state = State.t + type t = HNode.t + let hash = HNode.hash + let uid = HNode.uid + let equal = HNode.equal + let expr f = (HNode.node f).pos + let st f = (HNode.node f ).st + let size f = (HNode.node f).size + + let prio f = + match expr f with + | True | False -> 10 + | Atom _ -> 8 + | And _ -> 6 + | Or _ -> 1 + + let rec print ?(parent=false) ppf f = + if parent then Format.fprintf ppf "("; + let _ = match expr f with + | True -> Format.fprintf ppf "T" + | False -> Format.fprintf ppf "F" + | And(f1,f2) -> + print ~parent:(prio f > prio f1) ppf f1; + Format.fprintf ppf " ∧ "; + print ~parent:(prio f > prio f2) ppf f2; + | Or(f1,f2) -> + (print ppf f1); + Format.fprintf ppf " ∨ "; + (print ppf f2); + | Atom(dir,b,s) -> Format.fprintf ppf "%s%s[%i]" + (if b then "" else "¬") + (match dir with + | `Left -> "↓₁" + | `Right -> "↓₂" + | `LLeft -> "⇓₁" + | `RRight -> "⇓₂") s + in + if parent then Format.fprintf ppf ")" + + let print ppf f = print ~parent:false ppf f + + let is_true f = (expr f) == True + let is_false f = (expr f) == False + + + let cons pos neg s1 s2 size1 size2 = + let nnode = HNode.make { pos = neg; neg = (Obj.magic 0); st = s2; size = size2 } in + let pnode = HNode.make { pos = pos; neg = nnode ; st = s1; size = size1 } + in + (HNode.node nnode).neg <- pnode; (* works because the neg field isn't taken into + account for hashing ! *) + pnode,nnode + + let empty_triple = StateSet.empty,StateSet.empty,StateSet.empty + let empty_hex = empty_triple,empty_triple + let true_,false_ = cons True False empty_hex empty_hex 0 0 + let atom_ d p s = + let si = StateSet.singleton s in + let ss = match d with + | `Left -> (si,StateSet.empty,si),empty_triple + | `Right -> empty_triple,(si,StateSet.empty,si) + | `LLeft -> (StateSet.empty,si,si),empty_triple + | `RRight -> empty_triple,(StateSet.empty,si,si) + in fst (cons (Atom(d,p,s)) (Atom(d,not p,s)) ss ss 1 1) + + let not_ f = (HNode.node f).neg + let union_hex ((l1,ll1,lll1),(r1,rr1,rrr1)) ((l2,ll2,lll2),(r2,rr2,rrr2)) = + (StateSet.mem_union l1 l2 ,StateSet.mem_union ll1 ll2,StateSet.mem_union lll1 lll2), + (StateSet.mem_union r1 r2 ,StateSet.mem_union rr1 rr2,StateSet.mem_union rrr1 rrr2) + + let merge_states f1 f2 = + let sp = + union_hex (st f1) (st f2) + and sn = + union_hex (st (not_ f1)) (st (not_ f2)) + in + sp,sn + let order f1 f2 = if uid f1 < uid f2 then f2,f1 else f1,f2 - -type formula_expr = - | False | True - | Or of formula * formula - | And of formula * formula - | Atom of ([ `Left | `Right | `LLeft | `RRight ]*bool*state) -and formula = { fid: int; - fkey : int; - pos : formula_expr; - neg : formula; - st : (Ptset.t*Ptset.t*Ptset.t)*(Ptset.t*Ptset.t*Ptset.t); - size: int; - } - -external hash_const_variant : [> ] -> int = "%identity" -external vb : bool -> int = "%identity" + let or_ f1 f2 = + (* Tautologies: x|x, x|not(x) *) -let hash_node_form t = match t with - | False -> 0 - | True -> 1 - | And(f1,f2) -> (2+17*f1.fkey + 37*f2.fkey) (*land max_int *) - | Or(f1,f2) -> (3+101*f1.fkey + 253*f2.fkey) (*land max_int *) - | Atom(v,b,s) -> HASHINT3(hash_const_variant v,(3846*(vb b) +257),s) + if equal f1 f2 then f1 else + if equal f1 (not_ f2) then true_ else - + (* simplification *) + if is_true f1 || is_true f2 then true_ else + if is_false f1 && is_false f2 then false_ else + if is_false f1 then f2 else + if is_false f2 then f1 else -module FormNode = -struct - type t = formula + (* commutativity of | *) - let hash t = t.fkey - let equal f1 f2 = - if f1.fid == f2.fid || f1.fkey == f2.fkey || f1.pos == f2.pos then true - else - match f1.pos,f2.pos with - | False,False | True,True -> true - | Atom(d1,b1,s1), Atom(d2,b2,s2) when (b1==b2) && (s1==s2) && (d1 = d2) -> true - | Or(g1,g2),Or(h1,h2) - | And(g1,g2),And(h1,h2) -> g1.fid == h1.fid && g2.fid == h2.fid - | _ -> false + let f1,f2 = order f1 f2 in + let psize = (size f1) + (size f2) in + let nsize = (size (not_ f1)) + (size (not_ f2)) in + let sp,sn = merge_states f1 f2 in + fst (cons (Or(f1,f2)) (And(not_ f1,not_ f2)) sp sn psize nsize) + + + let and_ f1 f2 = -end -module WH = Weak.Make(FormNode) - -let f_pool = WH.create 107 - -let empty_triple = Ptset.empty,Ptset.empty,Ptset.empty -let empty_hex = empty_triple,empty_triple - -let true_,false_ = - let rec t = { fid = 1; pos = True; fkey=1; neg = f ; st = empty_hex; size =1; } - and f = { fid = 0; pos = False; fkey=0; neg = t; st = empty_hex; size = 1; } - in - WH.add f_pool f; - WH.add f_pool t; - t,f - -let is_true f = f.fid == 1 -let is_false f = f.fid == 0 - - -let cons pos neg s1 s2 size1 size2 = - let rec pnode = - { fid = gen_id (); - fkey = hash_node_form pos; - pos = pos; - neg = nnode; - st = s1; - size = size1;} - and nnode = { - fid = gen_id (); - pos = neg; - fkey = hash_node_form neg; - neg = pnode; - st = s2; - size = size2; - } - in - (WH.merge f_pool pnode),(WH.merge f_pool nnode) - -let atom_ d p s = - let si = Ptset.singleton s in - let ss = match d with - | `Left -> (si,Ptset.empty,si),empty_triple - | `Right -> empty_triple,(si,Ptset.empty,si) - | `LLeft -> (Ptset.empty,si,si),empty_triple - | `RRight -> empty_triple,(Ptset.empty,si,si) - in fst (cons (Atom(d,p,s)) (Atom(d,not p,s)) ss ss 1 1) - -let union_hex ((l1,ll1,lll1),(r1,rr1,rrr1)) ((l2,ll2,lll2),(r2,rr2,rrr2)) = - (pt_cup l1 l2 ,pt_cup ll1 ll2,pt_cup lll1 lll2), - (pt_cup r1 r2 ,pt_cup rr1 rr2,pt_cup rrr1 rrr2) - -let merge_states f1 f2 = - let sp = - union_hex f1.st f2.st - and sn = - union_hex f1.neg.st f2.neg.st - in - sp,sn - -let full_or_ f1 f2 = - let f1,f2 = if f1.fid < f2.fid then f2,f1 else f1,f2 in - let sp,sn = merge_states f1 f2 in - let psize = f1.size + f2.size in - let nsize = f1.neg.size + f2.neg.size in - fst (cons (Or(f1,f2)) (And(f1.neg,f2.neg)) sp sn psize nsize ) - -let or_ f1 f2 = - let f1,f2 = if f1.fid < f2.fid then f2,f1 else f1,f2 in - if is_true f1 || is_true f2 then true_ - else if is_false f1 && is_false f2 then false_ - else if is_false f1 then f2 - else if is_false f2 then f1 - else - let psize = f1.size + f2.size in - let nsize = f1.neg.size + f2.neg.size in - let sp,sn = merge_states f1 f2 in - fst (cons (Or(f1,f2)) (And(f1.neg,f2.neg)) sp sn psize nsize) - - - -let and_ f1 f2 = - let f1,f2 = if f1.fid < f2.fid then f2,f1 else f1,f2 in - if is_true f1 && is_true f2 then true_ - else if is_false f1 || is_false f2 then false_ - else if is_true f1 then f2 - else if is_true f2 then f1 - else - let psize = f1.size + f2.size in - let nsize = f1.neg.size + f2.neg.size in - let sp,sn = merge_states f1 f2 in - fst (cons (And(f1,f2)) (Or(f1.neg,f2.neg)) sp sn psize nsize) - + (* Tautologies: x&x, x¬(x) *) -let not_ f = f.neg + if equal f1 f2 then f1 else + if equal f1 (not_ f2) then false_ else -let k_hash (s,t) = HASHINT2(Ptset.hash s,Tag.hash t) + (* simplifications *) -module HTagSetKey = -struct - type t = Ptset.t*Tag.t - let equal (s1,s2) (t1,t2) = (s2 == t2) && Ptset.equal s1 t1 - let hash = k_hash + if is_true f1 && is_true f2 then true_ else + if is_false f1 || is_false f2 then false_ else + if is_true f1 then f2 else + if is_true f2 then f1 else + + (* commutativity of & *) + + let f1,f2 = order f1 f2 in + let psize = (size f1) + (size f2) in + let nsize = (size (not_ f1)) + (size (not_ f2)) in + let sp,sn = merge_states f1 f2 in + fst (cons (And(f1,f2)) (Or(not_ f1,not_ f2)) sp sn psize nsize) + module Infix = struct + let ( +| ) f1 f2 = or_ f1 f2 + let ( *& ) f1 f2 = and_ f1 f2 + let ( *+ ) d s = atom_ d true s + let ( *- ) d s = atom_ d false s + end end + +module Transition = struct + + type node = State.t*bool*Formula.t*bool + include Hcons.Make(struct + type t = node + let hash (s,m,f,b) = HASHINT4(s,Formula.uid f,vb m,vb b) + let equal (s,b,f,m) (s',b',f',m') = + s == s' && b==b' && m==m' && Formula.equal f f' + end) + + let print ppf f = let (st,mark,form,_) = node f in + Format.fprintf ppf "%i %s" st (if mark then "⇒" else "→"); + Formula.print ppf form; + Format.pp_print_flush ppf () + module Infix = struct + let ( ?< ) x = x + let ( >< ) state (l,mark) = state,(l,mark,true) + let ( ><@ ) state (l,mark) = state,(l,mark,false) + let ( >=> ) (state,(label,mark,bur)) form = (state,label,(make (state,mark,form,bur))) + end -module HTagSet = Hashtbl.Make(HTagSetKey) +end -type skiplist = Nothing | All - | Zero of skiplist - | One of skiplist | Two of skiplist | Three of skiplist - | Four of skiplist | Five of skiplist | Six of skiplist - | Seven of skiplist | Eight of skiplist | Nine of skiplist +module SetTagKey = +struct + type t = Ptset.Int.t*Tag.t + let equal (s1,t1) (s2,t2) = (t1 == t2) && Ptset.Int.equal s1 s2 + let hash (s,t) = HASHINT2(Ptset.Int.hash s,Tag.hash t) +end +module TransTable = Hashtbl +module CachedTransTable = Hashtbl.Make(SetTagKey) -type formlist = Nil | Cons of state*formula*int*bool*formlist +module Formlist = struct + include Ptset.Make(Transition) + let print ppf fl = + iter (fun t -> Transition.print ppf t; Format.pp_print_newline ppf ()) fl +end + type 'a t = { id : int; - mutable states : Ptset.t; - init : Ptset.t; - mutable final : Ptset.t; - universal : Ptset.t; - starstate : Ptset.t option; + mutable states : Ptset.Int.t; + init : Ptset.Int.t; + starstate : Ptset.Int.t option; (* Transitions of the Alternating automaton *) - phi : (state,(TagSet.t*(bool*formula*bool)) list) Hashtbl.t; - sigma : (int,('a t -> Tree.t -> Tree.t -> Ptset.t*'a)) Hashtbl.t; -} - - module Pair (X : Set.OrderedType) (Y : Set.OrderedType) = - struct - type t = X.t*Y.t - let compare (x1,y1) (x2,y2) = - let r = X.compare x1 x2 in - if r == 0 then Y.compare y1 y2 - else r - end + trans : (State.t,(TagSet.t*Transition.t) list) Hashtbl.t; + query_string: string; + } - module PL = Set.Make (Pair (Ptset) (Ptset)) - - - let pr_st ppf l = Format.fprintf ppf "{"; - begin - match l with - | [] -> () - | [s] -> Format.fprintf ppf " %i" s - | p::r -> Format.fprintf ppf " %i" p; - List.iter (fun i -> Format.fprintf ppf "; %i" i) r - end; - Format.fprintf ppf " }" - let rec pr_frm ppf f = match f.pos with - | True -> Format.fprintf ppf "⊤" - | False -> Format.fprintf ppf "⊥" - | And(f1,f2) -> - Format.fprintf ppf "("; - (pr_frm ppf f1); - Format.fprintf ppf ") ∧ ("; - (pr_frm ppf f2); - Format.fprintf ppf ")" - | Or(f1,f2) -> - (pr_frm ppf f1); - Format.fprintf ppf " ∨ "; - (pr_frm ppf f2); - | Atom(dir,b,s) -> Format.fprintf ppf "%s%s[%i]" - (if b then "" else "¬") - (match dir with - | `Left -> "↓₁" - | `Right -> "↓₂" - | `LLeft -> "⇓₁" - | `RRight -> "⇓₂") s - - let dump ppf a = - Format.fprintf ppf "Automaton (%i) :\n" a.id; - Format.fprintf ppf "States : "; pr_st ppf (Ptset.elements a.states); - Format.fprintf ppf "\nInitial states : "; pr_st ppf (Ptset.elements a.init); - Format.fprintf ppf "\nFinal states : "; pr_st ppf (Ptset.elements a.final); - Format.fprintf ppf "\nUniversal states : "; pr_st ppf (Ptset.elements a.universal); - Format.fprintf ppf "\nAlternating transitions :\n------------------------------\n"; - let l = Hashtbl.fold (fun k t acc -> - (List.map (fun (t,(m,f,p)) -> (t,k),(m,f,p)) t)@ acc) a.phi [] in - let l = List.sort (fun ((tsx,x),_) ((tsy,y),_) -> if x-y == 0 then TagSet.compare tsx tsy else x-y) l in - List.iter (fun ((ts,q),(b,f,_)) -> - - let s = - if TagSet.is_finite ts - then "{" ^ (TagSet.fold (fun t a -> a ^ " '" ^ (Tag.to_string t)^"'") ts "") ^" }" - else let cts = TagSet.neg ts in - if TagSet.is_empty cts then "*" else - (TagSet.fold (fun t a -> a ^ " " ^ (Tag.to_string t)) cts "*\\{" - )^ "}" - in - Format.fprintf ppf "(%s,%i) %s " s q (if b then "=>" else "->"); - pr_frm ppf f; - Format.fprintf ppf "\n")l; - - Format.fprintf ppf "NFA transitions :\n------------------------------\n"; -(* HTagSet.iter (fun (qs,t) (disp,b,_,flist,_,_) -> - let (ls,lls,_),(rs,rrs,_) = - List.fold_left (fun ((a1,b1,c1),(a2,b2,c2)) (_,f) -> - let (x1,y1,z1),(x2,y2,z2) = f.st in - ((Ptset.union x1 a1),(Ptset.union y1 b1),(Ptset.union c1 z1)), - ((Ptset.union x2 a2),(Ptset.union y2 b2),(Ptset.union c2 z2))) - ((Ptset.empty,Ptset.empty,Ptset.empty), - (Ptset.empty,Ptset.empty,Ptset.empty)) - flist - in - pr_st ppf (Ptset.elements qs); - Format.fprintf ppf ",%s %s " (Tag.to_string t) (if b then "=>" else "->"); - List.iter (fun (q,f) -> - Format.fprintf ppf "\n%i," q; - pr_frm ppf f) flist; - Format.fprintf ppf "\nleft="; - pr_st ppf (Ptset.elements ls); - Format.fprintf ppf " , "; - pr_st ppf (Ptset.elements lls); - Format.fprintf ppf ", right="; - pr_st ppf (Ptset.elements rs); - Format.fprintf ppf ", "; - pr_st ppf (Ptset.elements rrs); - Format.fprintf ppf ", first=%s, next=%s\n\n" disp.flabel disp.nlabel; - ) a.sigma; *) - Format.fprintf ppf "=======================================\n%!" + +let dump ppf a = + Format.fprintf ppf "Automaton (%i) :\n" a.id; + Format.fprintf ppf "States : "; StateSet.print ppf a.states; + Format.fprintf ppf "\nInitial states : "; StateSet.print ppf a.init; + Format.fprintf ppf "\nAlternating transitions :\n"; + let l = Hashtbl.fold (fun k t acc -> + (List.map (fun (ts,tr) -> (ts,k),Transition.node tr) t) @ acc) a.trans [] in + let l = List.sort (fun ((tsx,x),_) ((tsy,y),_) -> + if y-x == 0 then TagSet.compare tsy tsx else y-x) l in + let maxh,maxt,l_print = + List.fold_left ( + fun (maxh,maxt,l) ((ts,q),(_,b,f,_)) -> + let s = + if TagSet.is_finite ts + then "{" ^ (TagSet.fold (fun t a -> a ^ " '" ^ (Tag.to_string t)^"'") ts "") ^" }" + else let cts = TagSet.neg ts in + if TagSet.is_empty cts then "*" else + (TagSet.fold (fun t a -> a ^ " " ^ (Tag.to_string t)) cts "*\\{" + )^ "}" + in + let s = Printf.sprintf "(%s,%i)" s q in + let s_frm = + Formula.print Format.str_formatter f; + Format.flush_str_formatter() + in + (max (String.length s) maxh, max (String.length s_frm) maxt, + (s,(if b then "⇒" else "→"),s_frm)::l)) (0,0,[]) l + in + Format.fprintf ppf "%s\n%!" (String.make (maxt+maxh+3) '_'); + List.iter (fun (s,m,f) -> let s = s ^ (String.make (maxh-(String.length s)) ' ') in + Format.fprintf ppf "%s %s %s\n" s m f) l_print; + Format.fprintf ppf "%s\n%!" (String.make (maxt+maxh+3) '_') - module Transitions = struct - type t = state*TagSet.t*bool*formula*bool - let ( ?< ) x = x - let ( >< ) state (l,b) = state,(l,b,false) - let ( ><@ ) state (l,b) = state,(l,b,true) - let ( >=> ) (state,(label,mark,pred)) form = (state,label,mark,form,pred) - let ( +| ) f1 f2 = or_ f1 f2 - let ( *& ) f1 f2 = and_ f1 f2 - let ( ** ) d s = atom_ d true s - - - end - type transition = Transitions.t - let equal_trans (q1,t1,m1,f1,_) (q2,t2,m2,f2,_) = - (q1 == q2) && (TagSet.equal t1 t2) && (m1 == m2) (*&& (equal_form f1 f2) *) +module MemoForm = Memoizer.Make( + Hashtbl.Make(struct + type t = Formula.t*(StateSet.t*StateSet.t) + let equal (f1,(s1,t1)) (f2,(s2,t2)) = + Formula.equal f1 f2 && StateSet.equal s1 s2 && StateSet.equal t1 t2 + let hash (f,(s,t)) = + HASHINT3(Formula.uid f ,StateSet.uid s,StateSet.uid t) + end)) - - module HFEval = Hashtbl.Make( - struct - type t = int*Ptset.t*Ptset.t - let equal (a,b,c) (d,e,f) = - a==d && (Ptset.equal b e) && (Ptset.equal c f) - let hash (a,b,c) = - HASHINT3(a,Ptset.hash b,Ptset.hash c) - end) - - - - - let hfeval = HFEval.create 4097 - let eval_form_bool f s1 s2 = - let rec eval f = match f.pos with - (* test some inlining *) - | True -> true,true,true - | False -> false,false,false - | _ -> - try - HFEval.find hfeval (f.fid,s1,s2) - with - | Not_found -> let r = - match f.pos with - | Atom((`Left|`LLeft),b,q) -> - if b == (Ptset.mem q s1) - then (true,true,false) - else false,false,false - | Atom(_,b,q) -> - if b == (Ptset.mem q s2) - then (true,false,true) - else false,false,false - | Or(f1,f2) -> - let b1,rl1,rr1 = eval f1 - in - if b1 && rl1 && rr1 then (true,true,true) - else - let b2,rl2,rr2 = eval f2 - in - let rl1,rr1 = if b1 then rl1,rr1 else false,false - and rl2,rr2 = if b2 then rl2,rr2 else false,false - in (b1 || b2, rl1||rl2,rr1||rr2) - | And(f1,f2) -> - let b1,rl1,rr1 = eval f1 in - if b1 && rl1 && rr1 then (true,true,true) - else if b1 - then let b2,rl2,rr2 = eval f2 in - if b2 then (true,rl1||rl2,rr1||rr2) - else (false,false,false) - else (false,false,false) - | _ -> assert false +module F = Formula + + let eval_form_bool f s1 s2 = + let sets = (s1,s2) in + let eval = MemoForm.make_rec( + fun eval (f,_) -> + match F.expr f with + | F.True -> true,true,true + | F.False -> false,false,false + | F.Atom((`Left|`LLeft),b,q) -> + if b == (StateSet.mem q s1) + then (true,true,false) + else false,false,false + | F.Atom(_,b,q) -> + if b == (StateSet.mem q s2) + then (true,false,true) + else false,false,false + | F.Or(f1,f2) -> + let b1,rl1,rr1 = eval (f1,sets) in - HFEval.add hfeval (f.fid,s1,s2) r; - r - in eval f - - - let form_list_fold_left f acc fl = - let rec loop acc fl = - match fl with - | Nil -> acc - | Cons(s,frm,h,m,fll) -> loop (f acc s frm h m) fll + if b1 && rl1 && rr1 then (true,true,true) else + let b2,rl2,rr2 = eval (f2,sets) in + let rl1,rr1 = if b1 then rl1,rr1 else false,false + and rl2,rr2 = if b2 then rl2,rr2 else false,false + in (b1 || b2, rl1||rl2,rr1||rr2) + + | F.And(f1,f2) -> + let b1,rl1,rr1 = eval (f1,sets) in + if b1 && rl1 && rr1 then (true,true,true) else + if b1 then + let b2,rl2,rr2 = eval (f2,sets) in + if b2 then (true,rl1||rl2,rr1||rr2) else (false,false,false) + else (false,false,false) + ) in - loop acc fl - - let h_formlist = Hashtbl.create 4096 - let rec eval_formlist ?(memo=true) s1 s2 fl = - match fl with - | Nil -> Ptset.empty,false,false,false,false - | Cons(q,f,h,mark,fll) -> - let k = (h,Ptset.hash s1,Ptset.hash s2,mark) - in - - try - if memo then Hashtbl.find h_formlist k - else (raise Not_found) - with - Not_found -> - let s,b',b1',b2',amark = eval_formlist (~memo:memo) s1 s2 fll in - let b,b1,b2 = eval_form_bool f s1 s2 in - let r = if b then (Ptset.add q s, b, b1'||b1,b2'||b2,mark||amark) - else s,b',b1',b2',amark - in(* - Format.fprintf Format.err_formatter "\nEvaluating formula (%i) %i %s" h q (if mark then "=>" else "->"); - pr_frm (Format.err_formatter) f; - Format.fprintf Format.err_formatter " in context "; - pr_st Format.err_formatter (Ptset.elements s1); - Format.fprintf Format.err_formatter ", "; - pr_st Format.err_formatter (Ptset.elements s2); - Format.fprintf Format.err_formatter " result is %b\n%!" b; *) - (Hashtbl.add h_formlist k r;r) - + eval (f,sets) + + + module MemoFormlist = Memoizer.Make( + Hashtbl.Make(struct + type t = Formlist.t*(StateSet.t*StateSet.t) + let equal (f1,(s1,t1)) (f2,(s2,t2)) = + Formlist.equal f1 f2 && StateSet.equal s1 s2 && StateSet.equal t1 t2 + let hash (f,(s,t)) = + HASHINT3(Formlist.uid f ,StateSet.uid s,StateSet.uid t) + end)) + + let eval_formlist ?(memo=true) s1 s2 fl = + let sets = (s1,s2) in + let eval = MemoFormlist.make_rec ( + fun eval (fl,_) -> + if Formlist.is_empty fl + then StateSet.empty,false,false,false,false + else + let f,fll = Formlist.uncons fl in + let q,mark,f,_ = Transition.node f in + let b,b1,b2 = eval_form_bool f s1 s2 in + let s,b',b1',b2',amark = eval (fll,sets) in + if b then (StateSet.add q s, b, b1'||b1,b2'||b2,mark||amark) + else s,b',b1',b2',amark ) + in eval (fl,sets) - let tags_of_state a q = Hashtbl.fold - (fun p l acc -> - if p == q then - List.fold_left - (fun acc (ts,(_,_,aux)) -> + let tags_of_state a q = + Hashtbl.fold + (fun p l acc -> + if p == q then List.fold_left + + (fun acc (ts,t) -> + let _,_,_,aux = Transition.node t in if aux then acc else - TagSet.cup ts acc) acc l - else acc) a.phi TagSet.empty - + TagSet.cup ts acc) acc l + + else acc) a.trans TagSet.empty + let tags a qs = - let ts = Ptset.fold (fun q acc -> TagSet.cup acc (tags_of_state a q)) qs TagSet.empty + let ts = Ptset.Int.fold (fun q acc -> TagSet.cup acc (tags_of_state a q)) qs TagSet.empty in if TagSet.is_finite ts then `Positive(TagSet.positive ts) @@ -431,8 +383,8 @@ type 'a t = { let inter_text a b = match b with - | `Positive s -> let r = Ptset.inter a s in (r,Ptset.mem Tag.pcdata r, true) - | `Negative s -> let r = Ptset.diff a s in (r, Ptset.mem Tag.pcdata r, false) + | `Positive s -> let r = Ptset.Int.inter a s in (r,Ptset.Int.mem Tag.pcdata r, true) + | `Negative s -> let r = Ptset.Int.diff a s in (r, Ptset.Int.mem Tag.pcdata r, false) let mk_nil_ctx x _ = Tree.mk_nil x let next_sibling_ctx x _ = Tree.next_sibling x @@ -510,36 +462,21 @@ type 'a t = { module Run (RS : ResultSet) = struct + + let fmt = Format.err_formatter let pr x = Format.fprintf fmt x - module Formlist = - struct - type t = formlist - let nil : t = Nil - let cons q f i m l = Cons(q,f,i,m,l) - let hash = function Nil -> 0 | Cons(_,_,i,_,_) -> max_int land i - let pr fmt l = - let rec loop = function - | Nil -> () - | Cons(q,f,_,m,l) -> - Format.fprintf fmt "%i %s" q (if m then "=>" else "->"); - pr_frm fmt f; - Format.fprintf fmt "\n%!"; - loop l - in - loop l - end - type ptset_list = Nil | Cons of Ptset.t*int*ptset_list + type ptset_list = Nil | Cons of Ptset.Int.t*int*ptset_list let hpl l = match l with | Nil -> 0 | Cons (_,i,_) -> i - let cons s l = Cons (s,(Ptset.hash s) + 65599 * (hpl l), l) + let cons s l = Cons (s,(Ptset.Int.hash s) + 65599 * (hpl l), l) let rec empty_size n = if n == 0 then Nil - else cons Ptset.empty (empty_size (n-1)) + else cons Ptset.Int.empty (empty_size (n-1)) let fold_pl f l acc = let rec loop l acc = match l with @@ -573,66 +510,61 @@ type 'a t = { in loop Nil l - let td_trans = Hashtbl.create 4096 + module IntSet = Set.Make(struct type t = int let compare = (-) end) + + +IFDEF DEBUG +THEN +INCLUDE "html_trace.ml" + +END + let td_trans = Hashtbl.create 4096 + let mk_fun f s = D_IGNORE_(register_funname f s,f) + let mk_app_fun f arg s = let g = f arg in + D_IGNORE_(register_funname g ((get_funname f) ^ " " ^ s), g) + + let string_of_ts tags = (Ptset.Int.fold (fun t a -> a ^ " " ^ (Tag.to_string t) ) tags "{")^ " }" let choose_jump tagset qtags1 qtagsn a f_nil f_text f_t1 f_s1 f_tn f_sn f_notext = let tags1,hastext1,fin1 = inter_text tagset (tags a qtags1) in let tagsn,hastextn,finn = inter_text tagset (tags a qtagsn) in -(* Format.fprintf Format.err_formatter "Tags below states "; - pr_st Format.err_formatter (Ptset.elements qtags1); - Format.fprintf Format.err_formatter " are { "; - Ptset.iter (fun t -> Format.fprintf Format.err_formatter "%s " (Tag.to_string t)) tags1; - Format.fprintf Format.err_formatter "}, %b,%b\n%!" hastext1 fin1; - - Format.fprintf Format.err_formatter "Tags below states "; - pr_st Format.err_formatter (Ptset.elements qtagsn); - Format.fprintf Format.err_formatter " are { "; - Ptset.iter (fun t -> Format.fprintf Format.err_formatter "%s " (Tag.to_string t)) tagsn; - Format.fprintf Format.err_formatter "}, %b,%b\n%!" hastextn finn; -*) if (hastext1||hastextn) then f_text (* jumping to text nodes doesn't work really well *) - else if (Ptset.is_empty tags1) && (Ptset.is_empty tagsn) then f_nil - else if (Ptset.is_empty tagsn) then - if (Ptset.is_singleton tags1) then f_t1 (Ptset.choose tags1) (* TaggedChild/Sibling *) - else f_s1 tags1 (* SelectChild/Sibling *) - else if (Ptset.is_empty tags1) then - if (Ptset.is_singleton tagsn) then f_tn (Ptset.choose tagsn) (* TaggedDesc/Following *) - else f_sn tagsn (* SelectDesc/Following *) + else if (Ptset.Int.is_empty tags1) && (Ptset.Int.is_empty tagsn) then f_nil + else if (Ptset.Int.is_empty tagsn) then + if (Ptset.Int.is_singleton tags1) + then (* TaggedChild/Sibling *) + let tag = (Ptset.Int.choose tags1) in mk_app_fun f_t1 tag (Tag.to_string tag) + else (* SelectChild/Sibling *) + mk_app_fun f_s1 tags1 (string_of_ts tags1) + else if (Ptset.Int.is_empty tags1) then + if (Ptset.Int.is_singleton tagsn) + then (* TaggedDesc/Following *) + let tag = (Ptset.Int.choose tagsn) in mk_app_fun f_tn tag (Tag.to_string tag) + else (* SelectDesc/Following *) + mk_app_fun f_sn tagsn (string_of_ts tagsn) else f_notext let choose_jump_down a b c d = choose_jump a b c d - (Tree.mk_nil) - (Tree.text_below) - (*fun x -> let i,j = Tree.doc_ids x in - let res = Tree.text_below x in - Printf.printf "Calling text_below %s (tag=%s), docids= (%i,%i), res=%s\n" - (Tree.dump_node x) (Tag.to_string (Tree.tag x)) i j (Tree.dump_node res); - res*) - (fun _ -> Tree.node_child ) (* !! no tagged_child in Tree.ml *) - (fun _ -> Tree.node_child ) (* !! no select_child in Tree.ml *) - (Tree.tagged_desc) - (fun _ -> Tree.node_child ) (* !! no select_desc *) - (Tree.node_child) + (mk_fun (Tree.mk_nil) "Tree.mk_nil") + (mk_fun (Tree.text_below) "Tree.text_below") + (mk_fun (fun _ -> Tree.node_child) "[TaggedChild]Tree.node_child") (* !! no tagged_child in Tree.ml *) + (mk_fun (fun _ -> Tree.node_child) "[SelectChild]Tree.node_child") (* !! no select_child in Tree.ml *) + (mk_fun (Tree.tagged_desc) "Tree.tagged_desc") + (mk_fun (fun _ -> Tree.node_child ) "[SelectDesc]Tree.node_child") (* !! no select_desc *) + (mk_fun (Tree.node_child) "Tree.node_child") let choose_jump_next a b c d = choose_jump a b c d - (fun t _ -> Tree.mk_nil t) - (Tree.text_next) - (*fun x y -> let i,j = Tree.doc_ids x in - let res = Tree.text_next x y in - Printf.printf "Calling text_next %s (tag=%s) ctx=%s, docids= (%i,%i), res=%s\n" - (Tree.dump_node x) (Tag.to_string (Tree.tag x)) (Tree.dump_node y) i j (Tree.dump_node res); - res*) - - (fun _ -> Tree.node_sibling_ctx) (* !! no tagged_sibling in Tree.ml *) - (fun _ -> Tree.node_sibling_ctx) (* !! no select_child in Tree.ml *) - (Tree.tagged_foll_below) - (fun _ -> Tree.node_sibling_ctx) (* !! no select_foll *) - (Tree.node_sibling_ctx) - - + (mk_fun (fun t _ -> Tree.mk_nil t) "Tree.mk_nil2") + (mk_fun (Tree.text_next) "Tree.text_next") + (mk_fun (fun _ -> Tree.node_sibling_ctx) "[TaggedSibling]Tree.node_sibling_ctx")(* !! no tagged_sibling in Tree.ml *) + (mk_fun (fun _ -> Tree.node_sibling_ctx) "[SelectSibling]Tree.node_sibling_ctx")(* !! no select_sibling in Tree.ml *) + (mk_fun (Tree.tagged_foll_below) "Tree.tagged_foll_below") + (mk_fun (fun _ -> Tree.node_sibling_ctx) "[SelectFoll]Tree.node_sibling_ctx")(* !! no select_foll *) + (mk_fun (Tree.node_sibling_ctx) "Tree.node_sibling_ctx") + let get_trans slist tag a t = try Hashtbl.find td_trans (tag,hpl slist) @@ -640,36 +572,34 @@ type 'a t = { | Not_found -> let fl_list,llist,rlist,ca,da,sa,fa = fold_pl - (fun set _ (fll_acc,lllacc,rllacc,ca,da,sa,fa) -> (* For each set *) + (fun set _ (fll_acc,lllacc,rllacc,ca,da,sa,fa) -> (* For each set *) let fl,ll,rr,ca,da,sa,fa = - Ptset.fold - (fun q acc -> - fst ( - List.fold_left - (fun (((fl_acc,ll_acc,rl_acc,c_acc,d_acc,s_acc,f_acc),h_acc) as acc) - (ts,(m,f,_)) -> - if (TagSet.mem tag ts) - then - let (child,desc,below),(sibl,foll,after) = f.st in - let h_acc = HASHINT3(h_acc,f.fid,HASHINT2(q,vb m)) in - ((Formlist.cons q f h_acc m fl_acc, - Ptset.union ll_acc below, - Ptset.union rl_acc after, - Ptset.union child c_acc, - Ptset.union desc d_acc, - Ptset.union sibl s_acc, - Ptset.union foll f_acc), - h_acc) - else acc ) (acc,0) ( - try Hashtbl.find a.phi q - with - Not_found -> Printf.eprintf "Looking for state %i, doesn't exist!!!\n%!" - q;[] - )) + StateSet.fold + (fun q acc -> + List.fold_left + (fun ((fl_acc,ll_acc,rl_acc,c_acc,d_acc,s_acc,f_acc) as acc) + (ts,t) -> + if (TagSet.mem tag ts) + then + let _,_,f,_ = Transition.node t in + let (child,desc,below),(sibl,foll,after) = Formula.st f in + (Formlist.add t fl_acc, + StateSet.union ll_acc below, + StateSet.union rl_acc after, + StateSet.union child c_acc, + StateSet.union desc d_acc, + StateSet.union sibl s_acc, + StateSet.union foll f_acc) + else acc ) acc ( + try Hashtbl.find a.trans q + with + Not_found -> Printf.eprintf "Looking for state %i, doesn't exist!!!\n%!" + q;[] + ) - ) set (Formlist.nil,Ptset.empty,Ptset.empty,ca,da,sa,fa) + ) set (Formlist.empty,StateSet.empty,StateSet.empty,ca,da,sa,fa) in fl::fll_acc, cons ll lllacc, cons rr rllacc,ca,da,sa,fa) - slist ([],Nil,Nil,Ptset.empty,Ptset.empty,Ptset.empty,Ptset.empty) + slist ([],Nil,Nil,StateSet.empty,StateSet.empty,StateSet.empty,StateSet.empty) in (* Logic to chose the first and next function *) let tags_below,tags_after = Tree.tags t tag in @@ -687,7 +617,7 @@ type 'a t = { if mark then RS.cons t (RS.concat res1 res2) else RS.concat res1 res2 else RS.empty - + let top_down ?(noright=false) a t slist ctx slot_size = let pempty = empty_size slot_size in let eval_fold2_slist fll sl1 sl2 res1 res2 t = @@ -695,14 +625,7 @@ type 'a t = { let rec fold l1 l2 fll i aq = match l1,l2,fll with | Cons(s1,_,ll1), Cons(s2, _ ,ll2),fl::fll -> let r',rb,rb1,rb2,mark = eval_formlist s1 s2 fl in -(* let _ = pr "Evaluation context : "; pr_st fmt (Ptset.elements s1); - pr_st fmt (Ptset.elements s2); - pr "Formlist (%i) : " (Formlist.hash fl); - Formlist.pr fmt fl; - pr "Results : "; pr_st fmt (Ptset.elements r'); - pr ", %b %b %b %b\n%!" rb rb1 rb2 mark - in *) - let _ = res.(i) <- merge rb rb1 rb2 mark t res1.(i) res2.(i) + let _ = res.(i) <- merge rb rb1 rb2 mark t res1.(i) res2.(i) in fold ll1 ll2 fll (i+1) (cons r' aq) | Nil, Nil,[] -> aq,res @@ -712,25 +635,17 @@ type 'a t = { in let null_result() = (pempty,Array.make slot_size RS.empty) in let rec loop t slist ctx = - let (a,b) = if Tree.is_nil t then null_result() else - let tag = Tree.tag t in + let tag = Tree.tag t in let fl_list,llist,rlist,first,next = get_trans slist tag a t in -(* let _ = pr "For tag %s,node %s, returning formulae list: \n%!" - (Tag.to_string tag) (Tree.dump_node t); - List.iter (fun f -> Formlist.pr fmt f;pr "\n%!") fl_list - in*) let sl1,res1 = loop (first t) llist t in let sl2,res2 = loop (next t ctx) rlist ctx in - eval_fold2_slist fl_list sl1 sl2 res1 res2 t - in -(* let _ = pr "Inside topdown call: tree was %s, tag = %s" (Tree.dump_node t) (if Tree.is_nil t then "###" - else Tag.to_string (Tree.tag t)); - iter_pl (fun s -> (pr_st fmt (Ptset.elements s))) a; - Array.iter (fun i -> pr "%i" (RS.length i)) b; - pr "\n%!"; in*) (a,b) - + let res = eval_fold2_slist fl_list sl1 sl2 res1 res2 t + in + D_IGNORE_( + register_trace t (slist,(fst res),sl1,sl2,fl_list,first,next,ctx), + res) in let loop_no_right t slist ctx = if Tree.is_nil t then null_result() @@ -739,20 +654,29 @@ type 'a t = { let fl_list,llist,rlist,first,next = get_trans slist tag a t in let sl1,res1 = loop (first t) llist t in let sl2,res2 = null_result() in - eval_fold2_slist fl_list sl1 sl2 res1 res2 t + let res = eval_fold2_slist fl_list sl1 sl2 res1 res2 t + in + D_IGNORE_( + register_trace t (slist,(fst res),sl1,sl2,fl_list,first,next,ctx), + res) in (if noright then loop_no_right else loop) t slist ctx + let run_top_down a t = let init = cons a.init Nil in let _,res = top_down a t init t 1 - in res.(0) + in + D_IGNORE_( + output_trace a t "trace.html" + (RS.fold (fun t a -> IntSet.add (Tree.id t) a) res.(0) IntSet.empty), + res.(0)) ;; module Configuration = struct - module Ptss = Set.Make(Ptset) - module IMap = Map.Make(Ptset) + module Ptss = Set.Make(StateSet) + module IMap = Map.Make(StateSet) type t = { hash : int; sets : Ptss.t; results : RS.t IMap.t } @@ -765,17 +689,17 @@ type 'a t = { if Ptss.mem s c.sets then { c with results = IMap.add s (RS.concat r (IMap.find s c.results)) c.results} else - { hash = HASHINT2(c.hash,Ptset.hash s); + { hash = HASHINT2(c.hash,Ptset.Int.hash s); sets = Ptss.add s c.sets; results = IMap.add s r c.results } let pr fmt c = Format.fprintf fmt "{"; - Ptss.iter (fun s -> pr_st fmt (Ptset.elements s); + Ptss.iter (fun s -> StateSet.print fmt s; Format.fprintf fmt " ") c.sets; Format.fprintf fmt "}\n%!"; IMap.iter (fun k d -> - pr_st fmt (Ptset.elements k); + StateSet.print fmt k; Format.fprintf fmt "-> %i\n" (RS.length d)) c.results; Format.fprintf fmt "\n%!" @@ -797,7 +721,7 @@ type 'a t = { in let h,s = Ptss.fold - (fun s (ah,ass) -> (HASHINT2(ah,Ptset.hash s), + (fun s (ah,ass) -> (HASHINT2(ah,Ptset.Int.hash s), Ptss.add s ass)) (Ptss.union c1.sets c2.sets) (0,Ptss.empty) in @@ -819,19 +743,19 @@ type 'a t = { Hashtbl.find h_fold (hs,Formlist.hash formlist,dir) with Not_found -> let res = - if dir then eval_formlist ~memo:false s Ptset.empty formlist - else eval_formlist ~memo:false Ptset.empty s formlist + if dir then eval_formlist ~memo:false s Ptset.Int.empty formlist + else eval_formlist ~memo:false Ptset.Int.empty s formlist in (Hashtbl.add h_fold (hs,Formlist.hash formlist,dir) res;res) in(* let _ = pr "Evaluating on set (%s) with tree %s=%s" (if dir then "left" else "right") (Tag.to_string (Tree.tag t)) (Tree.dump_node t) ; - pr_st fmt (Ptset.elements s); + StateSet.print fmt (Ptset.Int.elements s); pr ", formualae (with hash %i): \n" (Formlist.hash formlist); Formlist.pr fmt formlist; pr "result is "; - pr_st fmt (Ptset.elements r'); + StateSet.print fmt (Ptset.Int.elements r'); pr " %b %b %b %b \n%!" rb rb1 rb2 mark ; in *) if rb && ((dir&&rb1)|| ((not dir) && rb2)) @@ -857,22 +781,19 @@ type 'a t = { Hashtbl.find h_trans key with | Not_found -> - let f_list,_ = - Hashtbl.fold (fun q l acc -> - List.fold_left (fun (fl_acc,h_acc) (ts,(m,f,_)) -> - if TagSet.mem ptag ts - then - let h_acc = HASHINT3(h_acc,f.fid,HASHINT2(q,vb m)) in - (Formlist.cons q f h_acc m fl_acc, - h_acc) - else (fl_acc,h_acc)) - acc l) - a.phi (Formlist.nil,0) - in - let res = fold_pl (fun _ _ acc -> f_list::acc) slist [] - in - (Hashtbl.add h_trans key res;res) - + let f_list = + Hashtbl.fold (fun q l acc -> + List.fold_left (fun fl_acc (ts,t) -> + if TagSet.mem ptag ts then Formlist.add t fl_acc + else fl_acc) + + acc l) + a.trans Formlist.empty + in + let res = fold_pl (fun _ _ acc -> f_list::acc) slist [] + in + (Hashtbl.add h_trans key res;res) + let h_tdconf = Hashtbl.create 511 let rec bottom_up a tree conf next jump_fun root dotd init accu = @@ -933,7 +854,7 @@ type 'a t = { pr "accu is %i\n" (RS.length accu); in *) let accu,newconf = Configuration.IMap.fold (fun s res (ar,nc) -> - if Ptset.intersect s init then + if Ptset.Int.intersect s init then ( RS.concat res ar ,nc) else (ar,Configuration.add nc s res)) (newconf.Configuration.results) (accu,Configuration.empty) @@ -955,12 +876,12 @@ type 'a t = { | Not_found -> let res = Hashtbl.fold (fun q l acc -> if List.exists (fun (ts,_) -> TagSet.mem tag ts) l - then Ptset.add q acc - else acc) a.phi Ptset.empty + then Ptset.Int.add q acc + else acc) a.trans Ptset.Int.empty in Hashtbl.add h_tdconf tag res;res in (* let _ = pr ", among "; - pr_st fmt (Ptset.elements r); + StateSet.print fmt (Ptset.Int.elements r); pr "\n%!"; in *) let r = cons r Nil in @@ -970,19 +891,21 @@ type 'a t = { | _ -> assert false in (* pr "Result of topdown run is %!"; - pr_st fmt (Ptset.elements set); + StateSet.print fmt (Ptset.Int.elements set); pr ", number is %i\n%!" (RS.length res.(0)); *) Configuration.add Configuration.empty set res.(0) let run_bottom_up a t k = - let trlist = Hashtbl.find a.phi (Ptset.choose a.init) + let trlist = Hashtbl.find a.trans (Ptset.Int.choose a.init) in let init = List.fold_left - (fun acc (_,(_,f,_)) -> - Ptset.union acc (let (_,_,l) = fst (f.st) in l)) - Ptset.empty trlist + (fun acc (_,t) -> + let _,_,f,_ = Transition.node t in + let _,_,l = fst ( Formula.st f ) in + Ptset.Int.union acc l) + Ptset.Int.empty trlist in let tree1,jump_fun = match k with @@ -1003,7 +926,7 @@ type 'a t = { Configuration.pr fmt conf in *) let acc = Configuration.IMap.fold - ( fun s res acc -> if Ptset.intersect init s + ( fun s res acc -> if Ptset.Int.intersect init s then RS.concat res acc else acc) conf.Configuration.results acc in if Tree.is_nil next_of_next (*|| Tree.equal next next_of_next *)then diff --git a/ata.mli b/ata.mli index 362bfa8..26f5518 100644 --- a/ata.mli +++ b/ata.mli @@ -1,57 +1,91 @@ -type state = int -val mk_state : unit -> state +type jump_kind = [ `CONTAINS of string | `NOTHING | `TAG of Tag.t ] +module State : +sig + include Sigs.T with type t = int + val make : unit -> t +end -type formula_expr = - False - | True - | Or of formula * formula - | And of formula * formula - | Atom of ([ `Left | `Right | `LLeft | `RRight ] * bool * state) -and formula = { fid : int; fkey : int; pos : formula_expr; neg : formula; st : (Ptset.t*Ptset.t*Ptset.t)*(Ptset.t*Ptset.t*Ptset.t); size: int;} -val true_ : formula -val false_ : formula -val atom_ : [`Left | `Right | `LLeft | `RRight ] -> bool -> state -> formula -val and_ : formula -> formula -> formula -val or_ : formula -> formula -> formula -val not_ : formula -> formula -(*val equal_form : formula -> formula -> bool *) -val pr_frm : Format.formatter -> formula -> unit +module StateSet : + sig + include Ptset.S with type elt = int + val print : Format.formatter -> t -> unit + end +module Formula : + sig + type 'a expr = + False + | True + | Or of 'a * 'a + | And of 'a * 'a + | Atom of ([ `LLeft | `Left | `RRight | `Right ] * bool * State.t) -module HTagSet : Hashtbl.S with type key = Ptset.t*Tag.t + type t + val hash : t -> int + val uid : t -> int + val equal : t -> t -> bool + val expr : t -> t expr + val st : + t -> + (StateSet.t * StateSet.t * StateSet.t) * + (StateSet.t * StateSet.t * StateSet.t) + val size : t -> int + val print : Format.formatter -> t -> unit + val is_true : t -> bool + val is_false : t -> bool + val true_ : t + val false_ : t + val atom_ : + [ `LLeft | `Left | `RRight | `Right ] -> + bool -> StateSet.elt -> t + val not_ : t -> t + val or_ : t -> t -> t + val and_ : t -> t -> t + module Infix : sig + val ( +| ) : t -> t -> t + val ( *& ) : t -> t -> t + val ( *+ ) : + [ `LLeft | `Left | `RRight | `Right ] -> StateSet.elt -> t + val ( *- ) : + [ `LLeft | `Left | `RRight | `Right ] -> StateSet.elt -> t + end + end +module Transition : + sig + type node = State.t * bool * Formula.t * bool + type data = node + type t + val make : data -> t + val node : t -> data + val hash : t -> int + val uid : t -> int + val equal : t -> t -> bool + module Infix : sig + val ( ?< ) : State.t -> State.t + val ( >< ) : State.t -> TagSet.t * bool -> State.t*(TagSet.t*bool*bool) + val ( ><@ ) : State.t -> TagSet.t * bool -> State.t*(TagSet.t*bool*bool) + val ( >=> ) : State.t *(TagSet.t*bool*bool) -> Formula.t -> (State.t*TagSet.t*t) + end + val print : Format.formatter -> t -> unit + end + +module SetTagKey : Hashtbl.HashedType with type t = StateSet.t*Tag.t +module CachedTransTable : Hashtbl.S with type key = SetTagKey.t + +module Formlist : Ptset.S with type elt = Transition.t -type 'a t = { +type 'a t = { id : int; - mutable states : Ptset.t; - init : Ptset.t; - mutable final : Ptset.t; - universal : Ptset.t; - starstate : Ptset.t option; - (* Transitions of the Alternating automaton *) - phi : (state,(TagSet.t*(bool*formula*bool)) list) Hashtbl.t; - sigma : (int,('a t -> Tree.t -> Tree.t -> Ptset.t*'a)) Hashtbl.t; + mutable states : StateSet.t; + init : StateSet.t; + starstate : StateSet.t option; + trans : (State.t, (TagSet.t * Transition.t) list) Hashtbl.t; + query_string : string; } - val dump : Format.formatter -> 'a t -> unit - -module Transitions : sig -type t = state*TagSet.t*bool*formula*bool -(* Doing this avoid the parenthesis *) -val ( ?< ) : state -> state -val ( >< ) : state -> TagSet.t*bool -> state*(TagSet.t*bool*bool) -val ( ><@ ) : state -> TagSet.t*bool -> state*(TagSet.t*bool*bool) -val ( >=> ) : state*(TagSet.t*bool*bool) -> formula -> t -val ( +| ) : formula -> formula -> formula -val ( *& ) : formula -> formula -> formula -val ( ** ) : [`Left | `Right | `LLeft | `RRight ] -> state -> formula -end -type transition = Transitions.t -val equal_trans : transition -> transition -> bool - - - module type ResultSet = +module type ResultSet = sig type t val empty : t @@ -63,10 +97,9 @@ val equal_trans : transition -> transition -> bool val length : t -> int end - module IdSet : ResultSet - - val top_down_count : 'a t -> Tree.t -> int - val top_down : 'a t -> Tree.t -> IdSet.t +module IdSet : ResultSet - type jump_kind = [ `TAG of Tag.t | `CONTAINS of string | `NOTHING ] - val bottom_up_count : 'a t -> Tree.t -> jump_kind -> int +val top_down_count : 'a t -> Tree.t -> int +val top_down : 'a t -> Tree.t -> IdSet.t +val bottom_up_count : + 'a t -> Tree.t -> [> `CONTAINS of 'b | `TAG of Tag.t ] -> int diff --git a/debug.ml b/debug.ml index f754993..39b4fcf 100644 --- a/debug.ml +++ b/debug.ml @@ -17,22 +17,12 @@ IFDEF DEBUG THEN module Loc = Camlp4.PreCast.Loc - -DEFINE D(x) = ignore(x); -DEFINE MM(v,l) = (let ____x = v in (Memory.register ____x (Loc.to_string (l)));____x) -let () = Memory.schedule_stats () - +DEFINE D_IGNORE_(e1,e2) = (let () = e1 in ();e2) ELSE +DEFINE D_IGNORE_(e1,e2) = (e2) -DEFINE D(x) = (); -DEFINE MM(v,l) = (v) - -END (* IFDEF DEBUG *) -IFDEF PROFILE -THEN DEFINE P(x) = ignore(x); -ELSE DEFINE P(x) = (); END (* IFDEF DEBUG *) diff --git a/depend b/depend index 25b054e..8cdff3e 100644 --- a/depend +++ b/depend @@ -2,8 +2,12 @@ memory.cmo: memory.cmi memory.cmx: memory.cmi custom.cmo: sigs.cmi custom.cmx: sigs.cmi -ptset.cmo: ptset.cmi -ptset.cmx: ptset.cmi +memoizer.cmo: memoizer.cmi +memoizer.cmx: memoizer.cmi +hcons.cmo: hcons.cmi +hcons.cmx: hcons.cmi +ptset.cmo: hcons.cmi ptset.cmi +ptset.cmx: hcons.cmx ptset.cmi finiteCofinite.cmo: sigs.cmi finiteCofinite.cmi finiteCofinite.cmx: sigs.cmi finiteCofinite.cmi tag.cmo: tag.cmi @@ -12,25 +16,26 @@ tagSet.cmo: tag.cmi ptset.cmi finiteCofinite.cmi tagSet.cmi tagSet.cmx: tag.cmx ptset.cmx finiteCofinite.cmx tagSet.cmi options.cmo: options.cmi options.cmx: options.cmi -tree.cmo: tag.cmi options.cmi tree.cmi -tree.cmx: tag.cmx options.cmx tree.cmi -ata.cmo: tree.cmi tagSet.cmi tag.cmi ptset.cmi ata.cmi -ata.cmx: tree.cmx tagSet.cmx tag.cmx ptset.cmx ata.cmi +tree.cmo: tag.cmi ptset.cmi options.cmi tree.cmi +tree.cmx: tag.cmx ptset.cmx options.cmx tree.cmi +ata.cmo: tree.cmi tagSet.cmi tag.cmi sigs.cmi ptset.cmi hcons.cmi ata.cmi +ata.cmx: tree.cmx tagSet.cmx tag.cmx sigs.cmi ptset.cmx hcons.cmx ata.cmi ulexer.cmo: ulexer.cmi ulexer.cmx: ulexer.cmi xPath.cmo: ulexer.cmi tagSet.cmi tag.cmi ptset.cmi ata.cmi xPath.cmi xPath.cmx: ulexer.cmx tagSet.cmx tag.cmx ptset.cmx ata.cmx xPath.cmi -main.cmo: xPath.cmi ulexer.cmi tree.cmi tag.cmi options.cmi -main.cmx: xPath.cmx ulexer.cmx tree.cmx tag.cmx options.cmx +main.cmo: xPath.cmi ulexer.cmi tree.cmi tag.cmi options.cmi ata.cmi +main.cmx: xPath.cmx ulexer.cmx tree.cmx tag.cmx options.cmx ata.cmx memory.cmi: sigs.cmi: -ptset.cmi: -finiteCofinite.cmo: sigs.cmi finiteCofinite.cmi -finiteCofinite.cmx: sigs.cmi finiteCofinite.cmi -options.cmi: +memoizer.cmi: +hcons.cmi: +ptset.cmi: hcons.cmi +finiteCofinite.cmi: sigs.cmi tag.cmi: -tagSet.cmi: tag.cmi finiteCofinite.cmi -tree.cmi: tag.cmi -ata.cmi: tree.cmi tagSet.cmi ptset.cmi +tagSet.cmi: tag.cmi ptset.cmi finiteCofinite.cmi +options.cmi: +tree.cmi: tag.cmi ptset.cmi +ata.cmi: tree.cmi tagSet.cmi tag.cmi sigs.cmi ptset.cmi ulexer.cmi: -xPath.cmi: tagSet.cmi ata.cmi +xPath.cmi: tagSet.cmi tag.cmi ptset.cmi ata.cmi diff --git a/main.ml b/main.ml index ce37dfc..4e12ae8 100644 --- a/main.ml +++ b/main.ml @@ -4,7 +4,6 @@ (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) -INCLUDE "debug.ml" open Ata @@ -24,19 +23,19 @@ let disabled_gc = { Gc.get() with Gc.max_overhead = 1000000; Gc.space_overhead = 100 } -let main v query output = +let main v query_string output = let _ = Tag.init (Tree.tag_pool v) in Printf.eprintf "Parsing query : "; let query = try time - XPath.Parser.parse_string query + XPath.Parser.parse_string query_string with Ulexer.Loc.Exc_located ((x,y),e) -> Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);exit 1 in XPath.Ast.print Format.err_formatter query; Format.fprintf Format.err_formatter "\n%!"; Printf.eprintf "Compiling query : "; - let auto,ltags,contains = time XPath.Compile.compile query in + let auto,ltags,contains = time (XPath.Compile.compile ~querystring:query_string) query in let _ = Ata.dump Format.err_formatter auto in let _ = Printf.eprintf "%!" in let jump_to = @@ -143,10 +142,3 @@ let v = in main v !Options.query !Options.output_file;; -IFDEF DEBUG -THEN -Printf.eprintf "\n=================================================\nDEBUGGING\n%!"; - -Tree.DEBUGTREE.print_stats Format.err_formatter;; -Gc.full_major() -ENDIF diff --git a/ptset.ml b/ptset.ml index e16cc2c..10c311c 100644 --- a/ptset.ml +++ b/ptset.ml @@ -5,403 +5,47 @@ (* checking *) (* *) (***************************************************************************) - - -type elt = int - -type t = { id : int; - key : int; (* hash *) - node : node; - } -and node = - | Empty - | Leaf of int - | Branch of int * int * t * t - - -(* faster if outside of a module *) -let hash_node x = match x with - | Empty -> 0 - | Leaf i -> (i+1) land max_int - (* power of 2 +/- 1 are fast ! *) - | Branch (b,i,l,r) -> - ((b lsl 1)+ b + i+(i lsl 4) + (l.key lsl 5)-l.key - + (r.key lsl 7) - r.key) land max_int - -module Node = - struct - type _t = t - type t = _t - external hash : t -> int = "%field1" - let equal x y = - if x.id == y.id || x.key == y.key || x.node == y.node then true - else - match (x.node,y.node) with - | Empty,Empty -> true - | Leaf k1, Leaf k2 when k1 == k2 -> true - | Branch(p1,m1,l1,r1), Branch(p2,m2,l2,r2) when m1==m2 && p1==p2 && - (l1.id == l2.id) && (r1.id == r2.id) -> true - | _ -> false - end - -module WH =Weak.Make(Node) - -let pool = WH.create 4093 - -(* Neat trick thanks to Alain Frisch ! *) - -let gen_uid () = Oo.id (object end) - -let empty = { id = gen_uid (); - key = 0; - node = Empty } - -let _ = WH.add pool empty - -let is_empty s = s.id==0 - -let rec norm n = - let v = { id = gen_uid (); - key = hash_node n; - node = n } - in - WH.merge pool v - -(* WH.merge pool *) - -let branch p m l r = norm (Branch(p,m,l,r)) -let leaf k = norm (Leaf k) - -(* To enforce the invariant that a branch contains two non empty sub-trees *) -let branch_ne = function - | (_,_,e,t) when is_empty e -> t - | (_,_,t,e) when is_empty e -> t - | (p,m,t0,t1) -> branch p m t0 t1 - -(********** from here on, only use the smart constructors *************) - -let zero_bit k m = (k land m) == 0 - -let singleton k = leaf k -let is_singleton n = - match n.node with Leaf _ -> true - | _ -> false - -let rec mem k n = match n.node with - | Empty -> false - | Leaf j -> k == j - | Branch (p, _, l, r) -> if k <= p then mem k l else mem k r - -let rec min_elt n = match n.node with - | Empty -> raise Not_found - | Leaf k -> k - | Branch (_,_,s,_) -> min_elt s - - let rec max_elt n = match n.node with - | Empty -> raise Not_found - | Leaf k -> k - | Branch (_,_,_,t) -> max_elt t - - let elements s = - let rec elements_aux acc n = match n.node with - | Empty -> acc - | Leaf k -> k :: acc - | Branch (_,_,l,r) -> elements_aux (elements_aux acc r) l - in - elements_aux [] s - - let mask k m = (k lor (m-1)) land (lnot m) - - let naive_highest_bit x = - assert (x < 256); - let rec loop i = - if i = 0 then 1 else if x lsr i = 1 then 1 lsl i else loop (i-1) - in - loop 7 - - let hbit = Array.init 256 naive_highest_bit - - let highest_bit_32 x = - let n = x lsr 24 in if n != 0 then Array.unsafe_get hbit n lsl 24 - else let n = x lsr 16 in if n != 0 then Array.unsafe_get hbit n lsl 16 - else let n = x lsr 8 in if n != 0 then Array.unsafe_get hbit n lsl 8 - else Array.unsafe_get hbit x - - let highest_bit_64 x = - let n = x lsr 32 in if n != 0 then (highest_bit_32 n) lsl 32 - else highest_bit_32 x - - let highest_bit = match Sys.word_size with - | 32 -> highest_bit_32 - | 64 -> highest_bit_64 - | _ -> assert false - - let branching_bit p0 p1 = highest_bit (p0 lxor p1) - - let join p0 t0 p1 t1 = - let m = branching_bit p0 p1 in - if zero_bit p0 m then - branch (mask p0 m) m t0 t1 - else - branch (mask p0 m) m t1 t0 - - let match_prefix k p m = (mask k m) == p - - let add k t = - let rec ins n = match n.node with - | Empty -> leaf k - | Leaf j -> if j == k then n else join k (leaf k) j n - | Branch (p,m,t0,t1) -> - if match_prefix k p m then - if zero_bit k m then - branch p m (ins t0) t1 - else - branch p m t0 (ins t1) - else - join k (leaf k) p n - in - ins t - - let remove k t = - let rec rmv n = match n.node with - | Empty -> empty - | Leaf j -> if k == j then empty else n - | Branch (p,m,t0,t1) -> - if match_prefix k p m then - if zero_bit k m then - branch_ne (p, m, rmv t0, t1) - else - branch_ne (p, m, t0, rmv t1) - else - n - in - rmv t - - (* should run in O(1) thanks to Hash consing *) - - let equal a b = a==b || a.id == b.id - - let compare a b = if a == b then 0 else a.id - b.id - - let h_merge = Hashtbl.create 4097 - let com_hash x y = (x*y - (x+y)) land max_int - - let rec merge s t = - if (equal s t) (* This is cheap thanks to hash-consing *) - then s - else - match s.node,t.node with - | Empty, _ -> t - | _, Empty -> s - | Leaf k, _ -> add k t - | _, Leaf k -> add k s - | Branch (p,m,s0,s1), Branch (q,n,t0,t1) -> - if m == n && match_prefix q p m then - branch p m (merge s0 t0) (merge s1 t1) - else if m > n && match_prefix q p m then - if zero_bit q m then - branch p m (merge s0 t) s1 - else - branch p m s0 (merge s1 t) - else if m < n && match_prefix p q n then - if zero_bit p n then - branch q n (merge s t0) t1 - else - branch q n t0 (merge s t1) - else - (* The prefixes disagree. *) - join p s q t - - - - - let rec subset s1 s2 = (equal s1 s2) || - match (s1.node,s2.node) with - | Empty, _ -> true - | _, Empty -> false - | Leaf k1, _ -> mem k1 s2 - | Branch _, Leaf _ -> false - | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> - if m1 == m2 && p1 == p2 then - subset l1 l2 && subset r1 r2 - else if m1 < m2 && match_prefix p1 p2 m2 then - if zero_bit p1 m2 then - subset l1 l2 && subset r1 l2 - else - subset l1 r2 && subset r1 r2 - else - false - - - - - let union s1 s2 = merge s1 s2 - - let rec inter s1 s2 = - if equal s1 s2 - then s1 - else - match (s1.node,s2.node) with - | Empty, _ -> empty - | _, Empty -> empty - | Leaf k1, _ -> if mem k1 s2 then s1 else empty - | _, Leaf k2 -> if mem k2 s1 then s2 else empty - | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> - if m1 == m2 && p1 == p2 then - merge (inter l1 l2) (inter r1 r2) - else if m1 > m2 && match_prefix p2 p1 m1 then - inter (if zero_bit p2 m1 then l1 else r1) s2 - else if m1 < m2 && match_prefix p1 p2 m2 then - inter s1 (if zero_bit p1 m2 then l2 else r2) - else - empty - - let rec diff s1 s2 = - if equal s1 s2 - then empty - else - match (s1.node,s2.node) with - | Empty, _ -> empty - | _, Empty -> s1 - | Leaf k1, _ -> if mem k1 s2 then empty else s1 - | _, Leaf k2 -> remove k2 s1 - | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> - if m1 == m2 && p1 == p2 then - merge (diff l1 l2) (diff r1 r2) - else if m1 > m2 && match_prefix p2 p1 m1 then - if zero_bit p2 m1 then - merge (diff l1 s2) r1 - else - merge l1 (diff r1 s2) - else if m1 < m2 && match_prefix p1 p2 m2 then - if zero_bit p1 m2 then diff s1 l2 else diff s1 r2 - else - s1 - - - - -(*s All the following operations ([cardinal], [iter], [fold], [for_all], - [exists], [filter], [partition], [choose], [elements]) are - implemented as for any other kind of binary trees. *) - -let rec cardinal n = match n.node with - | Empty -> 0 - | Leaf _ -> 1 - | Branch (_,_,t0,t1) -> cardinal t0 + cardinal t1 - -let rec iter f n = match n.node with - | Empty -> () - | Leaf k -> f k - | Branch (_,_,t0,t1) -> iter f t0; iter f t1 - -let rec fold f s accu = match s.node with - | Empty -> accu - | Leaf k -> f k accu - | Branch (_,_,t0,t1) -> fold f t0 (fold f t1 accu) - -let rec for_all p n = match n.node with - | Empty -> true - | Leaf k -> p k - | Branch (_,_,t0,t1) -> for_all p t0 && for_all p t1 - -let rec exists p n = match n.node with - | Empty -> false - | Leaf k -> p k - | Branch (_,_,t0,t1) -> exists p t0 || exists p t1 - -let rec filter pr n = match n.node with - | Empty -> empty - | Leaf k -> if pr k then n else empty - | Branch (p,m,t0,t1) -> branch_ne (p, m, filter pr t0, filter pr t1) - -let partition p s = - let rec part (t,f as acc) n = match n.node with - | Empty -> acc - | Leaf k -> if p k then (add k t, f) else (t, add k f) - | Branch (_,_,t0,t1) -> part (part acc t0) t1 - in - part (empty, empty) s - -let rec choose n = match n.node with - | Empty -> raise Not_found - | Leaf k -> k - | Branch (_, _,t0,_) -> choose t0 (* we know that [t0] is non-empty *) - - -let split x s = - let coll k (l, b, r) = - if k < x then add k l, b, r - else if k > x then l, b, add k r - else l, true, r - in - fold coll s (empty, false, empty) - - - -let rec dump n = - Printf.eprintf "{ id = %i; key = %i ; node=" n.id n.key; - match n.node with - | Empty -> Printf.eprintf "Empty; }\n" - | Leaf k -> Printf.eprintf "Leaf %i; }\n" k - | Branch (p,m,l,r) -> - Printf.eprintf "Branch(%i,%i,id=%i,id=%i); }\n" - p m l.id r.id; - dump l; - dump r - -(*i*) -let make l = List.fold_left (fun acc e -> add e acc ) empty l -(*i*) - -(*s Additional functions w.r.t to [Set.S]. *) - -let rec intersect s1 s2 = (equal s1 s2) || - match (s1.node,s2.node) with - | Empty, _ -> false - | _, Empty -> false - | Leaf k1, _ -> mem k1 s2 - | _, Leaf k2 -> mem k2 s1 - | Branch (p1,m1,l1,r1), Branch (p2,m2,l2,r2) -> - if m1 == m2 && p1 == p2 then - intersect l1 l2 || intersect r1 r2 - else if m1 < m2 && match_prefix p2 p1 m1 then - intersect (if zero_bit p2 m1 then l1 else r1) s2 - else if m1 > m2 && match_prefix p1 p2 m2 then - intersect s1 (if zero_bit p1 m2 then l2 else r2) - else - false - - -let hash s = s.key - -let from_list l = List.fold_left (fun acc i -> add i acc) empty l - -type int_vector - -external int_vector_alloc : int -> int_vector = "caml_int_vector_alloc" -external int_vector_set : int_vector -> int -> int -> unit = "caml_int_vector_set" -external int_vector_length : int_vector -> int = "caml_int_vector_length" -external int_vector_empty : unit -> int_vector = "caml_int_vector_empty" - -let empty_vector = int_vector_empty () - -let to_int_vector_ext s = - let l = cardinal s in - let v = int_vector_alloc l in - let i = ref 0 in - iter (fun e -> int_vector_set v !i e; incr i) s; - v - -let hash_vectors = Hashtbl.create 4097 - -let to_int_vector s = - try - Hashtbl.find hash_vectors s.key - with - Not_found -> - let v = to_int_vector_ext s in - Hashtbl.add hash_vectors s.key v; - v - +INCLUDE "utils.ml" +module type S = +sig + include Set.S + val intersect : t -> t -> bool + val is_singleton : t -> bool + val mem_union : t -> t -> t + val hash : t -> int + val uid : t -> int + val uncons : t -> elt*t + val from_list : elt list -> t +end + +module Int : S with type elt = int = +struct + type elt = int + external hash_elt : elt -> int = "%identity" + external uid_elt : elt -> int = "%identity" + let equal_elt : elt -> elt -> bool = (==);; +DEFINE USE_PTSET_INCLUDE +INCLUDE "ptset_include.ml" + +end +module Make ( H : Hcons.S ) : S with type elt = H.t = +struct + type elt = H.t + let hash_elt = H.hash + let uid_elt = H.uid + let equal_elt = H.equal +INCLUDE "ptset_include.ml" +end + +(* Have to benchmark wheter this whole include stuff is worth it *) +module I : S with type elt = int = Make ( struct type t = int + type data = t + external hash : t -> int = "%identity" + external uid : t -> int = "%identity" + let equal : t -> t -> bool = (==) + external make : t -> int = "%identity" + external node : t -> int = "%identity" + + end + ) diff --git a/ptset.mli b/ptset.mli index 0d29da8..2eef80c 100644 --- a/ptset.mli +++ b/ptset.mli @@ -22,73 +22,51 @@ [Set]. The representation is unique and thus structural comparison can be performed on Patricia trees. *) -type t - -type elt = int - -val empty : t - -val is_empty : t -> bool - -val mem : int -> t -> bool - -val add : int -> t -> t - -val singleton : int -> t - -val remove : int -> t -> t - -val union : t -> t -> t - -val subset : t -> t -> bool - -val inter : t -> t -> t - -val diff : t -> t -> t - -val equal : t -> t -> bool - -val compare : t -> t -> int - -val elements : t -> int list - -val choose : t -> int - -val cardinal : t -> int - -val iter : (int -> unit) -> t -> unit - -val fold : (int -> 'a -> 'a) -> t -> 'a -> 'a - -val for_all : (int -> bool) -> t -> bool - -val exists : (int -> bool) -> t -> bool - -val filter : (int -> bool) -> t -> t - -val partition : (int -> bool) -> t -> t * t - -val split : int -> t -> t * bool * t - -(*s Warning: [min_elt] and [max_elt] are linear w.r.t. the size of the - set. In other words, [min_elt t] is barely more efficient than [fold - min t (choose t)]. *) - -val min_elt : t -> int -val max_elt : t -> int - -(*s Additional functions not appearing in the signature [Set.S] from ocaml - standard library. *) - -(* [intersect u v] determines if sets [u] and [v] have a non-empty - intersection. *) - +module type S = +sig + + type elt + type t + val empty : t + val is_empty : t -> bool + val mem : elt -> t -> bool + val add : elt -> t -> t + val singleton : elt -> t + val remove : elt -> t -> t + val union : t -> t -> t + val inter : t -> t -> t + val diff : t -> t -> t + val compare : t -> t -> int + val equal : t -> t -> bool + val subset : t -> t -> bool + val iter : (elt -> unit) -> t -> unit + val fold : (elt -> 'a -> 'a) -> t -> 'a -> 'a + val for_all : (elt -> bool) -> t -> bool + val exists : (elt -> bool) -> t -> bool + val filter : (elt -> bool) -> t -> t + val partition : (elt -> bool) -> t -> t * t + val cardinal : t -> int + val elements : t -> elt list + val min_elt : t -> elt + val max_elt : t -> elt + val choose : t -> elt + val split : elt -> t -> t * bool * t + (*s Additional functions not appearing in the signature [Set.S] from ocaml + standard library. *) + + (* [intersect u v] determines if sets [u] and [v] have a non-empty + intersection. *) + val intersect : t -> t -> bool -val is_singleton : t -> bool +val is_singleton : t -> bool +val mem_union : t -> t -> t val hash : t -> int +val uid : t -> int +val uncons : t -> elt * t +val from_list : elt list -> t +end -val from_list : int list -> t -type int_vector -val to_int_vector : t -> int_vector +module Int : S with type elt = int +module Make ( H : Hcons.S ) : S with type elt = H.t diff --git a/tag.ml b/tag.ml index 82c42b5..c500dab 100644 --- a/tag.ml +++ b/tag.ml @@ -40,8 +40,9 @@ let hash x = x let to_string t = - if t = pcdata then "<$>" - else if t = attribute then "<@>" + if t == pcdata then "<$>" + else if t == attribute then "<@>" + else if t == nullt then "" else tag_name (get_pool()) t diff --git a/tagSet.ml b/tagSet.ml index 76c1c98..74784cf 100644 --- a/tagSet.ml +++ b/tagSet.ml @@ -4,8 +4,8 @@ struct let hash = Hashtbl.hash end *) -module M : FiniteCofinite.S with type elt = Tag.t and type set = Ptset.t = - FiniteCofinite.Make(Ptset) +module M : FiniteCofinite.S with type elt = Tag.t and type set = Ptset.Int.t = + FiniteCofinite.Make(Ptset.Int) include M diff --git a/tagSet.mli b/tagSet.mli index d536c8c..a232c0a 100644 --- a/tagSet.mli +++ b/tagSet.mli @@ -5,7 +5,7 @@ (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) -include FiniteCofinite.S with type elt = Tag.t and type set = Ptset.t +include FiniteCofinite.S with type elt = Tag.t and type set = Ptset.Int.t val tag : Tag.t -> t val pcdata : t diff --git a/tests/base.xml b/tests/base.xml index 3eb6345..811cb1f 100644 --- a/tests/base.xml +++ b/tests/base.xml @@ -18,7 +18,6 @@ - pc104 Generic 104-key PC Generic diff --git a/tests/test.xml b/tests/test.xml index 9d8984e..f1dfe41 100644 --- a/tests/test.xml +++ b/tests/test.xml @@ -1,6 +1,7 @@ - - - + + + + diff --git a/tree.ml b/tree.ml index d0466c1..6e86622 100644 --- a/tree.ml +++ b/tree.ml @@ -42,9 +42,7 @@ external text_count : tree -> string -> int = "caml_text_collection_count" external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains" external text_unsorted_contains : tree -> string -> unit = "caml_text_collection_unsorted_contains" external get_cached_text : tree -> [`Text] node -> string = "caml_text_collection_get_cached_text" -let get_cached_text t x = - if x == -1 then "" - else get_cached_text t x + external tree_serialize : tree -> string -> unit = "caml_xml_tree_serialize" @@ -78,17 +76,24 @@ external tree_doc_ids : tree -> [`Tree ] node -> [`Text ] node * [`Text ] node = let text_size tree = int_of_node (snd ( tree_doc_ids tree (Obj.magic 0) )) +let get_cached_text t x = + if x == -1 then "" + else + get_cached_text t x + + external tree_text_xml_id : tree -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" external tree_node_xml_id : tree -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id" external tree_is_ancestor : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" external tree_tagged_desc : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_desc" external tree_tagged_foll_below : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_below" external tree_subtree_tags : tree -> [`Tree ] node -> Tag.t -> int = "caml_xml_tree_subtree_tags" +(* external tree_select_below : tree -> [`Tree ] node -> Ptset.int_vector -> Ptset.int_vector -> [`Tree ] node = "caml_xml_tree_select_below" external tree_select_desc_only : tree -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node = "caml_xml_tree_select_desc_only" external tree_select_next : tree -> [`Tree ] node -> Ptset.int_vector -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_select_next" external tree_select_foll_only : tree -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_select_foll_only" -external tree_select_desc_or_foll_only : tree -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_select_foll_only" +external tree_select_desc_or_foll_only : tree -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_select_foll_only" *) type descr = | Nil @@ -97,41 +102,36 @@ type descr = type t = { doc : tree; node : descr; - ttable : (Tag.t,(Ptset.t*Ptset.t)) Hashtbl.t; + ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t)) Hashtbl.t; } - - - - - let text_size t = text_size t.doc let collect_tags tree = let h_union = Hashtbl.create 511 in let pt_cup s1 s2 = (* special case, since this is a union we want hash(s1,s2) = hash(s2,s1) *) - let x = Ptset.hash s1 - and y = Ptset.hash s2 in - let h = if x < y then HASHINT2(x,y) else HASHINT2(y,x) in + let x = Ptset.Int.hash s1 + and y = Ptset.Int.hash s2 in + let h = if x < y then HASHINT2(x,y) else HASHINT2(y,x)in try Hashtbl.find h_union h with - | Not_found -> let s = Ptset.union s1 s2 + | Not_found -> let s = Ptset.Int.union s1 s2 in Hashtbl.add h_union h s;s in let h_add = Hashtbl.create 511 in let pt_add t s = - let k = HASHINT2(Tag.hash t,Ptset.hash s) in + let k = HASHINT2(Tag.hash t,Ptset.Int.hash s) in try Hashtbl.find h_add k with - | Not_found -> let r = Ptset.add t s in + | Not_found -> let r = Ptset.Int.add t s in Hashtbl.add h_add k r;r in let h = Hashtbl.create 511 in - let sing = Ptset.singleton Tag.pcdata in + let sing = Ptset.Int.singleton Tag.pcdata in let update t sb sa = let sbelow,safter = try @@ -144,7 +144,7 @@ let collect_tags tree = in let rec loop id acc = if equal_node id nil - then (Ptset.empty,acc) + then (Ptset.Int.empty,acc) else let below2,after2 = loop (tree_next_sibling tree id) acc in let below1,after1 = loop (tree_first_child tree id) after2 in @@ -152,7 +152,7 @@ let collect_tags tree = update tag below1 after2; pt_add tag (pt_cup below1 below2), (pt_add tag after1) in - let b,a = loop (tree_root tree) Ptset.empty in + let b,a = loop (tree_root tree) Ptset.Int.empty in update Tag.pcdata b a; h @@ -398,56 +398,24 @@ let tag t = match t.node with | Text(_) -> Tag.pcdata | Node(n) -> tree_tag_id t.doc n - | _ -> failwith "tag" - -(* - let string_below t id = - let strid = parent_doc t.doc id in - match t.node with - | Node(NC(i)) -> - (Tree.equal i strid) || (is_ancestor t.doc i strid) - | Node(SC(i,_)) -> Text.equal i id - | _ -> false - - - let tagged_foll t tag = - if tag = Tag.attribute || tag = Tag.pcdata then failwith "tagged_foll" - else match t with - | { doc=d; node=Node(NC n) } -> { t with node = norm (tagged_foll d n tag) } - | { doc=d; node=Node(SC (_,n)) } when is_nil n -> { t with node= Nil } - | { doc=d; node=Node(SC (_,n)) } -> - let nnode = - if tag_id d n == tag then n - else - let n' = tagged_desc d n tag in - if is_nil n' then tagged_foll d n tag - else n' - in {t with node= norm nnode} - | _ -> { t with node=Nil } - + | Nil -> Tag.nullt - let tagged_desc t tag = - if tag = Tag.attribute || tag = Tag.pcdata then failwith "tagged_desc" - else match t with - | { doc=d; node=Node(NC n) } -> { t with node = norm (tagged_desc d n tag) } - | _ -> { t with node=Nil } - -*) +(* let select_next tb tf t s = match s.node with | Node (below) -> begin match t.node with | Node( n) -> - { t with node = norm (tree_select_next t.doc n (Ptset.to_int_vector tb) (Ptset.to_int_vector tf) below) } + { t with node = norm (tree_select_next t.doc n (Ptset.Int.to_int_vector tb) (Ptset.Int.to_int_vector tf) below) } | Text (i,n) when equal_node nil n -> let p = tree_parent_doc t.doc i in - { t with node = norm (tree_select_next t.doc p (Ptset.to_int_vector tb) (Ptset.to_int_vector tf) below) } + { t with node = norm (tree_select_next t.doc p (Ptset.Int.to_int_vector tb) (Ptset.Int.to_int_vector tf) below) } | Text(_,n) -> - if Ptset.mem (tree_tag_id t.doc n) (Ptset.union tb tf) + if Ptset.mem (tree_tag_id t.doc n) (Ptset.Int.union tb tf) then { t with node=Node(n) } else - let vb = Ptset.to_int_vector tb in - let vf = Ptset.to_int_vector tf in + let vb = Ptset.Int.to_int_vector tb in + let vf = Ptset.Int.to_int_vector tf in let node = let dsc = tree_select_below t.doc n vb vf in if equal_node nil dsc @@ -469,15 +437,15 @@ let select_next tb tf t s = begin match t.node with | Node(n) -> - { t with node= norm (tree_select_foll_only t.doc n (Ptset.to_int_vector tf) below) } + { t with node= norm (tree_select_foll_only t.doc n (Ptset.Int.to_int_vector tf) below) } | Text(i,n) when equal_node nil n -> let p = tree_parent_doc t.doc i in - { t with node= norm (tree_select_foll_only t.doc p (Ptset.to_int_vector tf) below) } + { t with node= norm (tree_select_foll_only t.doc p (Ptset.Int.to_int_vector tf) below) } | Text(_,n) -> if Ptset.mem (tree_tag_id t.doc n) tf then { t with node=Node(n) } else - let vf = Ptset.to_int_vector tf in + let vf = Ptset.Int.to_int_vector tf in let node = let dsc = tree_select_desc_only t.doc n vf in if tree_is_nil dsc @@ -492,9 +460,9 @@ let select_next tb tf t s = let select_below tc td t= match t.node with | Node( n) -> - let vc = Ptset.to_int_vector tc + let vc = Ptset.Int.to_int_vector tc in - let vd = Ptset.to_int_vector td + let vd = Ptset.Int.to_int_vector td in { t with node= norm(tree_select_below t.doc n vc vd) } | _ -> { t with node=Nil } @@ -503,12 +471,12 @@ let select_below tc td t= let select_desc_only td t = match t.node with | Node(n) -> - let vd = Ptset.to_int_vector td + let vd = Ptset.Int.to_int_vector td in { t with node = norm(tree_select_desc_only t.doc n vd) } | _ -> { t with node = Nil } - +*) let tagged_desc tag t = match t.node with | Node(n) -> @@ -858,3 +826,7 @@ let subtree_tags t tag = match t.node with | Nil -> 0 | Node(i) -> tree_subtree_tags t.doc i tag | Text(_,i) -> tree_subtree_tags t.doc i tag + +let get_text t = match t.node with + | Text(i,_) -> get_cached_text t.doc i + | _ -> "" diff --git a/tree.mli b/tree.mli index cc8a8e6..9169f39 100644 --- a/tree.mli +++ b/tree.mli @@ -26,18 +26,20 @@ val text_below : t -> t val text_next : t -> t -> t val tagged_desc : Tag.t -> t -> t val tagged_foll_below : Tag.t -> t -> t -> t -val select_desc_only : Ptset.t -> t -> t -val select_foll_only : Ptset.t -> t -> t -> t -val select_below : Ptset.t -> Ptset.t -> t -> t -val select_next : Ptset.t -> Ptset.t -> t -> t -> t +(* +val select_desc_only : Ptset.Int.t -> t -> t +val select_foll_only : Ptset.Int.t -> t -> t -> t +val select_below : Ptset.Int.t -> Ptset.Int.t -> t -> t +val select_next : Ptset.Int.t -> Ptset.Int.t -> t -> t -> t +*) val count : t -> string -> int val print_xml_fast : out_channel -> t -> unit val node_child : t -> t val node_sibling : t -> t val node_sibling_ctx : t -> t -> t -val tags_below : t -> Tag.t -> Ptset.t -val tags_after : t -> Tag.t -> Ptset.t -val tags : t -> Tag.t -> Ptset.t*Ptset.t +val tags_below : t -> Tag.t -> Ptset.Int.t +val tags_after : t -> Tag.t -> Ptset.Int.t +val tags : t -> Tag.t -> Ptset.Int.t*Ptset.Int.t val is_below_right : t -> t -> bool val is_left : t -> bool val tagged_lowest : t -> Tag.t -> t @@ -49,3 +51,4 @@ val unsorted_contains : t -> string -> unit val text_size : t -> int val doc_ids : t -> int*int val subtree_tags : t -> Tag.t -> int +val get_text : t -> string diff --git a/utils.ml b/utils.ml index e6cbff0..ce08290 100644 --- a/utils.ml +++ b/utils.ml @@ -9,16 +9,36 @@ THEN DEFINE HALFWORDSIZE = 32 DEFINE INTSIZE = 63 DEFINE HALFINTSIZE = 31 + DEFINE HALF_MAX_INT = 2305843009213693951 ELSE DEFINE WORDSIZE = 32 DEFINE HALFWORDSIZE = 16 DEFINE INTSIZE = 31 DEFINE HALFINTSIZE = 15 + DEFINE HALF_MAX_INT = 536870911 END -DEFINE ROTATEHALF (x) = (((x) lsl HALFINTSIZE) lor ((x) lsr HALFINTSIZE)) -DEFINE HASHINT2 (x,y) = ((((x) lsl 16)+((x) lsl 8)-(x))+(y)) -DEFINE HASHINT3 (x,y,z) = (((((x) lsl 16)+((x) lsl 8)-(x))+(y))*65599+(z)) +(* x+65599*y, as in Hashtbl.hash *) + +DEFINE HASHINT2 (x,y) = ((x) + ( ((y) lsl 16) + ((y) lsl 8) - (y))) +DEFINE HASHINT3 (x,y,z) = (HASHINT2(HASHINT2(x,y),z)) +DEFINE HASHINT4 (x,y,z,t) = (HASHINT2((HASHINT2(HASHINT2(x,y),z)),t)) + +DEFINE PRIME1 = 7 +DEFINE PRIME2 = 19 +DEFINE PRIME3 = 83 +DEFINE PRIME4 = 223 +DEFINE PRIME5 = 491 +DEFINE PRIME6 = 733 +DEFINE PRIME7 = 1009 +DEFINE PRIME8 = 4093 +DEFINE PRIME9 = 65599 (* Magic Constant used for hashing *) + +DEFINE SMALL_H_SIZE = PRIME2 +DEFINE MED_H_SIZE = PRIME5 +DEFINE BIG_H_SIZE = PRIME8 + + END (* IFNDEF UTILS__ML__ *) diff --git a/xPath.ml b/xPath.ml index 3fbfacf..bef0336 100644 --- a/xPath.ml +++ b/xPath.ml @@ -4,7 +4,6 @@ (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) -INCLUDE "debug.ml";; #load "pa_extend.cmo";; let contains = ref None module Ast = @@ -216,26 +215,27 @@ end module Compile = struct open Ast +type transition = Ata.State.t*TagSet.t*Ata.Transition.t -type config = { st_root : Ata.state; (* state matching the root element (initial state) *) - st_univ : Ata.state; (* universal state accepting anything *) - st_from_root : Ata.state; (* state chaining the root and the current position *) - mutable final_state : Ptset.t; +type config = { st_root : Ata.State.t; (* state matching the root element (initial state) *) + st_univ : Ata.State.t; (* universal state accepting anything *) + st_from_root : Ata.State.t; (* state chaining the root and the current position *) + mutable final_state : Ata.StateSet.t; mutable has_backward: bool; (* To store transitions *) (* Key is the from state, (i,l) -> i the number of the step and l the list of trs *) - tr_parent_loop : (Ata.state,int*(Ata.transition list)) Hashtbl.t; - tr : (Ata.state,int*(Ata.transition list)) Hashtbl.t; - tr_aux : (Ata.state,int*(Ata.transition list)) Hashtbl.t; - mutable entry_points : (Tag.t*Ptset.t) list; + tr_parent_loop : (Ata.State.t,int*(transition list)) Hashtbl.t; + tr : (Ata.State.t,int*(transition list)) Hashtbl.t; + tr_aux : (Ata.State.t,int*(transition list)) Hashtbl.t; + mutable entry_points : (Tag.t*Ata.StateSet.t) list; mutable contains : string option; - mutable univ_states : Ata.state list; - mutable starstate : Ptset.t option; + mutable univ_states : Ata.State.t list; + mutable starstate : Ata.StateSet.t option; } let dummy_conf = { st_root = -1; st_univ = -1; st_from_root = -1; - final_state = Ptset.empty; + final_state = Ata.StateSet.empty; has_backward = false; tr_parent_loop = Hashtbl.create 0; tr = Hashtbl.create 0; @@ -261,45 +261,14 @@ let _l = | `LLeft -> `LLeft -open Ata.Transitions +open Ata.Transition.Infix +open Ata.Formula.Infix -let add_trans num htr ((q,_,_,_,_) as tr) = - try - let (i,ltr) = Hashtbl.find htr q in - if List.exists (Ata.equal_trans tr) ltr - then () - else Hashtbl.replace htr q (i,(tr::ltr)) - with - | Not_found -> Hashtbl.add htr q (num,[tr]) +(* Todo : fix *) +let add_trans num htr ((q,ts,_)as tr) = + Hashtbl.add htr q (num,[tr]) -exception Exit of Ata.state * Ata.transition list -let rec replace s f = - match f.Ata.pos with - | Ata.Atom(_,b,q) when q = s -> if b then Ata.true_ else Ata.false_ - | Ata.Or(f1,f2) -> (replace s f1) +| (replace s f2) - | Ata.And(f1,f2) -> (replace s f1) *& (replace s f2) - | _ -> f - - -let or_self conf old_dst q_src q_dst dir test pred mark = - try - let (num,l) = Hashtbl.find conf.tr q_src in - let l2 = List.fold_left (fun acc (q,t,m,f,_) -> - (q, - TagSet.cap t test, - mark, - (if mark then replace old_dst f else f) - *& pred *& - (if mark then Ata.true_ else (_l dir) ** q_dst), - false)::acc) - l l - in Hashtbl.replace conf.tr q_src (num,l2) - with Not_found -> () - - -let nst = Ata.mk_state -let att_or_str = TagSet.add Tag.pcdata TagSet.attribute let vpush x y = (x,[]) :: y let hpush x y = match y with @@ -322,7 +291,7 @@ let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num st_univ = q_univ; st_from_root = q_frm_root } = conf in - let q_dst = Ata.mk_state() in + let q_dst = Ata.State.make() in let p_st, p_anc, p_par, p_pre, p_num, p_f = compile_pred conf q_src num ctx_path dir pred q_dst in @@ -330,45 +299,47 @@ let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num match axis with | Child | Descendant -> if (TagSet.is_finite test) - then conf.entry_points <- (TagSet.choose test,Ptset.singleton q_src)::conf.entry_points; + then conf.entry_points <- (TagSet.choose test,Ata.StateSet.singleton q_src)::conf.entry_points; let left,right = if nrec then `LLeft,`RRight else `Left,`Right in let _ = if is_last && axis=Descendant && TagSet.equal test TagSet.star - then conf.starstate <- Some(Ptset.singleton q_src) + then conf.starstate <- Some(Ata.StateSet.singleton q_src) in - let t1 = ?< q_src><(test, is_last && not(ex))>=> - p_f *& ( if is_last then Ata.true_ else (_l left) ** q_dst) in + let t1,ldst = ?< q_src><(test, is_last && not(ex))>=> + p_f *& ( if is_last then Ata.Formula.true_ else (_l left) *+ q_dst), + ( if is_last then [] else [q_dst]) + in - let _ = add_trans num conf.tr t1 in - - + let _ = add_trans num conf.tr t1 in let _ = if axis=Descendant then add_trans num conf.tr_aux ( ?< q_src><@ ((if ex||nrec then TagSet.diff TagSet.star test - else TagSet.star),false)>=> `LLeft ** q_src ) + else TagSet.star),false)>=> + (if TagSet.equal test TagSet.star then + `Left else `LLeft) *+ q_src ) in let t3 = ?< q_src><@ ((if ex then TagSet.diff TagSet.any test else TagSet.any), false)>=> - if ex then right ** q_src - else (if axis=Descendant then `RRight else `Right) ** q_src + (if axis=Descendant && (not (TagSet.equal test TagSet.star)) then + `RRight else `Right) *+ q_src in let _ = add_trans num conf.tr_aux t3 in - [q_dst], q_dst, + ldst, q_dst, (if axis = FollowingSibling then hpush q_src ctx_path else vpush q_src ctx_path) | Attribute -> - let q_dstreal = Ata.mk_state() in + let q_dstreal = Ata.State.make() in (* attributes are always the first child *) let t1 = ?< q_src><(TagSet.attribute,false)>=> - `Left ** q_dst in + `Left *+ q_dst in let t2 = ?< q_dst><(test, is_last && not(existential))>=> - if is_last then Ata.true_ else `Left ** q_dstreal in - let tsa = ?< q_dst><(TagSet.star, false)>=> `Right ** q_dst + if is_last then Ata.Formula.true_ else `Left *+ q_dstreal in + let tsa = ?< q_dst><(TagSet.star, false)>=> `Right *+ q_dst in add_trans num conf.tr t1; add_trans num conf.tr_aux t2; @@ -376,38 +347,11 @@ let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num [q_dst;q_dstreal], q_dstreal, ctx_path - | Ancestor | AncestorOrSelf -> - conf.has_backward <- true; - let up_states, new_ctx = - List.fold_left (fun acc (q,_) -> if q == q_root then acc else q::acc) [] ctx_path, (vpush q_root []) - in - let _ = if axis = AncestorOrSelf then - or_self conf q_src (fst(vpop ctx_path)) q_dst dir test p_f (is_last && not(existential)); - in - let fc = List.fold_left (fun f s -> ((_l dir)**s +|f)) Ata.false_ up_states - in - let t1 = ?< q_frm_root><(test,is_last && (not existential) )>=> - ( (*if is_last then Ata.true_ else *) (`LLeft ) ** q_dst) *& fc in - add_trans num conf.tr t1; - [q_dst ], q_dst, vpush q_frm_root new_ctx - - | Parent -> - conf.has_backward <- true; - let q_self,new_ctx = - match ctx_path with - | (a,_)::[] -> a, vpush q_root [] - | (a,_)::r -> a, r - | _ -> assert false - in - let t1 = ?< q_frm_root>< (test,is_last && (not existential)) >=> - (if is_last then Ata.true_ else (_l dir) ** q_dst) *& (_l dir) ** q_self in - add_trans num conf.tr t1; - [ q_dst ], q_dst, vpush q_frm_root new_ctx | _ -> assert false in - (* todo change everything to Ptset *) - (Ptset.elements (Ptset.union p_st (Ptset.from_list new_st)), + (* todo change everything to Ata.StateSet *) + (Ata.StateSet.elements (Ata.StateSet.union p_st (Ata.StateSet.from_list new_st)), new_dst, new_ctx) and is_rec = function @@ -423,16 +367,16 @@ and compile_path ?(existential=false) annot_path config q_src states idx ctx_pat let add_states,new_dst,new_ctx = compile_step ~existential:existential config a_dst dir ctx_path (is_rec a_isrec) step num in - let new_states = Ptset.union (Ptset.from_list add_states) a_st in + let new_states = Ata.StateSet.union (Ata.StateSet.from_list add_states) a_st in let nanc_st,npar_st,npre_st,new_bw = match step with - |PrecedingSibling,_,_ -> anc_st,par_st,Ptset.add a_dst pre_st,true - |(Parent|Ancestor|AncestorOrSelf),_,_ -> Ptset.add a_dst anc_st,par_st,pre_st,true + |PrecedingSibling,_,_ -> anc_st,par_st,Ata.StateSet.add a_dst pre_st,true + |(Parent|Ancestor|AncestorOrSelf),_,_ -> Ata.StateSet.add a_dst anc_st,par_st,pre_st,true | _ -> anc_st,par_st,pre_st,has_backward in new_states,new_dst,nanc_st,npar_st,npre_st,new_ctx, num+1,new_bw,(match a_isrec with [] -> [] | _::r -> r) ) - (states, q_src, Ptset.empty,Ptset.empty,Ptset.empty, ctx_path,idx, false,(List.tl annot_path) ) + (states, q_src, Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty, ctx_path,idx, false,(List.tl annot_path) ) annot_path and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst = @@ -441,10 +385,10 @@ and binop_ conf q_src idx ctx_path dir pred p1 p2 f ddst = let a_st2,anc_st2,par_st2,pre_st2,idx2,f2 = compile_pred conf q_src idx1 ctx_path dir p2 ddst in - Ptset.union a_st1 a_st2, - Ptset.union anc_st1 anc_st2, - Ptset.union par_st1 par_st2, - Ptset.union pre_st1 pre_st2, + Ata.StateSet.union a_st1 a_st2, + Ata.StateSet.union anc_st1 anc_st2, + Ata.StateSet.union par_st1 par_st2, + Ata.StateSet.union pre_st1 pre_st2, idx2, (f f1 f2) and compile_pred conf q_src idx ctx_path dir pred qdst = @@ -453,16 +397,16 @@ and compile_pred conf q_src idx ctx_path dir pred qdst = binop_ conf q_src idx ctx_path dir pred p1 p2 (( +| )) qdst | And(p1,p2) -> binop_ conf q_src idx ctx_path dir pred p1 p2 (( *& )) qdst - | Expr e -> compile_expr conf Ptset.empty q_src idx ctx_path dir e qdst + | Expr e -> compile_expr conf Ata.StateSet.empty q_src idx ctx_path dir e qdst | Not(p) -> let a_st,anc_st,par_st,pre_st,idx,f = compile_pred conf q_src idx ctx_path dir p qdst - in a_st,anc_st,par_st,pre_st,idx, Ata.not_ f + in a_st,anc_st,par_st,pre_st,idx, Ata.Formula.not_ f and compile_expr conf states q_src idx ctx_path dir e qdst = match e with | Path (p) -> - let q = Ata.mk_state () in + let q = Ata.State.make () in let annot_path = match p with Relative(r) -> dirannot (List.rev r) | _ -> assert false in let a_st,a_dst,anc_st,par_st,pre_st,_,idx,has_backward,_ = compile_path ~existential:true annot_path conf q states idx ctx_path @@ -472,12 +416,12 @@ and compile_expr conf states q_src idx ctx_path dir e qdst = | _ -> `Left in let _ = match annot_path with - | (((Parent|Ancestor|AncestorOrSelf),_,_),_)::_ -> conf.final_state <- Ptset.add qdst conf.final_state + | (((Parent|Ancestor|AncestorOrSelf),_,_),_)::_ -> conf.final_state <- Ata.StateSet.add qdst conf.final_state | _ -> () in let _ = conf.univ_states <- a_dst::conf.univ_states in - (a_st,anc_st,par_st,pre_st,idx, ((ret_dir) ** q)) - | True -> states,Ptset.empty,Ptset.empty,Ptset.empty,idx,Ata.true_ - | False -> states,Ptset.empty,Ptset.empty,Ptset.empty,idx,Ata.false_ + (a_st,anc_st,par_st,pre_st,idx, ((ret_dir) *+ q)) + | True -> states,Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty,idx,Ata.Formula.true_ + | False -> states,Ata.StateSet.empty,Ata.StateSet.empty,Ata.StateSet.empty,idx,Ata.Formula.false_ | _ -> assert false @@ -487,7 +431,7 @@ and dirannot = function | p::(((FollowingSibling),_,_)::_ as l) -> (p,`Right)::(dirannot l) | p::l -> (p,`Left) :: (dirannot l) -let compile path = +let compile ?(querystring="") path = let steps = match path with | Absolute(steps) @@ -496,11 +440,10 @@ let compile path = in let steps = List.rev steps in let dirsteps = dirannot steps in - let _ = Ata.mk_state() in - let config = { st_root = Ata.mk_state(); - st_univ = Ata.mk_state(); - final_state = Ptset.empty; - st_from_root = Ata.mk_state(); + let config = { st_root = Ata.State.make(); + st_univ = Ata.State.make(); + final_state = Ata.StateSet.empty; + st_from_root = Ata.State.make(); has_backward = false; tr_parent_loop = Hashtbl.create 5; tr = Hashtbl.create 5; @@ -511,8 +454,8 @@ let compile path = starstate = None; } in - let q0 = Ata.mk_state() in - let states = Ptset.from_list [config.st_univ;config.st_root] + let q0 = Ata.State.make() in + let states = Ata.StateSet.from_list [config.st_univ;config.st_root] in let num = 0 in (* add_trans num config.tr_aux (mk_star config.st_from_root `Left config.st_univ config.st_from_root); @@ -524,39 +467,37 @@ let compile path = in let fst_tr = ?< (config.st_root) >< (TagSet.singleton (Tag.tag ""),false) >=> - ((if is_rec dirsteps then `LLeft else `Left)** q0) *& (if config.has_backward then `LLeft ** config.st_from_root else Ata.true_) + ((if is_rec dirsteps then `LLeft else `Left)*+ q0) *& (if config.has_backward then `LLeft *+ config.st_from_root else Ata.Formula.true_) in add_trans num config.tr fst_tr; if config.has_backward then begin add_trans num config.tr_aux - (?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft ** config.st_from_root); + (?< (config.st_from_root) >< (TagSet.star,false) >=> `LLeft *+ config.st_from_root); add_trans num config.tr_aux (?< (config.st_from_root) >< (TagSet.any,false) >=> - `RRight ** config.st_from_root); + `RRight *+ config.st_from_root); end; let phi = Hashtbl.create 37 in - let fadd = fun _ (_,l) -> List.iter (fun (s,t,m,f,p) -> + let fadd = fun _ (_,l) -> List.iter (fun (s,t,tr) -> let lt = try Hashtbl.find phi s - with Not_found -> [] + with Not_found -> [] in - Hashtbl.replace phi s ((t,(m,f,p))::lt) + Hashtbl.replace phi s ((t,tr)::lt) ) l in Hashtbl.iter (fadd) config.tr; Hashtbl.iter (fadd) config.tr_aux; Hashtbl.iter (fadd) config.tr_parent_loop; let final = - let s = Ptset.union anc_st (Ptset.from_list []) - in if has_backward then Ptset.add config.st_from_root s else s + let s = anc_st + in if has_backward then Ata.StateSet.add config.st_from_root s else s in { Ata.id = Oo.id (object end); - Ata.states = Hashtbl.fold (fun q _ acc -> Ptset.add q acc) phi Ptset.empty; - Ata.init = Ptset.singleton config.st_root; - Ata.final = Ptset.union anc_st config.final_state; - Ata.universal = Ptset.add a_dst (Ptset.from_list config.univ_states); - Ata.phi = phi; - Ata.sigma = Hashtbl.create 17; + Ata.states = Hashtbl.fold (fun q _ acc -> Ata.StateSet.add q acc) phi Ata.StateSet.empty; + Ata.init = Ata.StateSet.singleton config.st_root; + Ata.trans = phi; Ata.starstate = config.starstate; + Ata.query_string = querystring; },config.entry_points,!contains diff --git a/xPath.mli b/xPath.mli index 3ecf868..6d8d8a7 100644 --- a/xPath.mli +++ b/xPath.mli @@ -35,5 +35,5 @@ sig end module Compile : sig -val compile : Ast.path -> 'a Ata.t * (Tag.t*Ptset.t) list * string option +val compile : ?querystring:string -> Ast.path -> 'a Ata.t * (Tag.t*Ata.StateSet.t) list * string option end -- 2.17.1