From: kim Date: Tue, 24 Mar 2009 07:05:54 +0000 (+0000) Subject: . X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;h=6131f2f9e380543197c9fa253325bbc84749e6e8;p=SXSI%2Fxpathcomp.git . git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@286 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/Makefile b/Makefile index 54fbe78..d31aa75 100644 --- a/Makefile +++ b/Makefile @@ -48,7 +48,7 @@ OCAMLOPT = ocamlopt -g -cc "$(CXX)" SYNT_DEBUG = -ppopt -DDEBUG else CXX = g++ -OCAMLOPT = ocamlopt -cc "$(CXX)" -ccopt -O3 -ccopt -std=c++0x -noassert -inline $(INLINE) +OCAMLOPT = ocamlopt -nodynlink -cc "$(CXX)" -ccopt -O3 -ccopt -std=c++0x -noassert -inline $(INLINE) endif ifeq ($(PROFILE), true) SYNT_PROF = $(SYNT_DEBUG) -ppopt -DPROFILE @@ -101,7 +101,7 @@ libcamlshredder.a: $(CXXOBJECTS) XMLTree/XMLTree.a @echo [LIB] $@ $(HIDE) mkdir -p .libs/ $(HIDE) cd .libs/ && ar x ../XMLTree/XMLTree.a - $(HIDE) $(OCAMLMKLIB) -o camlshredder -custom $(CXXOBJECTS) ./.libs/*.o $(LIBS) + $(HIDE) $(OCAMLMKLIB) -o camlshredder -custom $(CXXOBJECTS) .libs/*.o $(LIBS) $(HIDE) rm -rf .libs clean: diff --git a/ata.ml b/ata.ml index 1ba9c40..425fad6 100644 --- a/ata.ml +++ b/ata.ml @@ -6,18 +6,27 @@ let gen_id = module TS = struct - type t = Nil | Cons of Tree.t * t | Concat of t*t + type t = Nil + | Sing of Tree.t + | Cons of Tree.t*t + | ConsCat of Tree.t * t * t + | Concat of t*t let empty = Nil let cons e t = Cons(e,t) - let concat t1 t2 = Concat (t1,t2) - let append e t = Concat(t,Cons(e,Nil)) + let concat t1 t2 = Concat(t1,t2) + let append e t = Concat(t,Sing(e)) + + + let fold f l acc = let rec loop acc = function | Nil -> acc - | Cons(e,t) -> loop (f e acc) t - | Concat(t1,t2) -> loop (loop acc t1) t2 + | Sing e -> f e acc + | Cons (e,t) -> loop (f e acc) t + | ConsCat (e,t1,t2) -> loop (loop (f e acc) t1) t2 + | Concat (t1,t2) -> loop (loop acc t1) t2 in loop acc l @@ -27,8 +36,11 @@ let gen_id = let iter f l = let rec loop = function | Nil -> () - | Cons(e,t) -> let _ = f e in loop t - | Concat(t1,t2) -> let _ = loop t1 in loop t2 + | Sing e -> f e + | Cons (e,t) -> f e; loop t + | ConsCat(e,t1,t2) -> + f e; loop t1; loop t2 + | Concat(t1,t2) -> loop t1;loop t2 in loop l end @@ -73,14 +85,14 @@ and formula = { fid: int; } external hash_const_variant : [> ] -> int = "%identity" -external int_bool : bool -> int = "%identity" +external vb : bool -> int = "%identity" let hash_node_form t = match t with | False -> 0 | True -> 1 | And(f1,f2) -> (2+17*f1.fkey + 37*f2.fkey) (*land max_int *) | Or(f1,f2) -> (3+101*f1.fkey + 253*f2.fkey) (*land max_int *) - | Atom(v,b,s) -> ((hash_const_variant v) + (3846*(int_bool b) +257) + (s lsl 13 - s)) (*land max_int *) + | Atom(v,b,s) -> ((hash_const_variant v) + (3846*(vb b) +257) + (s lsl 13 - s)) (*land max_int *) module FormNode = @@ -203,181 +215,36 @@ struct let hash = k_hash end -module HTagSet = -struct - type key = Ptset.t*Tag.t - let equal (s1,s2) (t1,t2) = (s2 == t2) && Ptset.equal s1 t1 - let hash (s,t) = ((Ptset.hash s)) lsl 31 lxor (Tag.hash t) - -type 'a t = - { mutable size: int; (* number of elements *) - mutable data: (key,'a) bucketlist array } (* the buckets *) - -and ('a, 'b) bucketlist = - Empty - | Cons of 'a * 'b * ('a, 'b) bucketlist - -let create initial_size = - let s = min (max 1 initial_size) Sys.max_array_length in - { size = 0; data = Array.make s Empty } - -let clear h = - for i = 0 to Array.length h.data - 1 do - h.data.(i) <- Empty - done; - h.size <- 0 - -let copy h = - { size = h.size; - data = Array.copy h.data } - -let length h = h.size - -let resize tbl = - let odata = tbl.data in - let osize = Array.length odata in - let nsize = min (2 * osize + 1) Sys.max_array_length in - if nsize <> osize then begin - let ndata = Array.create nsize Empty in - let rec insert_bucket = function - Empty -> () - | Cons(key, data, rest) -> - insert_bucket rest; (* preserve original order of elements *) - let nidx = (hash key) mod nsize in - ndata.(nidx) <- Cons(key, data, ndata.(nidx)) in - for i = 0 to osize - 1 do - insert_bucket odata.(i) - done; - tbl.data <- ndata; - end - -let add h key info = - let i = (hash key) mod (Array.length h.data) in - let bucket = Cons(key, info, h.data.(i)) in - h.data.(i) <- bucket; - h.size <- succ h.size; - if h.size > Array.length h.data lsl 1 then resize h - -let remove h key = - let rec remove_bucket = function - Empty -> - Empty - | Cons(k, i, next) -> - if equal k key - then begin h.size <- pred h.size; next end - else Cons(k, i, remove_bucket next) in - let i = (hash key) mod (Array.length h.data) in - h.data.(i) <- remove_bucket h.data.(i) - -let rec find_rec key = function - Empty -> - raise Not_found - | Cons(k, d, rest) -> - if equal key k then d else find_rec key rest - -let find h key = - match h.data.((hash key) mod (Array.length h.data)) with - Empty -> raise Not_found - | Cons(k1, d1, rest1) -> - if equal key k1 then d1 else - match rest1 with - Empty -> raise Not_found - | Cons(k2, d2, rest2) -> - if equal key k2 then d2 else - match rest2 with - Empty -> raise Not_found - | Cons(k3, d3, rest3) -> - if equal key k3 then d3 else find_rec key rest3 - -let find_all h key = - let rec find_in_bucket = function - Empty -> - [] - | Cons(k, d, rest) -> - if equal k key - then d :: find_in_bucket rest - else find_in_bucket rest in - find_in_bucket h.data.((hash key) mod (Array.length h.data)) - -let replace h key info = - let rec replace_bucket = function - Empty -> - raise Not_found - | Cons(k, i, next) -> - if equal k key - then Cons(k, info, next) - else Cons(k, i, replace_bucket next) in - let i = (hash key) mod (Array.length h.data) in - let l = h.data.(i) in - try - h.data.(i) <- replace_bucket l - with Not_found -> - h.data.(i) <- Cons(key, info, l); - h.size <- succ h.size; - if h.size > Array.length h.data lsl 1 then resize h - -let mem h key = - let rec mem_in_bucket = function - | Empty -> - false - | Cons(k, d, rest) -> - equal k key || mem_in_bucket rest in - mem_in_bucket h.data.((hash key) mod (Array.length h.data)) - -let iter f h = - let rec do_bucket = function - Empty -> - () - | Cons(k, d, rest) -> - f k d; do_bucket rest in - let d = h.data in - for i = 0 to Array.length d - 1 do - do_bucket d.(i) - done - -let fold f h init = - let rec do_bucket b accu = - match b with - Empty -> - accu - | Cons(k, d, rest) -> - do_bucket rest (f k d accu) in - let d = h.data in - let accu = ref init in - for i = 0 to Array.length d - 1 do - accu := do_bucket d.(i) !accu - done; - !accu - - -end - - - - - - - - - - - - +module HTagSet = Hashtbl.Make(HTagSetKey) type dispatch = { first : Tree.t -> Tree.t; flabel : string; next : Tree.t -> Tree.t -> Tree.t; nlabel : string; + consres : Tree.t -> TS.t -> TS.t -> bool -> bool -> TS.t } + +type formlist = Nil | Cons of state*formula*int*formlist + +let f_hash (h,s,t) = h * 41+((Ptset.hash s) lsl 10 ) lxor (Ptset.hash t)*4097 +module HFormlistKey = +struct + type t = int*Ptset.t*Ptset.t + let equal (h1,s1,t1) (h2,s2,t2) = h1==h2 && s1 == s2 && t1 == t2 + let hash = f_hash +end +module HFormlist = Hashtbl.Make (HFormlistKey) + type t = { id : int; mutable states : Ptset.t; init : Ptset.t; mutable final : Ptset.t; universal : Ptset.t; + starstate : Ptset.t option; (* Transitions of the Alternating automaton *) phi : (state,(TagSet.t*(bool*formula*bool)) list) Hashtbl.t; - sigma : (dispatch*bool*formula) HTagSet.t; + sigma : (dispatch*bool*formlist*Ptset.t*Ptset.t) HTagSet.t; } module Pair (X : Set.OrderedType) (Y : Set.OrderedType) = @@ -442,7 +309,6 @@ type t = { pl2 acc ) pl1 PL.empty - and dnf f = try Hashtbl.find dnf_hash f.fid @@ -492,21 +358,31 @@ type t = { Format.fprintf ppf "\n")l; Format.fprintf ppf "NFA transitions :\n------------------------------\n"; - HTagSet.iter (fun (qs,t) (disp,b,f) -> - pr_st ppf (Ptset.elements qs); - Format.fprintf ppf ",%s %s " (Tag.to_string t) (if b then "=>" else "->"); - pr_frm ppf f; - Format.fprintf ppf "(fid=%i) left=" f.fid; - let (l,ll,_),(r,rr,_) = f.st in - pr_st ppf (Ptset.elements l); - Format.fprintf ppf ", "; - pr_st ppf (Ptset.elements ll); +(* HTagSet.iter (fun (qs,t) (disp,b,_,flist,_,_) -> + let (ls,lls,_),(rs,rrs,_) = + List.fold_left (fun ((a1,b1,c1),(a2,b2,c2)) (_,f) -> + let (x1,y1,z1),(x2,y2,z2) = f.st in + ((Ptset.union x1 a1),(Ptset.union y1 b1),(Ptset.union c1 z1)), + ((Ptset.union x2 a2),(Ptset.union y2 b2),(Ptset.union c2 z2))) + ((Ptset.empty,Ptset.empty,Ptset.empty), + (Ptset.empty,Ptset.empty,Ptset.empty)) + flist + in + pr_st ppf (Ptset.elements qs); + Format.fprintf ppf ",%s %s " (Tag.to_string t) (if b then "=>" else "->"); + List.iter (fun (q,f) -> + Format.fprintf ppf "\n%i," q; + pr_frm ppf f) flist; + Format.fprintf ppf "\nleft="; + pr_st ppf (Ptset.elements ls); + Format.fprintf ppf " , "; + pr_st ppf (Ptset.elements lls); Format.fprintf ppf ", right="; - pr_st ppf (Ptset.elements r); + pr_st ppf (Ptset.elements rs); Format.fprintf ppf ", "; - pr_st ppf (Ptset.elements rr); - Format.fprintf ppf ", first=%s, next=%s\n" disp.flabel disp.nlabel; - ) a.sigma; + pr_st ppf (Ptset.elements rrs); + Format.fprintf ppf ", first=%s, next=%s\n\n" disp.flabel disp.nlabel; + ) a.sigma; *) Format.fprintf ppf "=======================================\n%!" module Transitions = struct @@ -544,14 +420,20 @@ type t = { (* test some inlining *) | True -> true,true,true | False -> false,false,false - | Atom((`Left|`LLeft),b,q) -> if b == (Ptset.mem q s1) then (true,true,false) else false,false,false - | Atom(_,b,q) -> if b == (Ptset.mem q s2) then (true,false,true) else false,false,false | _ -> try - HFEval.find hfeval (f.fid,s1,s2) + HFEval.find hfeval (f.fid,s1,s2) with - | Not_found -> let r = + | Not_found -> let r = match f.pos with + | Atom((`Left|`LLeft),b,q) -> + if b == (Ptset.mem q s1) + then (true,true,false) + else false,false,false + | Atom(_,b,q) -> + if b == (Ptset.mem q s2) + then (true,false,true) + else false,false,false | Or(f1,f2) -> let b1,rl1,rr1 = eval f1 in @@ -577,18 +459,32 @@ type t = { in eval f - let fstate_pool = Hashtbl.create 11 + let h_formlist = HFormlist.create 511 + + let form_list_fold_left f acc fl = + let rec loop acc fl = + match fl with + | Nil -> acc + | Cons(s,frm,h,fll) -> loop (f acc s frm h) fll + in + loop acc fl + - let merge_pred a b = match a,b with - | Some(f1), Some(f2) -> Some(fun x -> f1 x || f2 x) - | None,None -> None - | None,Some(_) -> b - | Some(_),None -> a + let rec eval_formlist s1 s2 = function + | Nil -> Ptset.empty,false,false,false + | Cons(q,f,h,fl) -> + let k = (h,s1,s2) + in + try HFormlist.find h_formlist k + with + Not_found -> + let s,b',b1',b2' = eval_formlist s1 s2 fl in + let b,b1,b2 = eval_form_bool f s1 s2 in + let r = if b then (Ptset.add q s, b'||b, b1'||b1,b2'||b2) + else s,b',b1',b2' + in + HFormlist.add h_formlist k r;r - let acc_pred p l1 l2 = match p with - | `Left _ -> p::l1,l2 - | `Right _ -> l1,p::l2 - | _ -> l1,l2 @@ -612,25 +508,42 @@ type t = { else `Negative(TagSet.negative ts) + + let cons_res e s1 s2 b1 b2 = + if b1&&b2 then + if s2 == TS.Nil && s1 == TS.Nil + then TS.Sing e + else if s1 == TS.Nil + then TS.Cons (e,s2) + else if s2 == TS.Nil + then TS.Cons (e,s1) + else TS.ConsCat(e,s1,s2) + else if not(b1 || b2) + then TS.Sing e + else if b1 then if s1 == TS.Nil then TS.Sing e else TS.Cons(e,s1) + else if s2 = TS.Nil then TS.Sing e else TS.Cons(e,s2) + + let cat_res _ s1 s2 b1 b2 = + if b1&&b2 then if s1 == TS.Nil && s2 == TS.Nil then TS.Nil + else + if s1 == TS.Nil + then s2 + else + if s2 == TS.Nil then s1 else TS.Concat(s1,s2) + else if not(b1 || b2) + then TS.Nil + else if b1 then s1 + else s2 + let merge_trans t a tag q acc = - List.fold_left (fun (accf,accm,acchtrue) (ts,(m,f,pred)) -> + List.fold_left (fun (accf,accm,acchtrue,acchash) (ts,(m,f,pred)) -> if TagSet.mem tag ts then - let tmpf,hastrue = - if is_true f then - let newfinal = - try Hashtbl.find fstate_pool f.fid with - | Not_found -> let s = mk_state() in - a.states <- Ptset.add s a.states; - a.final <- Ptset.add s a.final; - Hashtbl.add fstate_pool f.fid s;s - in - (atom_ `Left true newfinal),true - else f,false in - (or_ tmpf accf,accm||m,acchtrue||hastrue) - else (accf,accm,acchtrue) + let acchash = acchash+31*f.fid+42*q in + (Cons(q,f,acchash,accf),accm||m,acchtrue||(is_true f),acchash) + else (accf,accm,acchtrue,acchash) ) acc (try Hashtbl.find a.phi q with Not_found -> []) let inter_text a b = @@ -642,26 +555,30 @@ type t = { let next_sibling_ctx x _ = Tree.next_sibling x let r_ignore _ x = x - - let get_trans t a tag r = try - let dispatch,mark,f = HTagSet.find a.sigma (r,tag) - in f.st,dispatch,f,mark,r with Not_found -> - let f,mark,_,accq = - Ptset.fold (fun q (accf,accm,acchtrue,accq) -> - let naccf,naccm,nacctrue = - merge_trans t a tag q (accf,accm,acchtrue ) + let fl,mark,_,_,accq = + Ptset.fold (fun q (accf,accm,acchtrue,acchash,accq) -> + let naccf,naccm,nacctrue,acchash = + merge_trans t a tag q (accf,accm,acchtrue,acchash ) in - if is_false naccf then (naccf,naccm,nacctrue,accq) - else (naccf,naccm,nacctrue,Ptset.add q accq) + (* if is_false naccf then (naccf,naccm,nacctrue,accq) + else *) (naccf,naccm,nacctrue,acchash,Ptset.add q accq) ) - r (false_,false,false,Ptset.empty) + r (Nil,false,false,17,Ptset.empty) in - let (ls,lls,_),(rs,rrs,_) = f.st in + let (ls,lls,llls),(rs,rrs,rrrs) = + form_list_fold_left (fun ((a1,b1,c1),(a2,b2,c2)) _ f _ -> + let (x1,y1,z1),(x2,y2,z2) = f.st in + ((Ptset.union x1 a1),(Ptset.union y1 b1),(Ptset.union c1 z1)), + ((Ptset.union x2 a2),(Ptset.union y2 b2),(Ptset.union c2 z2))) + ((Ptset.empty,Ptset.empty,Ptset.empty), + (Ptset.empty,Ptset.empty,Ptset.empty)) + fl + in let tb,ta = Tree.tags t tag in @@ -669,7 +586,21 @@ type t = { and tll,htllt,llfin = inter_text tb (tags a lls) and tr,htrt,rfin = inter_text ta (tags a rs) and trr,htrrt,rrfin = inter_text ta (tags a rrs) - in + in(* + let _ = + Format.fprintf Format.err_formatter "Tag %s, right_states " (Tag.to_string tag); + pr_st Format.err_formatter (Ptset.elements rs); + Format.fprintf Format.err_formatter " tags = "; + Ptset.iter (fun t -> Format.fprintf Format.err_formatter "%s " + (Tag.to_string t)) tr; + Format.fprintf Format.err_formatter ", next_states "; + pr_st Format.err_formatter (Ptset.elements rrs); + Format.fprintf Format.err_formatter " tags = "; + Ptset.iter (fun t -> Format.fprintf Format.err_formatter "%s " + (Tag.to_string t)) trr; + Format.fprintf Format.err_formatter "\n%!"; + + in*) let first,flabel = if (llfin && lfin) then (* no stars *) (if htlt || htllt then (Tree.text_below, "#text_below") @@ -711,65 +642,73 @@ type t = { else if htrt || htrrt then (Tree.next_sibling_ctx,"#next_sibling_ctx") else (Tree.node_sibling_ctx,"#node_sibling_ctx") in - let dispatch = { first = first; flabel = flabel; next = next; nlabel = nlabel} + let dispatch = { first = first; flabel = flabel; next = next; nlabel = nlabel; + consres = if mark then cons_res else cat_res } in - HTagSet.add a.sigma (accq,tag) (dispatch,mark,f); - f.st,dispatch,f,mark,accq + HTagSet.add a.sigma (accq,tag) (dispatch,mark,fl,llls,rrrs); + dispatch,mark,fl,llls,rrrs - let rec accepting_among a t orig ctx = - let rest = Ptset.inter orig a.universal in - let r = Ptset.diff orig rest in - if Ptset.is_empty r then rest,0,TS.empty else - if Tree.is_nil t - then orig,0,TS.empty - else - let ((_,_,llls),(_,_,rrrs)),dispatch,formula,mark,r' = - get_trans t a (Tree.tag t) r - in - let s1,n1,res1 = accepting_among a (dispatch.first t) llls t in - let s2,n2,res2 = accepting_among a (dispatch.next t ctx) rrrs ctx in - let rb,rb1,rb2 = eval_form_bool formula s1 s2 in - if rb - then - let n1,res1 = if rb1 then n1,res1 else 0,TS.empty - and n2,res2 = if rb2 then n2,res2 else 0,TS.empty - in - if mark - then r',1+n1+n2,TS.Cons(t,(TS.Concat(res1,res2))) - else r',n1+n2,TS.Concat(res1,res2) - else Ptset.empty,0,TS.empty - - let rec accepting_among_count a t orig ctx = - let rest = Ptset.inter orig a.universal in - let r = Ptset.diff orig rest in - if Ptset.is_empty r then rest,0 else - if Tree.is_node t - then - let ((_,_,llls),(_,_,rrrs)),dispatch,formula,mark,r' = - get_trans t a (Tree.tag t) r - in - let s1,res1 = accepting_among_count a (dispatch.first t) llls t - and s2,res2 = accepting_among_count a (dispatch.next t ctx) rrrs ctx - in - let rb,rb1,rb2 = eval_form_bool formula s1 s2 in - if rb - then - let res1 = if rb1 then res1 else 0 - and res2 = if rb2 then res2 else 0 - in r', if mark then 1+res1+res2 else res1+res2 - else Ptset.empty,0 - else orig,0 - - + + let rec accepting_among a t r ctx = + if Tree.is_nil t || Ptset.is_empty r then Ptset.empty,0,TS.Nil else + let dispatch,mark,flist,llls,rrrs = + get_trans t a (Tree.tag t) r + in + let s1,n1,res1 = accepting_among a (dispatch.first t) llls t in + let s2,n2,res2 = accepting_among a (dispatch.next t ctx) rrrs ctx in + let r',rb,rb1,rb2 = eval_formlist s1 s2 flist in + r',(vb rb)*((vb mark) + (vb rb1)* n1 + (vb rb2)*n2),if rb then + dispatch.consres t res1 res2 rb1 rb2 + else TS.Nil + let run a t = let st,n,res = accepting_among a t a.init t in if Ptset.is_empty (st) then TS.empty,0 else res,n - + + let rec accepting_among_count_no_star a t r ctx = + if Tree.is_nil t||Ptset.is_empty r then Ptset.empty,0 else + let dispatch,mark,flist,llls,rrrs = + get_trans t a (Tree.tag t) r + in + let s1,res1 = accepting_among_count_no_star a (dispatch.first t) llls t + and s2,res2 = accepting_among_count_no_star a (dispatch.next t ctx) rrrs ctx + in + let r',rb,rb1,rb2 = eval_formlist s1 s2 flist + in + r',(vb rb)*((vb mark) + (vb rb1)*res1 + (vb rb2)*res2) + + + + let rec accepting_among_count_star a t n = + if Tree.is_nil t then n else + if (Tree.tag t == Tag.attribute) + then accepting_among_count_star a (Tree.node_sibling t) n + else accepting_among_count_star a (Tree.node_sibling t) + (accepting_among_count_star a (Tree.node_child t) (1+n)) + + let rec accepting_among_count_may_star starstate a t r ctx = + if r == starstate then starstate,(accepting_among_count_star a t 0) + else + if Tree.is_nil t||Ptset.is_empty r then Ptset.empty,0 else + let dispatch,mark,flist,llls,rrrs = + get_trans t a (Tree.tag t) r + in + let s1,res1 = accepting_among_count_may_star starstate a (dispatch.first t) llls t + and s2,res2 = accepting_among_count_may_star starstate a (dispatch.next t ctx) rrrs ctx + in + let r',rb,rb1,rb2 = eval_formlist s1 s2 flist + in + r',(vb rb)*((vb mark) + (vb rb1)*res1 + (vb rb2)*res2) + let run_count a t = - let st,res = accepting_among_count a t a.init t in + + let st,res = match a.starstate with + | None -> accepting_among_count_no_star a t a.init t + | Some s -> accepting_among_count_may_star s a t a.init t + in if Ptset.is_empty (st) then 0 else res diff --git a/ata.mli b/ata.mli index cd6610b..4ec2f59 100644 --- a/ata.mli +++ b/ata.mli @@ -34,16 +34,21 @@ type dispatch = { first : Tree.t -> Tree.t; flabel : string; next : Tree.t -> Tree.t -> Tree.t; nlabel : string; + consres : Tree.t -> TS.t -> TS.t -> bool -> bool -> TS.t; } + +type formlist = Nil | Cons of state*formula*int*formlist + type t = { id : int; mutable states : Ptset.t; init : Ptset.t; mutable final : Ptset.t; universal : Ptset.t; + starstate : Ptset.t option; (* Transitions of the Alternating automaton *) phi : (state,(TagSet.t*(bool*formula*bool)) list) Hashtbl.t; - sigma : (dispatch*bool*formula) HTagSet.t; + sigma : (dispatch*bool*formlist*Ptset.t*Ptset.t) HTagSet.t; } val dump : Format.formatter -> t -> unit diff --git a/main.ml b/main.ml index aed4ebb..985a5b2 100644 --- a/main.ml +++ b/main.ml @@ -19,7 +19,10 @@ let time f x = r ;; let total_time () = List.fold_left (+.) 0. !l;; - +let enabled_gc = Gc.get() +let disabled_gc = { Gc.get() with + Gc.max_overhead = 1000000; + Gc.space_overhead = 100 } let main v query output = let _ = Tag.init (Tree.tag_pool v) in @@ -47,7 +50,7 @@ let main v query output = let r = Tree.count v s in Printf.eprintf "Global count is %i, using " r; - if r < 60000 then begin + if r < !Options.tc_threshold then begin Printf.eprintf "TextCollection contains\nCalling global contains : "; time (Tree.init_contains v) s end @@ -58,12 +61,14 @@ let main v query output = in Printf.eprintf "Execution time %s : " (if !Options.count_only then "(counting only)" else ""); begin + let _ = Gc.full_major();Gc.compact() in + let _ = Gc.set (disabled_gc) in if !Options.count_only then let r = time ( run_count auto )v in let _ = Printf.eprintf "Number of nodes in the result set : %i\n%!" r in () else -(* let _ = Gc.set ({ Gc.get() with Gc.max_overhead = 1000000; Gc.space_overhead = 100 }) in *) + let result,rcount = time (if !Options.time then run_time auto else run auto) v in Printf.eprintf "Number of nodes in the result set : %i\n" rcount; Printf.eprintf "\n%!"; @@ -75,12 +80,13 @@ let main v query output = time( fun () -> let oc = open_out f in output_string oc "\n"; - TS.iter (fun t -> + TS.iter (fun t -> output_string oc "----------\n"; Tree.print_xml_fast oc t; output_char oc '\n') result) (); end; end; - let _ = Ata.dump Format.err_formatter auto in + let _ = Gc.set enabled_gc in +(* let _ = Ata.dump Format.err_formatter auto in *) Printf.eprintf "Total running time : %fms\n%!" (total_time()) ;; diff --git a/options.ml b/options.ml index ab25d7d..43f665f 100644 --- a/options.ml +++ b/options.ml @@ -1,6 +1,7 @@ let index_empty_texts = ref false let sample_factor = ref 64 let disable_text_collection = ref false +let tc_threshold = ref 60000 let query = ref "" let input_file = ref "" @@ -22,6 +23,7 @@ let anon_fun = let spec = [ "-c", Arg.Set(count_only), "counting only (don't materialize the result set)"; "-t", Arg.Set(time), "print timing statistics"; + "-max-tc", Arg.Set_int(tc_threshold), "set maximum count for which the TextCollection is used"; "-f", Arg.Set_int(sample_factor), "sample factor [default=64]"; "-i", Arg.Set(index_empty_texts), "index empty texts [default=false]"; "-d", Arg.Set(disable_text_collection), "disable text collection[default=false]"; diff --git a/options.mli b/options.mli index ebc5828..a7d870d 100644 --- a/options.mli +++ b/options.mli @@ -8,3 +8,4 @@ val input_file : string ref val output_file : string option ref val save_file : string ref val time : bool ref +val tc_threshold : int ref diff --git a/tests/test.xml b/tests/test.xml index 4411c44..35bd7d3 100644 --- a/tests/test.xml +++ b/tests/test.xml @@ -1,13 +1,10 @@ - - - - - + + diff --git a/tree.ml b/tree.ml index c968f10..74903de 100644 --- a/tree.ml +++ b/tree.ml @@ -168,16 +168,9 @@ module DocIdSet = struct let compare = compare_node end) end -let is_nil t = match t.node with - | Nil -> true - | Node(i) -> equal_node i nil - | _ -> false - -let is_node t = -match t.node with - | Node(i) -> not(equal_node i nil) - | _ -> false +let is_nil t = t.node == Nil +let is_node t = t.node != Nil let node_of_t t = let _ = Tag.init (Obj.magic t) in @@ -232,7 +225,7 @@ let compare a b = match a.node,b.node with let equal a b = (compare a b) == 0 -let norm (n : [`Tree ] node ) = if tree_is_nil n then Nil else Node (n) +let norm (n : [`Tree ] node ) = if n == -1 then Nil else Node (n) let nts = function Nil -> "Nil" @@ -279,7 +272,7 @@ let node_sibling n = | Node i -> { n with node= norm(tree_next_sibling n.doc i) } | _ -> { n with node = Nil } -let node_sibling_ctx n _ = +let node_sibling_ctx n _ = match n.node with | Node i -> { n with node= norm(tree_next_sibling n.doc i) } | _ -> { n with node = Nil } diff --git a/xPath.ml b/xPath.ml index 4d83634..27479be 100644 --- a/xPath.ml +++ b/xPath.ml @@ -230,6 +230,7 @@ type config = { st_root : Ata.state; (* state matching the root element (initial mutable entry_points : (Tag.t*Ptset.t) list; mutable contains : string option; mutable univ_states : Ata.state list; + mutable starstate : Ptset.t option; } let dummy_conf = { st_root = -1; st_univ = -1; @@ -242,6 +243,7 @@ let dummy_conf = { st_root = -1; entry_points = []; contains = None; univ_states = []; + starstate = None; } @@ -332,9 +334,11 @@ let rec compile_step ?(existential=false) conf q_src dir ctx_path nrec step num if nrec then `LLeft,`RRight else `Left,`Right in - + let _ = if is_last && axis=Descendant && TagSet.equal test TagSet.star + then conf.starstate <- Some(Ptset.singleton q_src) + in let t1 = ?< q_src><(test, is_last && not(ex))>=> - p_f *& ( if false (*is_last*) then Ata.true_ else (_l left) ** q_dst) in + p_f *& ( if is_last then Ata.true_ else (_l left) ** q_dst) in let _ = add_trans num conf.tr t1 in @@ -503,6 +507,7 @@ let compile path = entry_points = []; contains = None; univ_states = []; + starstate = None; } in let q0 = Ata.mk_state() in @@ -550,6 +555,7 @@ let compile path = Ata.universal = Ptset.add a_dst (Ptset.from_list config.univ_states); Ata.phi = phi; Ata.sigma = Ata.HTagSet.create 17; + Ata.starstate = config.starstate; },config.entry_points,!contains