From ac8720a930e5757a72f73e80aff7a988b0082831 Mon Sep 17 00:00:00 2001 From: kim Date: Wed, 28 Apr 2010 06:46:34 +0000 Subject: [PATCH] Fixed nasty garbage collector setting git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@809 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- Makefile | 9 +- ata.ml | 302 ++++++++++++++++++++++++++++++++++++++++++++++-------- ata.mli | 4 + main.ml | 25 ++--- ptset.ml | 127 +++++++++++++++-------- ptset.mli | 16 ++- results.c | 4 +- tree.ml | 4 +- tree.mli | 2 +- utils.ml | 8 +- 10 files changed, 389 insertions(+), 112 deletions(-) diff --git a/Makefile b/Makefile index 88620eb..5cb08a9 100644 --- a/Makefile +++ b/Makefile @@ -85,7 +85,7 @@ all: main #-ccopt -gp -p main: libcamlshredder.a $(MLOBJS) @echo [LINK] $@ - $(HIDE) $(OCAMLFIND) $(LINK) -o main -package "$(OCAMLPACKAGES)" $(SYNTAX) -cclib \ + $(HIDE) $(OCAMLFIND) $(LINK) -o main -package "$(OCAMLPACKAGES)" $(SYNTAX) -cclib \ "$(LIBS) ./libcamlshredder.a" $(MLOBJS) unit_test: libcamlshredder.a $(BASEOBJS) unit_test.cmx @@ -108,9 +108,10 @@ unit_test: libcamlshredder.a $(BASEOBJS) unit_test.cmx @echo [OCAMLOPT] $@ $(HIDE) $(OCAMLFIND) $(OCAMLOPT) -package "$(OCAMLPACKAGES)" $(SYNTAX) -c $< -ata.cmx: ata.ml - @echo [OCAMLOPTPROF] $@ - $(HIDE) $(OCAMLFIND) $(OCAMLOPT) -S -package "$(OCAMLPACKAGES)" $(SYNTAX) -c $< +# ata.cmx: ata.ml +# @echo [OCAMLOPTPROF] $@ +# $(HIDE) $(OCAMLFIND) $(OCAMLOPT) -S -ccopt -gp -p -package "$(OCAMLPACKAGES)" $(SYNTAX) -c $< + .mli.cmi: @echo [OCAMLOPT] $@ diff --git a/ata.ml b/ata.ml index 9197cbc..a291367 100644 --- a/ata.ml +++ b/ata.ml @@ -13,8 +13,9 @@ sig end = struct type t = int - let make = let id = ref ~-1 in - fun () -> incr id; !id + let make = + let id = ref ~-1 in + fun () -> incr id; !id let compare = (-) let equal = (==) @@ -25,8 +26,25 @@ struct if x < 0 then failwith (Printf.sprintf "State: Assertion %i < 0 failed" x) end -module StateSet = Ptset.Int - +module StateSet = +struct + include Ptset.Make ( struct type t = int + type data = t + external hash : t -> int = "%identity" + external uid : t -> Uid.t = "%identity" + external equal : t -> t -> bool = "%eq" + external make : t -> int = "%identity" + external node : t -> int = "%identity" + external with_id : Uid.t -> t = "%identity" + end + ) + let print ppf s = + Format.pp_print_string ppf "{ "; + iter (fun i -> Format.fprintf ppf "%i " i) s; + Format.pp_print_string ppf "}"; + Format.pp_print_flush ppf () +end + module Formula = struct type 'hcons expr = @@ -221,8 +239,6 @@ module Transition = struct end -module TransTable = Hashtbl - module Formlist = struct include Hlist.Make(Transition) let print ppf fl = @@ -238,9 +254,9 @@ end type 'a t = { id : int; - mutable states : Ptset.Int.t; - init : Ptset.Int.t; - starstate : Ptset.Int.t option; + mutable states : StateSet.t; + init : StateSet.t; + starstate : StateSet.t option; (* Transitions of the Alternating automaton *) trans : (State.t,(TagSet.t*Transition.t) list) Hashtbl.t; query_string: string; @@ -454,7 +470,7 @@ let tags_of_state a q = else 0 let merge conf t res1 res2 = match conf with - NO -> 0 + | NO -> 0 | ONLY1 -> res1 | ONLY2 -> res2 | ONLY12 -> res1+res2 @@ -462,6 +478,10 @@ let tags_of_state a q = | MARK1 -> res1+1 | MARK2 -> res2+1 | MARK12 -> res1+res2+1 + let merge conf _ res1 res2 = + let conf = Obj.magic conf in + (conf lsr 2) + ((conf land 0b10) lsr 1)*res2 + (conf land 0b1)*res1 + let mk_quick_tag_loop _ sl ss tree tag = (); fun t ctx -> @@ -849,7 +869,7 @@ END type t = fun_tree array array let dummy = fun _ _ _ _ _ -> failwith "Uninitializd CodeCache" - let default_line = Array.create 256 dummy (* 256 = max_tag *) + let default_line = Array.create 1024 dummy (* 1024 = max_tag *) let create n = Array.create n default_line let init f = for i = 0 to (Array.length default_line) - 1 @@ -871,9 +891,6 @@ END end - let td_trans = CodeCache.create 10000 (* should be number of tags *number of states^2 - in the document *) - let empty_size n = let rec loop acc = function 0 -> acc | n -> loop (SList.cons StateSet.empty acc) (n-1) @@ -887,9 +904,9 @@ END type t = Obj.t array array array array let dummy_val = Obj.repr ((),2,()) - let default_line3 = Array.create 10000 dummy_val - let default_line2 = Array.create 10000 default_line3 - let default_line1 = Array.create 10000 default_line2 + let default_line3 = Array.create BIG_A_SIZE dummy_val + let default_line2 = Array.create BIG_A_SIZE default_line3 + let default_line1 = Array.create BIG_A_SIZE default_line2 let create n = Array.create n default_line1 @@ -905,18 +922,18 @@ END if e == default then let ne = Array.copy e in (set tab idx ne;ne) else e - + let add h tag fl s1 s2 (data: SList.t*bool*(merge_conf array)) = let l1 = get_replace h tag default_line1 in let l2 = get_replace l1 (Uid.to_int fl.Formlistlist.Node.id) default_line2 in - let l3 = get_replace l2 (Uid.to_int s1.SList.Node.id) default_line3 in + let l3 = get_replace l2 (Uid.to_int s1.SList.Node.id) default_line3 in set l3 (Uid.to_int s2.SList.Node.id) (Obj.repr data) end - let h_fold2 = Fold2Res.create 256 + - let top_down ?(noright=false) a tree t slist ctx slot_size = + let top_down ?(noright=false) a tree t slist ctx slot_size td_trans h_fold2= let pempty = empty_size slot_size in let rempty = Array.make slot_size RS.empty in (* evaluation starts from the right so we put sl1,res1 at the end *) @@ -1139,15 +1156,218 @@ END let _ = CodeCache.init mk_trans in (if noright then loop_no_right else loop) t ctx slist Tag.dummy - let run_top_down a tree = - let init = SList.cons a.init SList.nil in - let _,res = top_down a tree Tree.root init Tree.root 1 - in - D_IGNORE_( - output_trace a tree "trace.html" - (RS.fold (fun t a -> IntSet.add (Tree.id tree t) a) res.(0) IntSet.empty), + + let run_top_down a tree = + let init = SList.cons a.init SList.nil in + let _,res = top_down a tree Tree.root init Tree.root 1 (CodeCache.create BIG_A_SIZE) (Fold2Res.create 1024) + in + D_IGNORE_( + output_trace a tree "trace.html" + (RS.fold (fun t a -> IntSet.add (Tree.id tree t) a) res.(0) IntSet.empty), res.(0)) - ;; + ;; + + + + + + module Code3Cache = + struct + let get = Array.unsafe_get + let set = Array.set + + type fun_tree = [`Tree] Tree.node -> [`Tree] Tree.node -> StateSet.t -> Tag.t -> StateSet.t*RS.t + type t = fun_tree array array + + let dummy = fun _ _ _ _ -> failwith "Uninitializd Code3Cache" + let default_line = Array.create 1024 dummy (* 256 = max_tag *) + let create n = Array.create n default_line + let init f = + for i = 0 to (Array.length default_line) - 1 + do + default_line.(i) <- f + done + + let get_fun h slist tag = + get (get h (Uid.to_int slist.StateSet.Node.id)) tag + + let set_fun (h : t) slist tag (data : fun_tree) = + let tab = get h (Uid.to_int slist.StateSet.Node.id) in + let line = if tab == default_line then + let x = Array.copy tab in + (set h (Uid.to_int slist.StateSet.Node.id) x;x) + else tab + in + set line tag data + + end + + + + module Fold3Res = struct + let get = Array.unsafe_get + let set = Array.set + external field1 : Obj.t -> int = "%field1" + type t = Obj.t array array array array + let dummy_val = Obj.repr ((),2,()) + + let default_line3 = Array.create 1024 dummy_val + let default_line2 = Array.create BIG_A_SIZE default_line3 + let default_line1 = Array.create BIG_A_SIZE default_line2 + + let create n = Array.create n default_line1 + + let find h tag fl s1 s2 : StateSet.t*bool*merge_conf = + let l1 = get h (Uid.to_int fl.Formlist.Node.id) in + let l2 = get l1 (Uid.to_int s1.StateSet.Node.id) in + let l3 = get l2 (Uid.to_int s2.StateSet.Node.id) in + Obj.magic (get l3 tag) + + let is_valid b = b != (Obj.magic dummy_val) + let get_replace tab idx default = + let e = get tab idx in + if e == default then + let ne = Array.copy e in (set tab idx ne;ne) + else e + + let add h tag fl s1 s2 (data: StateSet.t*bool*merge_conf) = + let l1 = get_replace h (Uid.to_int fl.Formlist.Node.id) default_line1 in + let l2 = get_replace l1 (Uid.to_int s1.StateSet.Node.id) default_line2 in + let l3 = get_replace l2 (Uid.to_int s2.StateSet.Node.id) default_line3 in + set l3 tag (Obj.repr data) + end + + + let empty_res = StateSet.empty,RS.empty + + let top_down1 a tree t slist ctx td_trans h_fold2 = + (* evaluation starts from the right so we put sl1,res1 at the end *) + let eval_fold2_slist fll t tag (sl2,res2) (sl1,res1) = + let data = Fold3Res.find h_fold2 tag fll sl1 sl2 in + if Fold3Res.is_valid data then + let r,b,conf = data in + (r,if b then RS.merge conf t res1 res2 else RS.empty) + else + let r,conf = eval_formlist tag sl1 sl2 fll in + let b = conf <> NO in + (Fold3Res.add h_fold2 tag fll sl1 sl2 (r,b,conf); + (r, if b then RS.merge conf t res1 res2 else RS.empty)) + + in + let rec loop t ctx slist _ = + if t == Tree.nil then empty_res else + let tag = Tree.tag tree t in + (Code3Cache.get_fun td_trans slist tag) t ctx slist tag + + and loop_tag t ctx slist tag = + if t == Tree.nil then empty_res else + (Code3Cache.get_fun td_trans slist tag) t ctx slist tag + + and mk_trans t ctx slist tag = + let fl_list,llist,rlist,ca,da,sa,fa = + StateSet.fold + (fun q acc -> + List.fold_left + (fun ((fl_acc,ll_acc,rl_acc,c_acc,d_acc,s_acc,f_acc) as acc) + (ts,t) -> + if (TagSet.mem tag ts) + then + let _,_,_,f,_ = t.Transition.node in + let (child,desc,below),(sibl,foll,after) = Formula.st f in + (Formlist.cons t fl_acc, + StateSet.union ll_acc below, + StateSet.union rl_acc after, + StateSet.union child c_acc, + StateSet.union desc d_acc, + StateSet.union sibl s_acc, + StateSet.union foll f_acc) + else acc ) acc ( + try Hashtbl.find a.trans q + with + Not_found -> Printf.eprintf "Looking for state %i, doesn't exist!!!\n%!" + q;[] + ) + + ) slist (Formlist.nil,StateSet.empty,StateSet.empty, + StateSet.empty,StateSet.empty,StateSet.empty,StateSet.empty) + + in + (* Logic to chose the first and next function *) + let tags_child,tags_below,tags_siblings,tags_after = Tree.tags tree tag in + let d_f = Algebra.decide a tags_child tags_below (StateSet.union ca da) true in + let d_n = Algebra.decide a tags_siblings tags_after (StateSet.union sa fa) false in + let f_kind,first = choose_jump_down tree d_f + and n_kind,next = choose_jump_next tree d_n in + + let cont = + match f_kind,n_kind with + | `NIL,`NIL -> + fun t _ _ tag -> eval_fold2_slist fl_list t tag empty_res empty_res + + | _,`NIL -> ( + match f_kind with + |`TAG(tag1) -> + (fun t _ _ tag -> eval_fold2_slist fl_list t tag empty_res + (loop_tag (first t) t llist tag1)) + | _ -> + fun t _ _ tag -> eval_fold2_slist fl_list t tag empty_res + (loop (first t) t llist tag) + ) + | `NIL,_ -> ( + match n_kind with + |`TAG(tag2) -> + fun t ctx _ tag -> + eval_fold2_slist fl_list t tag + (loop_tag (next t ctx) ctx rlist tag2) + empty_res + + | _ -> + fun t ctx _ tag -> + eval_fold2_slist fl_list t tag + (loop (next t ctx) ctx rlist tag) + empty_res + + ) + + | `TAG(tag1),`TAG(tag2) -> + fun t ctx _ tag -> + eval_fold2_slist fl_list t tag + (loop_tag (next t ctx) ctx rlist tag2) + (loop_tag (first t) t llist tag1) + + | `TAG(tag1),`ANY -> + fun t ctx _ tag -> + eval_fold2_slist fl_list t tag + (loop (next t ctx) ctx rlist tag) + (loop_tag (first t) t llist tag1) + + | `ANY,`TAG(tag2) -> + fun t ctx _ tag -> + eval_fold2_slist fl_list t tag + (loop_tag (next t ctx) ctx rlist tag2) + (loop (first t) t llist tag) + + + | _,_ -> + fun t ctx _ tag -> + eval_fold2_slist fl_list t tag + (loop (next t ctx) ctx rlist tag) + (loop (first t) t llist tag) + + + + in + Code3Cache.set_fun td_trans slist tag cont; + cont t ctx slist tag + in + let _ = Code3Cache.init mk_trans in + loop t ctx slist Tag.dummy + + + let run_top_down1 a tree = + let _,res = top_down1 a tree Tree.root a.init Tree.root (Code3Cache.create BIG_A_SIZE) (Fold3Res.create BIG_A_SIZE) + in + res module Configuration = struct @@ -1165,7 +1385,7 @@ END if Ptss.mem s c.sets then { c with results = IMap.add s (RS.concat r (IMap.find s c.results)) c.results} else - { hash = HASHINT2(c.hash,Uid.to_int (Ptset.Int.uid s)); + { hash = HASHINT2(c.hash,Uid.to_int s.StateSet.Node.id); sets = Ptss.add s c.sets; results = IMap.add s r c.results } @@ -1199,7 +1419,7 @@ END in let h,s = Ptss.fold - (fun s (ah,ass) -> (HASHINT2(ah, Uid.to_int (Ptset.Int.uid s)), + (fun s (ah,ass) -> (HASHINT2(ah, Uid.to_int s.StateSet.Node.id ), Ptss.add s ass)) (Ptss.union c1.sets c2.sets) (0,Ptss.empty) in @@ -1223,8 +1443,8 @@ END Hashtbl.find h_fold key with Not_found -> let res = - if dir then eval_formlist tag s Ptset.Int.empty formlist - else eval_formlist tag Ptset.Int.empty s formlist + if dir then eval_formlist tag s StateSet.empty formlist + else eval_formlist tag StateSet.empty s formlist in (Hashtbl.add h_fold key res;res) in let (rb,rb1,rb2,mark) = bool_of_merge mcnf in @@ -1297,7 +1517,7 @@ END let slist = SList.rev (slist) in let newconf = fold_f_conf tree parent slist fl_list conf dir in let accu,newconf = Configuration.IMap.fold (fun s res (ar,nc) -> - if Ptset.Int.intersect s init then + if StateSet.intersect s init then ( RS.concat res ar ,nc) else (ar,Configuration.add nc s res)) (newconf.Configuration.results) (accu,Configuration.empty) @@ -1314,8 +1534,8 @@ END | Not_found -> let res = Hashtbl.fold (fun q l acc -> if List.exists (fun (ts,_) -> TagSet.mem tag ts) l - then Ptset.Int.add q acc - else acc) a.trans Ptset.Int.empty + then StateSet.add q acc + else acc) a.trans StateSet.empty in Hashtbl.add h_tdconf tag res;res in (* let _ = pr ", among "; @@ -1323,7 +1543,7 @@ END pr "\n%!"; in *) let r = SList.cons r SList.nil in - let set,res = top_down (~noright:noright) a tree t r t 1 in + let set,res = top_down (~noright:noright) a tree t r t 1 (CodeCache.create BIG_A_SIZE) (Fold2Res.create 1024) in let set = match SList.node set with | SList.Cons(x,_) ->x | _ -> assert false @@ -1371,9 +1591,10 @@ END end - let top_down_count a t = let module RI = Run(Integer) in let r = Integer.length (RI.run_top_down a t) - in (*RI.TransCache.dump RI.td_trans; *)r - let top_down a t = let module RI = Run(IdSet) in (RI.run_top_down a t) + let top_down_count a t = let module RI = Run(Integer) in Integer.length (RI.run_top_down a t) + let top_down_count1 a t = let module RI = Run(Integer) in Integer.length (RI.run_top_down1 a t) + let top_down a t = let module RI = Run(IdSet) in (RI.run_top_down a t) + let top_down1 a t = let module RI = Run(IdSet) in (RI.run_top_down1 a t) let bottom_up_count a t k = let module RI = Run(Integer) in Integer.length (RI.run_bottom_up a t k) let bottom_up a t k = let module RI = Run(IdSet) in (RI.run_bottom_up a t k) @@ -1381,5 +1602,6 @@ END struct module Results = GResult(Doc) let top_down a t = let module R = Run(Results) in (R.run_top_down a t) + let top_down1 a t = let module R = Run(Results) in (R.run_top_down1 a t) end diff --git a/ata.mli b/ata.mli index 125e34b..3af6907 100644 --- a/ata.mli +++ b/ata.mli @@ -101,7 +101,10 @@ module IdSet : ResultSet module GResult (Doc : sig val doc : Tree.t end) : ResultSet val top_down_count : 'a t -> Tree.t -> int +val top_down_count1 : 'a t -> Tree.t -> int val top_down : 'a t -> Tree.t -> IdSet.t +val top_down1 : 'a t -> Tree.t -> IdSet.t + val bottom_up_count : 'a t -> Tree.t -> [> `CONTAINS of 'b | `TAG of Tag.t ] -> int val bottom_up : @@ -111,4 +114,5 @@ module Test (Doc : sig val doc : Tree.t end ) : sig module Results : ResultSet val top_down : 'a t -> Tree.t -> Results.t + val top_down1 : 'a t -> Tree.t -> Results.t end diff --git a/main.ml b/main.ml index 5a7f120..1e145d1 100644 --- a/main.ml +++ b/main.ml @@ -9,10 +9,12 @@ open Ata INCLUDE "utils.ml" let () = init_timer();; -let enabled_gc = Gc.get() -let disabled_gc = { Gc.get() with - Gc.max_overhead = 1000000; - Gc.space_overhead = 100 } +let default_gc = Gc.get() +let tuned_gc = { Gc.get() with + Gc.minor_heap_size = 1024*1024; + Gc.major_heap_increment = 1024*1024; + Gc.max_overhead = 1000000; + } let hash x = 131*x/(x-1+1) let test_loop tree tag = @@ -95,9 +97,9 @@ let main v query_string output = let _ = Printf.eprintf "Number of nodes %i\n%!" (Tree.size v) in (* let _ = test_text v in *) (* let _ = Tree.stats v in *) - let _ = Printf.eprintf "Timing first_child/next_sibling %!" in - let _ = time ~count:0 (Tree.benchmark_fcns) v in (* - let _ = Printf.eprintf "Timing last_child/prev_sibling %!" in +(* let _ = Printf.eprintf "Timing first_child/next_sibling %!" in *) +(* let _ = time ~count:1 (Tree.benchmark_fcns) v in *) + (* let _ = Printf.eprintf "Timing last_child/prev_sibling %!" in let _ = time (Tree.benchmark_lcps) v in let _ = Printf.eprintf "Timing jump to a %!" in let _ = time (Tree.benchmark_jump v) (Tag.tag "a") in @@ -165,7 +167,7 @@ let main v query_string output = begin let _ = Gc.full_major();Gc.compact() in let _ = Printf.eprintf "%!" in - let _ = Gc.set (disabled_gc) in + let _ = Gc.set (tuned_gc) in if !Options.backward && ((snd test_list) != `NOTHING )then if !Options.count_only then let r = time_mem (bottom_up_count auto v )(snd test_list) in @@ -195,12 +197,12 @@ let main v query_string output = if !Options.backward then Printf.eprintf "WARNING: couldn't find a jumping point, running top-down\n" in if !Options.count_only then - let r = time ~count:5 ( top_down_count auto ) v in + let r = time ~count:5 ( top_down_count1 auto ) v in let _ = Printf.eprintf "Number of nodes in the result set : %i\n%!" r in () else let module GR = Ata.Test(struct let doc = v end) in - let result = time_mem (GR.top_down auto) v in + let result = time ~count:5 (GR.top_down1 auto) v in let _ = Printf.eprintf "Counting results " in let rcount = time (GR.Results.length) result in Printf.eprintf "Number of nodes in the result set : %i\n" rcount; @@ -233,8 +235,7 @@ let main v query_string output = ) result) (); end; end; - let _ = Gc.set enabled_gc in - Printf.eprintf "Total running time : %fms\n%!" (total_time()) + Printf.eprintf "Total running time : %fms\n%!" (total_time()) ;; Options.parse_cmdline();; diff --git a/ptset.ml b/ptset.ml index 87e7506..befb42e 100644 --- a/ptset.ml +++ b/ptset.ml @@ -8,8 +8,45 @@ INCLUDE "utils.ml" module type S = sig - include Set.S - type data + type elt + + type 'a node + module rec Node : sig + include Hcons.S with type data = Data.t + end + and Data : sig + include + Hashtbl.HashedType with type t = Node.t node + end + type data = Data.t + type t = Node.t + + + val empty : t + val is_empty : t -> bool + val mem : elt -> t -> bool + val add : elt -> t -> t + val singleton : elt -> t + val remove : elt -> t -> t + val union : t -> t -> t + val inter : t -> t -> t + val diff : t -> t -> t + val compare : t -> t -> int + val equal : t -> t -> bool + val subset : t -> t -> bool + val iter : (elt -> unit) -> t -> unit + val fold : (elt -> 'a -> 'a) -> t -> 'a -> 'a + val for_all : (elt -> bool) -> t -> bool + val exists : (elt -> bool) -> t -> bool + val filter : (elt -> bool) -> t -> t + val partition : (elt -> bool) -> t -> t * t + val cardinal : t -> int + val elements : t -> elt list + val min_elt : t -> elt + val max_elt : t -> elt + val choose : t -> elt + val split : elt -> t -> t * bool * t + val intersect : t -> t -> bool val is_singleton : t -> bool val mem_union : t -> t -> t @@ -31,39 +68,39 @@ struct | Leaf of elt | Branch of int * int * 'a * 'a - module rec HNode : Hcons.S with type data = Node.t = Hcons.Make (Node) - and Node : Hashtbl.HashedType with type t = HNode.t node = + module rec Node : Hcons.S with type data = Data.t = Hcons.Make (Data) + and Data : Hashtbl.HashedType with type t = Node.t node = struct - type t = HNode.t node + type t = Node.t node let equal x y = match x,y with | Empty,Empty -> true | Leaf k1, Leaf k2 -> k1 == k2 | Branch(b1,i1,l1,r1),Branch(b2,i2,l2,r2) -> b1 == b2 && i1 == i2 && - (HNode.equal l1 l2) && - (HNode.equal r1 r2) + (Node.equal l1 l2) && + (Node.equal r1 r2) | _ -> false let hash = function | Empty -> 0 | Leaf i -> HASHINT2(HALF_MAX_INT,Uid.to_int (H.uid i)) - | Branch (b,i,l,r) -> HASHINT4(b,i,Uid.to_int l.HNode.id, Uid.to_int r.HNode.id) + | Branch (b,i,l,r) -> HASHINT4(b,i,Uid.to_int l.Node.id, Uid.to_int r.Node.id) end - ;; - - type t = HNode.t - type data = t node - let hash = HNode.hash - let uid = HNode.uid - let make = HNode.make + + type data = Data.t + type t = Node.t + + let hash = Node.hash + let uid = Node.uid + let make = Node.make let node _ = failwith "node" - let empty = HNode.make Empty + let empty = Node.make Empty - let is_empty s = (HNode.node s) == Empty + let is_empty s = (Node.node s) == Empty - let branch p m l r = HNode.make (Branch(p,m,l,r)) + let branch p m l r = Node.make (Branch(p,m,l,r)) - let leaf k = HNode.make (Leaf k) + let leaf k = Node.make (Leaf k) (* To enforce the invariant that a branch contains two non empty sub-trees *) let branch_ne p m t0 t1 = @@ -77,29 +114,29 @@ struct let singleton k = leaf k let is_singleton n = - match HNode.node n with Leaf _ -> true + match Node.node n with Leaf _ -> true | _ -> false let mem (k:elt) n = let kid = Uid.to_int (H.uid k) in - let rec loop n = match HNode.node n with + let rec loop n = match Node.node n with | Empty -> false | Leaf j -> k == j | Branch (p, _, l, r) -> if kid <= p then loop l else loop r in loop n - let rec min_elt n = match HNode.node n with + let rec min_elt n = match Node.node n with | Empty -> raise Not_found | Leaf k -> k | Branch (_,_,s,_) -> min_elt s - let rec max_elt n = match HNode.node n with + let rec max_elt n = match Node.node n with | Empty -> raise Not_found | Leaf k -> k | Branch (_,_,_,t) -> max_elt t let elements s = - let rec elements_aux acc n = match HNode.node n with + let rec elements_aux acc n = match Node.node n with | Empty -> acc | Leaf k -> k :: acc | Branch (_,_,l,r) -> elements_aux (elements_aux acc r) l @@ -145,7 +182,7 @@ END let add k t = let kid = Uid.to_int (H.uid k) in - let rec ins n = match HNode.node n with + let rec ins n = match Node.node n with | Empty -> leaf k | Leaf j -> if j == k then n else join kid (leaf k) (Uid.to_int (H.uid j)) n | Branch (p,m,t0,t1) -> @@ -161,7 +198,7 @@ END let remove k t = let kid = Uid.to_int(H.uid k) in - let rec rmv n = match HNode.node n with + let rec rmv n = match Node.node n with | Empty -> empty | Leaf j -> if k == j then empty else n | Branch (p,m,t0,t1) -> @@ -177,15 +214,15 @@ END (* should run in O(1) thanks to Hash consing *) - let equal a b = HNode.equal a b + let equal a b = Node.equal a b - let compare a b = (Uid.to_int (HNode.uid a)) - (Uid.to_int (HNode.uid b)) + let compare a b = (Uid.to_int (Node.uid a)) - (Uid.to_int (Node.uid b)) let rec merge s t = if (equal s t) (* This is cheap thanks to hash-consing *) then s else - match HNode.node s, HNode.node t with + match Node.node s, Node.node t with | Empty, _ -> t | _, Empty -> s | Leaf k, _ -> add k t @@ -211,7 +248,7 @@ END let rec subset s1 s2 = (equal s1 s2) || - match (HNode.node s1,HNode.node s2) with + match (Node.node s1,Node.node s2) with | Empty, _ -> true | _, Empty -> false | Leaf k1, _ -> mem k1 s2 @@ -237,8 +274,8 @@ END let equal (x,y) (z,t) = (equal x z)&&(equal y t) let equal a b = equal a b || equal b a let hash (x,y) = (* commutative hash *) - let x = HNode.hash x - and y = HNode.hash y + let x = Node.hash x + and y = Node.hash y in if x < y then HASHINT2(x,y) else HASHINT2(y,x) end) @@ -254,7 +291,7 @@ END if equal s1 s2 then s1 else - match (HNode.node s1,HNode.node s2) with + match (Node.node s1,Node.node s2) with | Empty, _ -> empty | _, Empty -> empty | Leaf k1, _ -> if mem k1 s2 then s1 else empty @@ -273,7 +310,7 @@ END if equal s1 s2 then empty else - match (HNode.node s1,HNode.node s2) with + match (Node.node s1,Node.node s2) with | Empty, _ -> empty | _, Empty -> s1 | Leaf k1, _ -> if mem k1 s2 then empty else s1 @@ -296,46 +333,46 @@ END [exists], [filter], [partition], [choose], [elements]) are implemented as for any other kind of binary trees. *) -let rec cardinal n = match HNode.node n with +let rec cardinal n = match Node.node n with | Empty -> 0 | Leaf _ -> 1 | Branch (_,_,t0,t1) -> cardinal t0 + cardinal t1 -let rec iter f n = match HNode.node n with +let rec iter f n = match Node.node n with | Empty -> () | Leaf k -> f k | Branch (_,_,t0,t1) -> iter f t0; iter f t1 -let rec fold f s accu = match HNode.node s with +let rec fold f s accu = match Node.node s with | Empty -> accu | Leaf k -> f k accu | Branch (_,_,t0,t1) -> fold f t0 (fold f t1 accu) -let rec for_all p n = match HNode.node n with +let rec for_all p n = match Node.node n with | Empty -> true | Leaf k -> p k | Branch (_,_,t0,t1) -> for_all p t0 && for_all p t1 -let rec exists p n = match HNode.node n with +let rec exists p n = match Node.node n with | Empty -> false | Leaf k -> p k | Branch (_,_,t0,t1) -> exists p t0 || exists p t1 -let rec filter pr n = match HNode.node n with +let rec filter pr n = match Node.node n with | Empty -> empty | Leaf k -> if pr k then n else empty | Branch (p,m,t0,t1) -> branch_ne p m (filter pr t0) (filter pr t1) let partition p s = - let rec part (t,f as acc) n = match HNode.node n with + let rec part (t,f as acc) n = match Node.node n with | Empty -> acc | Leaf k -> if p k then (add k t, f) else (t, add k f) | Branch (_,_,t0,t1) -> part (part acc t0) t1 in part (empty, empty) s -let rec choose n = match HNode.node n with +let rec choose n = match Node.node n with | Empty -> raise Not_found | Leaf k -> k | Branch (_, _,t0,_) -> choose t0 (* we know that [t0] is non-empty *) @@ -352,7 +389,7 @@ let split x s = (*s Additional functions w.r.t to [Set.S]. *) let rec intersect s1 s2 = (equal s1 s2) || - match (HNode.node s1,HNode.node s2) with + match (Node.node s1,Node.node s2) with | Empty, _ -> false | _, Empty -> false | Leaf k1, _ -> mem k1 s2 @@ -369,14 +406,14 @@ let rec intersect s1 s2 = (equal s1 s2) || -let rec uncons n = match HNode.node n with +let rec uncons n = match Node.node n with | Empty -> raise Not_found | Leaf k -> (k,empty) | Branch (p,m,s,t) -> let h,ns = uncons s in h,branch_ne p m ns t let from_list l = List.fold_left (fun acc e -> add e acc) empty l -let with_id = HNode.with_id +let with_id = Node.with_id end module Int : sig diff --git a/ptset.mli b/ptset.mli index 477acc3..27b6332 100644 --- a/ptset.mli +++ b/ptset.mli @@ -26,8 +26,18 @@ module type S = sig type elt - type data - type t + + type 'a node + module rec Node : sig + include Hcons.S with type data = Data.t + end + and Data : sig + include + Hashtbl.HashedType with type t = Node.t node + end + type data = Data.t + type t = Node.t + val empty : t val is_empty : t -> bool val mem : elt -> t -> bool @@ -78,4 +88,4 @@ module Int : sig val print : Format.formatter -> t -> unit end -module Make ( H : Hcons.S ) : S with type elt = H.t +module Make ( H : Hcons.SA ) : S with type elt = H.t diff --git a/results.c b/results.c index 19480fd..82013cb 100644 --- a/results.c +++ b/results.c @@ -29,8 +29,8 @@ results createResults (int n) R.n = 2*n-1; R.lgn = lg(n); - fprintf(stderr,"Size of the result set : %i elements, %li kB\n", n, - (((R.n+W-1)/W)*sizeof(int)/1024)); + //fprintf(stderr,"Size of the result set : %i elements, %li kB\n", n, + //(((R.n+W-1)/W)*sizeof(int)/1024)); R.tree = (int*) malloc (((R.n+W-1)/W)*sizeof(int)); clearBit(R.tree,0); // clear all return R; diff --git a/tree.ml b/tree.ml index 4b7cb54..8e5dbc7 100644 --- a/tree.ml +++ b/tree.ml @@ -430,7 +430,7 @@ let load ?(sample=64) ?(load_text=true) str = let in_c = Unix.in_channel_of_descr fd in let _ = set_binary_mode_in in_c true in let load_table () = - (let ms = input_line in_c in if ms <> magic_string then failwith ("Invalid index file " ^ ms)); + (let ms = input_line in_c in if ms <> magic_string then failwith "Invalid index file"); (let vs = input_line in_c in if vs <> version_string then failwith "Invalid version file"); let table : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t = Marshal.from_channel in_c @@ -450,7 +450,7 @@ let load ?(sample=64) ?(load_text=true) str = ntable in let _ = Printf.eprintf "\nLoading tag table : " in - let ntable = time ~count:0 load_table () in + let ntable = time (load_table) () in ignore(Unix.lseek fd (pos_in in_c) Unix.SEEK_SET); let tree = { doc = tree_load fd load_text sample; ttable = ntable;} diff --git a/tree.mli b/tree.mli index 280d97d..e4c60c8 100644 --- a/tree.mli +++ b/tree.mli @@ -11,7 +11,7 @@ val load : ?sample:int -> ?load_text:bool -> string -> t val tag_pool : t -> Tag.pool -type 'a node = private int +type 'a node type node_kind = [ `Tree | `Text ] val equal : [ `Tree ] node -> [ `Tree ] node -> bool val compare : [ `Tree ] node -> [ `Tree ] node -> int diff --git a/utils.ml b/utils.ml index 69754f5..36334f6 100644 --- a/utils.ml +++ b/utils.ml @@ -46,6 +46,9 @@ DEFINE SMALL_H_SIZE = PRIME2 DEFINE MED_H_SIZE = PRIME5 DEFINE BIG_H_SIZE = PRIME8 +DEFINE SMALL_A_SIZE = 128 +DEFINE MED_A_SIZE = 2048 +DEFINE BIG_A_SIZE = 8192 let read_procmem () = @@ -89,10 +92,9 @@ let time f ?(count=1) x = let r = f x in let t2 = Unix.gettimeofday () in let t = (1000. *. (t2 -. t1)) in - if i > count then (l:= t::!l;r) - else begin Printf.eprintf "run %i/%i, %fms\n%!" i count t; - loop (i+1) end + if i >= count then (l:= t::!l;r) + else loop (i+1) in loop 1 ;; let total_time () = List.fold_left (+.) 0. !l;; -- 2.17.1