From 25a3fa55f6de1835d2407283eeb43b01819543f6 Mon Sep 17 00:00:00 2001 From: kim Date: Tue, 10 Mar 2009 01:42:27 +0000 Subject: [PATCH] Added naive contains git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@233 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- OCamlDriver.cpp | 4 ++-- main.ml | 15 +++++++++++++-- tree.ml | 31 +++++++++++++++++++++++++++++-- tree.mli | 1 + 4 files changed, 45 insertions(+), 6 deletions(-) diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index f56b4a3..d85be93 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -171,8 +171,8 @@ extern "C" CAMLprim value caml_text_collection_count_contains(value tree,value s } extern "C" CAMLprim value caml_text_collection_count(value tree,value str){ CAMLparam2(tree,str); - //uchar * cstr = (uchar *) String_val(str); - NOT_IMPLEMENTED("text_collection_count"); + uchar * cstr = (uchar *) String_val(str); + CAMLreturn (Val_int((XMLTREE(tree)->Count(cstr)))); CAMLreturn (Val_unit); } diff --git a/main.ml b/main.ml index c446ef3..8264278 100644 --- a/main.ml +++ b/main.ml @@ -43,14 +43,25 @@ let main v query output = let _ = Printf.eprintf "%!" in let _ = match contains with None -> () - | Some s -> Tree.Binary.init_contains v s + | Some s -> + let r = Tree.Binary.count v s + in + Printf.eprintf "Global count is %i, using " r; + if r < 60000 then begin + Printf.eprintf "TextCollection contains\nCalling global contains : "; + time (Tree.Binary.init_contains v) s + end + else begin + Printf.eprintf "Naive contains\nCalling global contains : "; + time (Tree.Binary.init_naive_contains v) s + end in Printf.eprintf "Execution time %s : " (if !Options.count_only then "(counting only)" else ""); begin if !Options.count_only then failwith "Count only not implemented in this version" else - let _ = Gc.set ({ Gc.get() with Gc.max_overhead = 1000000; Gc.space_overhead = 100 }) in + (* let _ = Gc.set ({ Gc.get() with Gc.max_overhead = 1000000; Gc.space_overhead = 100 }) in *) let result = time (if !Options.time then run_time auto else run auto) v in Printf.eprintf "Number of nodes in the result set : %i\n" (TS.length result); Printf.eprintf "\n%!"; diff --git a/tree.ml b/tree.ml index b19a3aa..cbb267a 100644 --- a/tree.ml +++ b/tree.ml @@ -60,6 +60,7 @@ sig val print_id : Format.formatter -> t -> unit val test_xml_tree : Format.formatter -> Ptset.t -> t -> unit val init_contains : t -> string -> unit + val init_naive_contains : t -> string -> unit val mk_nil : t -> t val test_jump : t -> Tag.t -> unit end @@ -310,6 +311,31 @@ struct Array.fast_sort (compare) a; contains_array := a + let init_naive_contains t s = + let i,j = Tree.doc_ids t.doc (Tree.root t.doc) + in + let regexp = Str.regexp_string s in + let matching arg = + try + let _ = Str.search_forward regexp arg 0; + in true + with _ -> false + in + let rec loop n acc l = + if n >= j then acc,l + else + let s = (*Printf.eprintf "%i \n%!" n;*)Text.get_cached_text t.doc n + in + if matching s + then loop (n+1) (n::acc) (l+1) + else loop (n+1) acc l + in + let acc,l = loop i [] 0 in + let a = Array.create l Text.nil in + let _ = List.fold_left (fun cpt e -> a.(cpt) <- e; (cpt-1)) (l-1) acc + in + contains_array := a + module DocIdSet = struct @@ -558,16 +584,17 @@ struct | _ -> { t with node=Nil } + let last_idx = ref 0 let array_find a i j = let l = Array.length a in let rec loop idx x y = if x > y || idx >= l then Text.nil else - if a.(idx) >= x then if a.(idx) > y then Text.nil else a.(idx) + if a.(idx) >= x then if a.(idx) > y then Text.nil else (last_idx := idx;a.(idx)) else loop (idx+1) x y in if a.(0) > j || a.(l-1) < i then Text.nil - else loop 0 i j + else loop !last_idx i j let text_below t = diff --git a/tree.mli b/tree.mli index e9ef8c7..89bf83d 100644 --- a/tree.mli +++ b/tree.mli @@ -58,6 +58,7 @@ sig val print_id : Format.formatter -> t -> unit val test_xml_tree : Format.formatter -> Ptset.t -> t -> unit val init_contains : t -> string -> unit + val init_naive_contains : t -> string -> unit val mk_nil : t -> t val test_jump : t -> Tag.t -> unit end -- 2.17.1