}
extern "C" CAMLprim value caml_text_collection_count(value tree,value str){
CAMLparam2(tree,str);
- //uchar * cstr = (uchar *) String_val(str);
- NOT_IMPLEMENTED("text_collection_count");
+ uchar * cstr = (uchar *) String_val(str);
+ CAMLreturn (Val_int((XMLTREE(tree)->Count(cstr))));
CAMLreturn (Val_unit);
}
let _ = Printf.eprintf "%!" in
let _ = match contains with
None -> ()
- | Some s -> Tree.Binary.init_contains v s
+ | Some s ->
+ let r = Tree.Binary.count v s
+ in
+ Printf.eprintf "Global count is %i, using " r;
+ if r < 60000 then begin
+ Printf.eprintf "TextCollection contains\nCalling global contains : ";
+ time (Tree.Binary.init_contains v) s
+ end
+ else begin
+ Printf.eprintf "Naive contains\nCalling global contains : ";
+ time (Tree.Binary.init_naive_contains v) s
+ end
in
Printf.eprintf "Execution time %s : " (if !Options.count_only then "(counting only)" else "");
begin
if !Options.count_only then
failwith "Count only not implemented in this version"
else
- let _ = Gc.set ({ Gc.get() with Gc.max_overhead = 1000000; Gc.space_overhead = 100 }) in
+ (* let _ = Gc.set ({ Gc.get() with Gc.max_overhead = 1000000; Gc.space_overhead = 100 }) in *)
let result = time (if !Options.time then run_time auto else run auto) v in
Printf.eprintf "Number of nodes in the result set : %i\n" (TS.length result);
Printf.eprintf "\n%!";
val print_id : Format.formatter -> t -> unit
val test_xml_tree : Format.formatter -> Ptset.t -> t -> unit
val init_contains : t -> string -> unit
+ val init_naive_contains : t -> string -> unit
val mk_nil : t -> t
val test_jump : t -> Tag.t -> unit
end
Array.fast_sort (compare) a;
contains_array := a
+ let init_naive_contains t s =
+ let i,j = Tree.doc_ids t.doc (Tree.root t.doc)
+ in
+ let regexp = Str.regexp_string s in
+ let matching arg =
+ try
+ let _ = Str.search_forward regexp arg 0;
+ in true
+ with _ -> false
+ in
+ let rec loop n acc l =
+ if n >= j then acc,l
+ else
+ let s = (*Printf.eprintf "%i \n%!" n;*)Text.get_cached_text t.doc n
+ in
+ if matching s
+ then loop (n+1) (n::acc) (l+1)
+ else loop (n+1) acc l
+ in
+ let acc,l = loop i [] 0 in
+ let a = Array.create l Text.nil in
+ let _ = List.fold_left (fun cpt e -> a.(cpt) <- e; (cpt-1)) (l-1) acc
+ in
+ contains_array := a
+
module DocIdSet = struct
| _ -> { t with node=Nil }
+ let last_idx = ref 0
let array_find a i j =
let l = Array.length a in
let rec loop idx x y =
if x > y || idx >= l then Text.nil
else
- if a.(idx) >= x then if a.(idx) > y then Text.nil else a.(idx)
+ if a.(idx) >= x then if a.(idx) > y then Text.nil else (last_idx := idx;a.(idx))
else loop (idx+1) x y
in
if a.(0) > j || a.(l-1) < i then Text.nil
- else loop 0 i j
+ else loop !last_idx i j
let text_below t =
val print_id : Format.formatter -> t -> unit
val test_xml_tree : Format.formatter -> Ptset.t -> t -> unit
val init_contains : t -> string -> unit
+ val init_naive_contains : t -> string -> unit
val mk_nil : t -> t
val test_jump : t -> Tag.t -> unit
end