+(* [benchmark_fsns t] forwards to the [benchmark_fsns] binding already in
+   scope, handing it the [doc] field of [t]. *)
+let benchmark_fsns t =
+  let doc = t.doc in
+  benchmark_fsns doc
+
+
+
+
+
+
+
+(* [text_size tree] applies [inode] to the second component of the pair
+   returned by [tree_doc_ids tree root]. *)
+let text_size tree =
+  let _, last_doc = tree_doc_ids tree root in
+  inode last_doc
+
+(* [text_get_text t x] yields the empty string when [x] is physically equal
+   to [nulldoc]; any other node is passed on to the [text_get_text] binding
+   already in scope. *)
+let text_get_text t (x:[`Text] node) =
+  match x == nulldoc with
+  | true -> ""
+  | false -> text_get_text t x
+
+
+
+
+module HPtset = Hashtbl.Make(Ptset.Int) (* hash tables keyed by [Ptset.Int] values *)
+
+let vector_htbl = HPtset.create MED_H_SIZE (* global cache, read and filled by [ptset_to_vector];
+   [MED_H_SIZE] is the initial size and is defined outside this chunk *)
+
+(* [ptset_to_vector s] returns the vector cached for [s] in [vector_htbl].
+   On a cache miss it allocates an unordered set sized to the cardinal of
+   [s], inserts every element of [s] into it, records it in the cache and
+   returns it. *)
+let ptset_to_vector s =
+  try HPtset.find vector_htbl s
+  with Not_found ->
+    let size = Ptset.Int.cardinal s in
+    let vec = unordered_set_alloc size in
+    Ptset.Int.iter (fun elt -> unordered_set_insert vec elt) s;
+    HPtset.add vector_htbl s vec;
+    vec
+
+
+
+(* [subtree_size t i] forwards to [tree_subtree_size] with the [doc] field
+   of [t] and the node [i]. *)
+let subtree_size t i =
+  let doc = t.doc in
+  tree_subtree_size doc i
+(* [subtree_elements t i] forwards to [tree_subtree_elements] with the
+   [doc] field of [t] and the node [i]. *)
+let subtree_elements t i =
+  let doc = t.doc in
+  tree_subtree_elements doc i
+(* [text_size t] calls the previously bound [text_size] on the [doc] field
+   of [t]; from this point on the name refers to this wrapper. *)
+let text_size t =
+  let doc = t.doc in
+  text_size doc
+
+(* Hash tables keyed by UNORDERED pairs of [Ptset.Int.t] values.
+
+   [hash] sorts the two uids before mixing them, so [(a, b)] and [(b, a)]
+   hash alike.  [equal] therefore accepts a pair in either order, and it
+   requires BOTH components to match (by physical identity).  The previous
+   definition used [||], which made [(a, b)] equal to [(a, c)] for any [c];
+   that contradicts [hash] and would let a lookup return the value stored
+   for a different pair.
+
+   NOTE(review): physical equality is only meaningful if [Ptset.Int.t]
+   values are shared (hash-consed), as the use of [Ptset.Int.uid] suggests
+   -- confirm against the Ptset module. *)
+module MemUnion = Hashtbl.Make (struct
+  type t = Ptset.Int.t*Ptset.Int.t
+  (* same two sets, in the same or in swapped order *)
+  let equal (x,y) (z,t) = (x == z && y == t) || (x == t && y == z)
+  let hash (x,y) = (* commutative hash *)
+    let x = Ptset.Int.uid x
+    and y = Ptset.Int.uid y
+    in
+    if x <= y then HASHINT2(x,y) else HASHINT2(y,x)
+end)
+
+(* Hash tables keyed by a (tag, set) pair.  Two keys are equal when both
+   the tags and the sets are physically equal; the hash mixes the tag with
+   the uid of the set. *)
+module MemAdd = Hashtbl.Make (
+  struct
+    type t = Tag.t*Ptset.Int.t
+    let equal (tag1,set1) (tag2,set2) = tag1 == tag2 && set1 == set2
+    let hash (tag,set) =
+      let set_uid = Ptset.Int.uid set in
+      HASHINT2(tag,set_uid)
+  end)
+
+let collect_tags tree = (* [collect_tags tree] walks [tree] from [tree_root tree]
+   and returns a [Hashtbl] mapping each tag (as given by [tree_tag]) to a
+   4-tuple of [Ptset.Int] sets, called child, below, sibling and after in
+   [update].  Every visited node unions its contribution into the entry of
+   its tag.  Two trace messages are printed on stderr.  The traversal is
+   plain (non tail) recursion over both sibling and child links. *)
+ let _ = Printf.eprintf "Collecting Tags\n%!" in
+(* let h_union = MemUnion.create BIG_H_SIZE in
+ let pt_cup s1 s2 =
+ try
+ MemUnion.find h_union (s1,s2)
+ with
+ | Not_found -> let s = Ptset.Int.union s1 s2
+ in
+ MemUnion.add h_union (s1,s2) s;s
+ in
+ let h_add = MemAdd.create BIG_H_SIZE in
+ let pt_add t s =
+ try MemAdd.find h_add (t,s)
+ with
+ | Not_found -> let r = Ptset.Int.add t s in
+ MemAdd.add h_add (t,s) r;r
+ in *)
+ let pt_cup = Ptset.Int.union in (* unmemoized union; the cached variant is the disabled code above *)
+ let pt_add = Ptset.Int.add in (* unmemoized add, likewise *)
+ let h = Hashtbl.create BIG_H_SIZE in (* result table: tag -> (child, below, sibling, after) *)
+ let update t sc sb ss sa = (* union [sc], [sb], [ss], [sa] into the entry stored for tag [t] *)
+ let schild,sbelow,ssibling,safter =
+ try
+ Hashtbl.find h t
+ with
+ | Not_found -> (* tag not seen yet: start from four empty sets *)
+ (Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty)
+ in
+ Hashtbl.replace h t
+ (pt_cup sc schild,pt_cup sbelow sb, pt_cup ssibling ss, pt_cup safter sa)
+ in
+ let rec loop right id acc_after = (* [right] is true only when [id] was reached through [tree_next_sibling] *)
+ if id == nil
+ then Ptset.Int.empty,Ptset.Int.empty,acc_after else (* no node: two empty sets, accumulator unchanged *)
+ let sibling2,desc2,after2 = loop true (tree_next_sibling tree id) acc_after in (* next sibling is handled first *)
+ let child1,desc1,after1 = loop false (tree_first_child tree id) after2 in (* then the first child, seeded with [after2] *)
+ let tag = tree_tag tree id in
+ update tag child1 desc1 sibling2 after2;
+ ( pt_add tag sibling2, (* this tag plus the first component returned for the next sibling *)
+ pt_add tag (pt_cup desc1 desc2), (* this tag plus both second components *)
+ if right then pt_cup after1 (pt_cup desc1 desc2) else acc_after ) (* only sibling-reached nodes extend the third component *)
+ in
+ let _ = loop false (tree_root tree) Ptset.Int.empty in (* the triple returned for the root is discarded; only [h] matters *)
+ let _ = Printf.eprintf "Finished\n%!" in
+ h
+
+
+
+
+let contains_array = ref [| |] (* array stored by the latest [init_textfun] call; starts empty *)
+let contains_index = Hashtbl.create 4096 (* [init_textfun] binds each array element to [true] here; queried by [in_array] *)
+(* [in_array _ i] looks [i] up in [contains_index] and returns the boolean
+   bound to it, or [false] when there is no binding.  The first argument
+   is ignored. *)
+let in_array _ key =
+  try Hashtbl.find contains_index key
+  with Not_found -> false
+
+(* [init_textfun f t s] runs the text search selected by [f] (one of
+   `CONTAINS, `STARTSWITH, `ENDSWITH, `EQUALS) on the [doc] field of [t]
+   with argument [s].  The resulting array is stored in [contains_array],
+   and each of its elements is bound to [true] in [contains_index].
+
+   NOTE(review): [contains_index] is not cleared here, so bindings added
+   by earlier calls remain visible to [in_array] -- confirm this is
+   intended. *)
+let init_textfun f t s =
+  let doc = t.doc in
+  let hits =
+    match f with
+    | `CONTAINS -> text_contains doc s
+    | `STARTSWITH -> text_prefix doc s
+    | `ENDSWITH -> text_suffix doc s
+    | `EQUALS -> text_equals doc s
+  in
+  (* The array is stored as returned, without sorting. *)
+  contains_array := hits;
+  Array.iter (fun hit -> Hashtbl.add contains_index hit true) hits
+
+(* [count_contains t s] forwards to [text_count_contains] with the [doc]
+   field of [t] and the argument [s]. *)
+let count_contains t s =
+  let doc = t.doc in
+  text_count_contains doc s
+
+let init_naive_contains t s =
+ let i,j = tree_doc_ids t.doc (tree_root t.doc)
+ in
+ let regexp = Str.regexp_string s in
+ let matching arg =
+ try
+ let _ = Str.search_forward regexp arg 0;
+ in true
+ with _ -> false
+ in
+ let rec loop n acc l =
+ if n >= j then acc,l
+ else
+ let s = text_get_text t.doc n