+let collect_tags tree =
+ let _ = Printf.eprintf "Collecting Tags\n%!" in
+ let h_union = MemUnion.create BIG_H_SIZE in
+ let pt_cup s1 s2 =
+ try
+ MemUnion.find h_union (s1,s2)
+ with
+ | Not_found -> let s = Ptset.Int.union s1 s2
+ in
+ MemUnion.add h_union (s1,s2) s;s
+ in
+ let h_add = MemAdd.create BIG_H_SIZE in
+ let pt_add t s =
+ try MemAdd.find h_add (t,s)
+ with
+ | Not_found -> let r = Ptset.Int.add t s in
+ MemAdd.add h_add (t,s) r;r
+ in
+ let h = Hashtbl.create BIG_H_SIZE in
+ let update t sc sb ss sa =
+ let schild,sbelow,ssibling,safter =
+ try
+ Hashtbl.find h t
+ with
+ | Not_found ->
+ (Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty)
+ in
+ Hashtbl.replace h t
+ (pt_cup sc schild,pt_cup sbelow sb, pt_cup ssibling ss, pt_cup safter sa)
+ in
+ let rec loop right id acc_after =
+ if id == nil
+ then Ptset.Int.empty,Ptset.Int.empty,acc_after else
+ let sibling2,desc2,after2 = loop true (tree_next_sibling tree id) acc_after in
+ let child1,desc1,after1 = loop false (tree_first_child tree id) after2 in
+ let tag = tree_tag tree id in
+ update tag child1 desc1 sibling2 after2;
+ ( pt_add tag sibling2,
+ pt_add tag (pt_cup desc1 desc2),
+ if right then pt_cup after1 (pt_cup desc1 desc2) else acc_after )
+ in
+ let _ = loop false (tree_root tree) Ptset.Int.empty in
+ let _ = Printf.eprintf "Finished\n%!" in
+ h
+
+
+
+
+let contains_array = ref [| |]
+let contains_index = Hashtbl.create 4096
+let in_array _ i =
+ try
+ Hashtbl.find contains_index i
+ with
+ Not_found -> false
+
+let init_textfun f t s =
+ let a = match f with
+ | `CONTAINS -> text_contains t.doc s
+ | `STARTSWITH -> text_prefix t.doc s
+ | `ENDSWITH -> text_suffix t.doc s
+ | `EQUALS -> text_equals t.doc s
+ in
+ (*Array.fast_sort (compare) a; *)
+ contains_array := a;
+ Array.iter (fun x -> Hashtbl.add contains_index x true) !contains_array
+
+let count_contains t s = text_count_contains t.doc s
+
+let init_naive_contains t s =
+ let i,j = tree_doc_ids t.doc (tree_root t.doc)
+ in
+ let regexp = Str.regexp_string s in
+ let matching arg =
+ try
+ let _ = Str.search_forward regexp arg 0;
+ in true
+ with _ -> false
+ in
+ let rec loop n acc l =
+ if n >= j then acc,l
+ else
+ let s = text_get_text t.doc n
+ in
+ if matching s
+ then loop (nodei ((inode n)+1)) (n::acc) (l+1)
+ else loop (nodei ((inode n)+1)) acc l
+ in
+ let acc,l = loop i [] 0 in
+ let a = Array.create l nulldoc in
+ let _ = List.fold_left (fun cpt e -> a.(cpt) <- e; (cpt-1)) (l-1) acc
+ in
+ contains_array := a
+
+let last_idx = ref 0
+
+let array_find a i j =
+ let l = Array.length a in
+ let rec loop idx x y =
+ if x > y || idx >= l then nulldoc
+ else
+ if a.(idx) >= x then if a.(idx) > y then nulldoc else (last_idx := idx;a.(idx))
+ else loop (idx+1) x y
+ in
+ if a.(0) > j || a.(l-1) < i then nulldoc
+ else loop !last_idx i j
+
+let text_below tree t =
+ let l = Array.length !contains_array in
+ let i,j = tree_doc_ids tree.doc t in
+ let id = if l == 0 then i else (array_find !contains_array i j) in
+ tree_parent_node tree.doc id
+
+let text_next tree t root =
+ let l = Array.length !contains_array in
+ let inf = nodei((inode(snd(tree_doc_ids tree.doc t)))+1) in
+ let _,j = tree_doc_ids tree.doc root in
+ let id = if l == 0 then if inf > j then nulldoc else inf
+ else array_find !contains_array inf j
+ in
+ tree_parent_node tree.doc id