X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=tree.ml;h=487a057b30ad73301d11d3c9bef22f918df09c5c;hb=92455238a637876bec18bfdaed4f5342f4cbbd1f;hp=b19a3aaa1faa835de69df291bf265454a854db3c;hpb=dc91851aaeac91a71eba2c266d0227adea0c5815;p=SXSI%2Fxpathcomp.git diff --git a/tree.ml b/tree.ml index b19a3aa..487a057 100644 --- a/tree.ml +++ b/tree.ml @@ -60,8 +60,11 @@ sig val print_id : Format.formatter -> t -> unit val test_xml_tree : Format.formatter -> Ptset.t -> t -> unit val init_contains : t -> string -> unit + val init_naive_contains : t -> string -> unit val mk_nil : t -> t val test_jump : t -> Tag.t -> unit + val time_xml_tree : t -> Tag.t -> int list + val time_xml_tree2 : t -> Tag.t -> int list end module XML = @@ -176,6 +179,7 @@ struct external tagged_next : t -> [`Tree ] node -> Ptset.int_vector -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_next" external tagged_foll_only : t -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_only" external tagged_desc_or_foll_only : t -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_only" + external tagged_foll_below : t -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_below" let test_jump tree tag = let rec loop id ctx = @@ -230,6 +234,46 @@ struct end in aux (root v) + + let rrrr = ref 0 + + let time_xml_tree v tag = + + let rec aux id acc = + incr rrrr; + if (is_nil id) + then acc + else begin + let acc = + if tag == (tag_id v id) + then + id::acc + else acc + in + aux (next_sibling v id) (aux (first_child v id) acc); + end + in + let r = aux (root v) [] in + Printf.eprintf "%i\n%!" !rrrr;r + + let rrrr2 = ref 0 + let time_xml_tree2 v tag = + let rec aux id acc ctx= + incr rrrr2; + if (is_nil id) + then acc + else begin + let acc = + if tag == (tag_id v id) + then + id::acc + else acc + in + aux (tagged_foll_below v id tag ctx) (aux (tagged_desc v id tag) acc id) ctx; + end + in + let r = aux (root v) [] (root v) in + Printf.eprintf "%i\n%!" !rrrr2; r @@ -301,6 +345,8 @@ struct let dump { doc=t } = Tree.print_skel t let test_xml_tree ppf tags { doc=t } = Tree.test_xml_tree ppf tags t + let time_xml_tree { doc=t } tag = Tree.time_xml_tree t tag + let time_xml_tree2 { doc=t } tag = Tree.time_xml_tree2 t tag let test_jump { doc=t } tag = Tree.test_jump t tag let contains_array = ref [| |] @@ -310,6 +356,31 @@ struct Array.fast_sort (compare) a; contains_array := a + let init_naive_contains t s = + let i,j = Tree.doc_ids t.doc (Tree.root t.doc) + in + let regexp = Str.regexp_string s in + let matching arg = + try + let _ = Str.search_forward regexp arg 0; + in true + with _ -> false + in + let rec loop n acc l = + if n >= j then acc,l + else + let s = (*Printf.eprintf "%i \n%!" n;*)Text.get_cached_text t.doc n + in + if matching s + then loop (n+1) (n::acc) (l+1) + else loop (n+1) acc l + in + let acc,l = loop i [] 0 in + let a = Array.create l Text.nil in + let _ = List.fold_left (fun cpt e -> a.(cpt) <- e; (cpt-1)) (l-1) acc + in + contains_array := a + module DocIdSet = struct @@ -558,16 +629,17 @@ struct | _ -> { t with node=Nil } + let last_idx = ref 0 let array_find a i j = let l = Array.length a in let rec loop idx x y = if x > y || idx >= l then Text.nil else - if a.(idx) >= x then if a.(idx) > y then Text.nil else a.(idx) + if a.(idx) >= x then if a.(idx) > y then Text.nil else (last_idx := idx;a.(idx)) else loop (idx+1) x y in if a.(0) > j || a.(l-1) < i then Text.nil - else loop 0 i j + else loop !last_idx i j let text_below t =