X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=tree.ml;h=487a057b30ad73301d11d3c9bef22f918df09c5c;hb=d4342e4bb9c853114de295567cd91ec86bb9e68f;hp=20a77925c9d346c7ae83f7529a75c5ef66ead2ff;hpb=aafe9afd804263ac5e28cb2e7857cc003e3c1d2d;p=SXSI%2Fxpathcomp.git diff --git a/tree.ml b/tree.ml index 20a7792..487a057 100644 --- a/tree.ml +++ b/tree.ml @@ -48,12 +48,23 @@ sig val has_tagged_foll : t -> Tag.t -> bool val tagged_desc : t -> Tag.t -> t val tagged_foll : t -> Tag.t -> t + val tagged_below : t -> Ptset.t -> Ptset.t -> t + val tagged_next : t -> Ptset.t -> Ptset.t -> t -> t + val tagged_desc_only : t -> Ptset.t -> t + val tagged_foll_only : t -> Ptset.t -> t -> t + val text_below : t -> t + val text_next : t -> t -> t val init_tagged_next : t -> Tag.t -> unit - val tagged_next : t -> Tag.t -> t val subtree_tags : t -> Tag.t -> int val is_left : t -> bool val print_id : Format.formatter -> t -> unit val test_xml_tree : Format.formatter -> Ptset.t -> t -> unit + val init_contains : t -> string -> unit + val init_naive_contains : t -> string -> unit + val mk_nil : t -> t + val test_jump : t -> Tag.t -> unit + val time_xml_tree : t -> Tag.t -> int list + val time_xml_tree2 : t -> Tag.t -> int list end module XML = @@ -126,6 +137,7 @@ struct external parent : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_parent" external parent_doc : t -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_doc" + external prev_doc : t -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_prev_doc" external first_child : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" @@ -155,17 +167,33 @@ struct external my_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text" external next_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_next_text" - + external doc_ids : t -> [`Tree ] node -> [`Text ] node * [`Text ] node = "caml_xml_tree_doc_ids" external text_xml_id : t -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" external node_xml_id : t -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id" external is_ancestor : t -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" external tagged_desc : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_desc" external tagged_foll : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_foll" - external tagged_next : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_next" external subtree_tags : t -> [`Tree ] node -> Tag.t -> int = "caml_xml_tree_subtree_tags" + external tagged_below : t -> [`Tree ] node -> Ptset.int_vector -> Ptset.int_vector -> [`Tree ] node = "caml_xml_tree_tagged_below" + external tagged_desc_only : t -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node = "caml_xml_tree_tagged_desc_only" + external tagged_next : t -> [`Tree ] node -> Ptset.int_vector -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_next" + external tagged_foll_only : t -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_only" + external tagged_desc_or_foll_only : t -> [`Tree ] node -> Ptset.int_vector -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_only" + external tagged_foll_below : t -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_below" + + let test_jump tree tag = + let rec loop id ctx = + if id != nil + then + let first = tagged_desc tree id tag + and next = tagged_desc tree id tag + in + loop first id; + loop next ctx + in + loop (root tree) (root tree) - - + let test_xml_tree ppf tags v = let pr x = Format.fprintf ppf x in let rec aux id = @@ -178,17 +206,21 @@ struct (tag_id v id) (Tag.to_string (tag_id v id)) (node_xml_id v id); - pr "DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s) ParentDoc(my_text)=%i\n%!" + pr "DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s) ParentDoc(my_text)=%i PrevDoc(next_text)=%i\n%!" (int_of_node (prev_text v id)) (Text.get_text v (prev_text v id)) (int_of_node (my_text v id)) (Text.get_text v (my_text v id)) (int_of_node (next_text v id)) (Text.get_text v (next_text v id)) - (int_of_node(parent_doc v (my_text v id))); - pr "Testing Tagged*\n%!"; - Ptset.iter (fun t -> - let str = Tag.to_string t in + (int_of_node(parent_doc v (my_text v id))) + (int_of_node(prev_doc v (next_text v id))); + let i1,i2 = doc_ids v id in + pr "Testing DocIds below (%i,%i)*\n%!" + (int_of_node i1) (int_of_node i2); + pr "Testing Tagged*\n%!"; + Ptset.iter (fun t -> + let str = Tag.to_string t in if Tag.pcdata <> t then begin pr "Tag: %s : \n%!" str; @@ -202,6 +234,46 @@ struct end in aux (root v) + + let rrrr = ref 0 + + let time_xml_tree v tag = + + let rec aux id acc = + incr rrrr; + if (is_nil id) + then acc + else begin + let acc = + if tag == (tag_id v id) + then + id::acc + else acc + in + aux (next_sibling v id) (aux (first_child v id) acc); + end + in + let r = aux (root v) [] in + Printf.eprintf "%i\n%!" !rrrr;r + + let rrrr2 = ref 0 + let time_xml_tree2 v tag = + let rec aux id acc ctx= + incr rrrr2; + if (is_nil id) + then acc + else begin + let acc = + if tag == (tag_id v id) + then + id::acc + else acc + in + aux (tagged_foll_below v id tag ctx) (aux (tagged_desc v id tag) acc id) ctx; + end + in + let r = aux (root v) [] (root v) in + Printf.eprintf "%i\n%!" !rrrr2; r @@ -273,6 +345,43 @@ struct let dump { doc=t } = Tree.print_skel t let test_xml_tree ppf tags { doc=t } = Tree.test_xml_tree ppf tags t + let time_xml_tree { doc=t } tag = Tree.time_xml_tree t tag + let time_xml_tree2 { doc=t } tag = Tree.time_xml_tree2 t tag + let test_jump { doc=t } tag = Tree.test_jump t tag + let contains_array = ref [| |] + + let init_contains t s = + let a = Text.contains t.doc s + in + Array.fast_sort (compare) a; + contains_array := a + + let init_naive_contains t s = + let i,j = Tree.doc_ids t.doc (Tree.root t.doc) + in + let regexp = Str.regexp_string s in + let matching arg = + try + let _ = Str.search_forward regexp arg 0; + in true + with _ -> false + in + let rec loop n acc l = + if n >= j then acc,l + else + let s = (*Printf.eprintf "%i \n%!" n;*)Text.get_cached_text t.doc n + in + if matching s + then loop (n+1) (n::acc) (l+1) + else loop (n+1) acc l + in + let acc,l = loop i [] 0 in + let a = Array.create l Text.nil in + let _ = List.fold_left (fun cpt e -> a.(cpt) <- e; (cpt-1)) (l-1) acc + in + contains_array := a + + module DocIdSet = struct include Set.Make (struct type t = string_content @@ -336,7 +445,7 @@ struct | Node (NC t) -> Printf.sprintf "Node (NC %i)" (int_of_node t) | Node (SC (t,i)) -> Printf.sprintf "Node (SC (%i,%i))" (int_of_node t) (int_of_node i) - + let mk_nil t = { t with node = Nil } let root n = { n with node = norm (Tree.root n.doc) } let is_root n = match n.node with | Node(NC t) -> (int_of_node t) == 0 @@ -446,14 +555,130 @@ struct | { doc=d; node=Node(NC n) } -> { t with node = norm (tagged_desc d n tag) } | _ -> { t with node=Nil } -(* - let tagged_next t tag = - if tag == Tag.attribute || tag == Tag.pcdata then failwith "tagged_next" - else - match tagged_desc t tag with - | { doc = d; node=Nil } -> tagged_foll t tag - | x -> x -*) + + let tagged_next t tb tf s = + match s with + | { node = Node (NC(below)) } -> begin + match t with + | { doc = d; node=Node(NC n) } -> + { t with node= norm (tagged_next d n (Ptset.to_int_vector tb) (Ptset.to_int_vector tf) below) } + | { doc = d; node=Node(SC (i,n) ) } when is_nil n -> + let p = parent_doc d i in + { t with node= norm (tagged_next d p (Ptset.to_int_vector tb) (Ptset.to_int_vector tf) below) } + | { doc = d; node=Node(SC (_,n) ) } -> + if Ptset.mem (tag_id d n) (Ptset.union tb tf) + then { t with node=Node(NC(n)) } + else + let vb = Ptset.to_int_vector tb in + let vf = Ptset.to_int_vector tf in + let node = + let dsc = tagged_below d n vb vf in + if is_nil dsc + then tagged_next d n vb vf below + else dsc + in + { t with node = norm node } + | _ -> {t with node=Nil } + end + + | _ -> {t with node=Nil } + + let tagged_foll_only t tf s = + match s with + | { node = Node (NC(below)) } -> begin + match t with + | { doc = d; node=Node(NC n) } -> + { t with node= norm (tagged_foll_only d n (Ptset.to_int_vector tf) below) } + | { doc = d; node=Node(SC (i,n) ) } when is_nil n -> + let p = parent_doc d i in + { t with node= norm (tagged_foll_only d p (Ptset.to_int_vector tf) below) } + | { doc = d; node=Node(SC (_,n) ) } -> + if Ptset.mem (tag_id d n) tf + then { t with node=Node(NC(n)) } + else + let vf = Ptset.to_int_vector tf in + let node = + let dsc = tagged_desc_only d n vf in + if is_nil dsc + then tagged_foll_only d n vf below + else dsc + in + { t with node = norm node } + | _ -> {t with node=Nil } + end + + | _ -> {t with node=Nil } + + + let tagged_below t tc td = + match t with + | { doc = d; node=Node(NC n) } -> + let vc = Ptset.to_int_vector tc + in + let vd = Ptset.to_int_vector td + in + { t with node= norm(tagged_below d n vc vd) } + | _ -> { t with node=Nil } + + let tagged_desc_only t td = + match t with + | { doc = d; node=Node(NC n) } -> + let vd = Ptset.to_int_vector td + in + { t with node= norm(tagged_desc_only d n vd) } + | _ -> { t with node=Nil } + + + let last_idx = ref 0 + let array_find a i j = + let l = Array.length a in + let rec loop idx x y = + if x > y || idx >= l then Text.nil + else + if a.(idx) >= x then if a.(idx) > y then Text.nil else (last_idx := idx;a.(idx)) + else loop (idx+1) x y + in + if a.(0) > j || a.(l-1) < i then Text.nil + else loop !last_idx i j + + + let text_below t = + let l = Array.length !contains_array in + if l = 0 then { t with node=Nil } + else + match t with + | { doc = d; node=Node(NC n) } -> + let i,j = doc_ids t.doc n in + let id = array_find !contains_array i j + in + if id == Text.nil then + { t with node=Nil } + else + {t with node = Node(SC(id, Tree.next_sibling d (Tree.prev_doc d id))) } + | _ -> { t with node=Nil } + + let text_next t root = + let l = Array.length !contains_array in + if l = 0 then { t with node=Nil } + else + let inf = match t with + | { doc =d; node = Node(NC n) } -> snd(doc_ids d n)+1 + | { node = Node(SC(i,_)) } -> i+1 + | _ -> assert false + in + match root with + | { doc = d; node= Node (NC n) } -> + let _,j = doc_ids t.doc n in + + let id = array_find !contains_array inf j + in + if id == Text.nil then { doc = d; node= Nil } + else + {doc = d; node = Node(SC(id,Tree.next_sibling d (Tree.prev_doc d id))) } + | _ -> { t with node=Nil} + + + let subtree_tags t tag = match t with { doc = d; node = Node(NC n) } -> @@ -491,14 +716,14 @@ struct - let tagged_next t tag = +(* let tagged_next t tag = if !idx >= Array.length !tagged_desc_array then {t with node=Nil} else let r = !tagged_desc_array.(!idx) in incr idx; r - +*) let has_tagged_foll t tag = is_node (tagged_foll t tag)