X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=tree.ml;h=20a77925c9d346c7ae83f7529a75c5ef66ead2ff;hb=aafe9afd804263ac5e28cb2e7857cc003e3c1d2d;hp=780791a914e0463e31f8b70f80da940c519c03e0;hpb=4a66518948bf6356b5cb72ba30b4d48a8c9e833a;p=SXSI%2Fxpathcomp.git diff --git a/tree.ml b/tree.ml index 780791a..20a7792 100644 --- a/tree.ml +++ b/tree.ml @@ -48,9 +48,12 @@ sig val has_tagged_foll : t -> Tag.t -> bool val tagged_desc : t -> Tag.t -> t val tagged_foll : t -> Tag.t -> t + val init_tagged_next : t -> Tag.t -> unit val tagged_next : t -> Tag.t -> t val subtree_tags : t -> Tag.t -> int val is_left : t -> bool + val print_id : Format.formatter -> t -> unit + val test_xml_tree : Format.formatter -> Ptset.t -> t -> unit end module XML = @@ -89,6 +92,7 @@ struct *) external is_empty : t -> [`Text ] node -> bool = "caml_text_collection_empty_text" + let is_empty t n = (equal nil n) || is_empty t n @@ -96,7 +100,7 @@ struct let get_text t n = - if (equal nil n) || is_empty t n then "" + if equal nil n then "" else get_cached_text t n external size : t -> int = "caml_text_collection_size" @@ -127,25 +131,23 @@ struct external next_sibling : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" - + external prev_sibling : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling" external is_leaf : t -> [`Tree] node -> bool = "caml_xml_tree_is_leaf" (* external tag : t -> [`Tree ] node -> T = "caml_xml_tree_tag"*) external tag_id : t -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag_id" (* - let tag_hash = Hashtbl.create 4097 + let tag_hash = Array.make 6_000_000 (Tag.nullt) - let tag_id t id = - try - Hashtbl.find tag_hash id - with - | Not_found -> + let tag_id t id = + let tag = tag_hash.(int_of_node id) + in + if tag != Tag.nullt then tag + else let tag = tag_id t id in - Hashtbl.add tag_hash id tag;tag + (tag_hash.(int_of_node id) <- tag; tag) *) - - let is_last t n = equal nil (next_sibling t n) external prev_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_prev_text" @@ -162,14 +164,59 @@ struct external tagged_next : t -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_next" external subtree_tags : t -> [`Tree ] node -> Tag.t -> int = "caml_xml_tree_subtree_tags" + + + let test_xml_tree ppf tags v = + let pr x = Format.fprintf ppf x in + let rec aux id = + if (is_nil id) + then () + else + begin + pr "Node %i, (Tag) %i='%s' (GetTagName), NodeXMLId (Preorder)=%i\n%!" + (int_of_node id) + (tag_id v id) + (Tag.to_string (tag_id v id)) + (node_xml_id v id); + pr "DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s) ParentDoc(my_text)=%i\n%!" + (int_of_node (prev_text v id)) + (Text.get_text v (prev_text v id)) + (int_of_node (my_text v id)) + (Text.get_text v (my_text v id)) + (int_of_node (next_text v id)) + (Text.get_text v (next_text v id)) + (int_of_node(parent_doc v (my_text v id))); + pr "Testing Tagged*\n%!"; + Ptset.iter (fun t -> + let str = Tag.to_string t in + if Tag.pcdata <> t + then begin + pr "Tag: %s : \n%!" str; + pr "TaggedDesc = %i%!, " (tagged_desc v id t); + pr "TaggedFoll = %i\n%!" (tagged_foll v id t); + pr "SubtreeTags = %i\n%!" (subtree_tags v id t); + end) tags; + pr "----------------------------\n"; + aux(first_child v id); + aux(next_sibling v id); + end + in + aux (root v) + + + + + + let print_skel t = let rec aux id = if (is_nil id) then Printf.eprintf "#\n" else begin - Printf.eprintf "Node %i has tag '%s' DocOrder=%i, DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s) parent_doc(my_text)=%i\n%!" + Printf.eprintf "Node %i has tag '%i=%s' DocOrder=%i, DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s) parent_doc(my_text)=%i\n%!" (int_of_node id) + (tag_id t id) (Tag.to_string (tag_id t id)) (node_xml_id t id) (int_of_node (prev_text t id)) @@ -202,6 +249,9 @@ struct end in aux (root t) + + + end @@ -222,6 +272,8 @@ struct node : descr } let dump { doc=t } = Tree.print_skel t + let test_xml_tree ppf tags { doc=t } = Tree.test_xml_tree ppf tags t + module DocIdSet = struct include Set.Make (struct type t = string_content let compare = (-) end) @@ -287,19 +339,29 @@ struct let root n = { n with node = norm (Tree.root n.doc) } let is_root n = match n.node with - | Node(NC t) when (Tree.root n.doc) == t -> true + | Node(NC t) -> (int_of_node t) == 0 | _ -> false - let parent n = + let parent n = + if is_root n then { n with node=Nil} + else let node' = match n.node with | Node(NC t) -> let txt = prev_text n.doc t in if Text.is_empty n.doc txt then - Node(NC (Tree.parent n.doc t)) + let ps = Tree.prev_sibling n.doc t in + if is_nil ps + then + Node(NC (Tree.parent n.doc t)) + else Node(NC ps) else Node(SC (txt,t)) - | Node(SC(t,_)) -> Node (NC(parent_doc n.doc t)) + | Node(SC(i,t)) -> + let ps = Tree.prev_sibling n.doc t in + if is_nil ps + then Node (NC(parent_doc n.doc i)) + else Node(NC ps) | _ -> failwith "parent" in { n with node = node' } @@ -322,7 +384,6 @@ struct | Nil | String _ -> failwith "first_child" in { n with node = node'} - let next_sibling n = let node' = @@ -345,18 +406,15 @@ struct let id = function { doc=d; node=Node(NC n)} -> node_xml_id d n | { doc=d; node=Node(SC (i,_) )} -> text_xml_id d i - | _ -> failwith "id" + | _ -> -1 (* + Format.fprintf Format.err_formatter "Failure id on %s\n%!" (nts x.node); + failwith "id" *) let tag = function { node=Node(SC _) } -> Tag.pcdata | { doc=d; node=Node(NC n)} -> tag_id d n | _ -> failwith "tag" -(* let tag_id = - function { node=Node(SC _) } -> () - | { doc=d; node=Node(NC n)} -> tag_id d n - | _ -> () -*) let string_below t id = let strid = parent_doc t.doc id in match t.node with @@ -388,19 +446,59 @@ struct | { doc=d; node=Node(NC n) } -> { t with node = norm (tagged_desc d n tag) } | _ -> { t with node=Nil } - +(* let tagged_next t tag = - if tag = Tag.attribute || tag = Tag.pcdata then failwith "tagged_next" - else match t with - | { doc=d; node=Node(NC n) } -> { t with node = norm (tagged_next d n tag) } - | { doc=d; node=Node(SC (_,n)) } -> { t with node = norm (tagged_next d n tag) } - | _ -> { t with node=Nil } - + if tag == Tag.attribute || tag == Tag.pcdata then failwith "tagged_next" + else + match tagged_desc t tag with + | { doc = d; node=Nil } -> tagged_foll t tag + | x -> x +*) let subtree_tags t tag = match t with - { doc = d; node = Node(NC n) } -> subtree_tags d n tag + { doc = d; node = Node(NC n) } -> + subtree_tags d n tag | _ -> 0 + let tagged_desc_array = ref [| |] + let idx = ref 0 + + let init_tagged_next t tagid = + let l = subtree_tags (root t) tagid + in + tagged_desc_array := Array.create l { t with node= Nil }; + let i = ref 0 in + let rec collect t = + if is_node t then begin + if tag t == tagid then + begin + !tagged_desc_array.(!i) <- t; + incr i; + end; + collect (first_child t); + collect (next_sibling t) + end; + in + collect t; + idx := 0 + + let print_id ppf v = + let pr x= Format.fprintf ppf x in + match v with + { node=Nil } -> pr "NULLT: -1" + | { node=String(i) } | { node=Node(SC(i,_)) } -> pr "DocID: %i" (int_of_node i) + | { node=Node(NC(i)) } -> pr "Node: %i" (int_of_node i) + + + + let tagged_next t tag = + if !idx >= Array.length !tagged_desc_array + then {t with node=Nil} + else + let r = !tagged_desc_array.(!idx) + in + incr idx; r + let has_tagged_foll t tag = is_node (tagged_foll t tag) @@ -453,13 +551,17 @@ struct let count t s = Text.count t.doc s let is_left t = - let u = left (parent t) in - (id t) == (id u) + if is_root t then false + else + if tag (parent t) == Tag.pcdata then false + else + let u = left (parent t) in + (id t) == (id u) let print_xml_fast outc t = let rec loop ?(print_right=true) t = match t.node with | Nil -> () - | String (s) -> output_string outc (string t) + | String (s) -> output_string outc (Text.get_text t.doc s) | Node _ when Tag.equal (tag t) Tag.pcdata -> loop (left t); if print_right then loop (right t) @@ -516,6 +618,10 @@ struct print_xml_fast outc (first_child t) else print_xml_fast outc t + + + + let traversal t = Tree.traversal t.doc let full_traversal t = let rec aux n =