X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=tree.ml;h=3bfbfceeac181d6976aba24b50b3f8da1b0cc83c;hb=63ca35af9ef5c0b18b3d3217536f3353f77f5465;hp=0c1f10b2ec8e4cf2af2fd453f824de1577e8a8d1;hpb=24fdea81b5506233d139bd7d72364a190bef35b8;p=SXSI%2Fxpathcomp.git diff --git a/tree.ml b/tree.ml index 0c1f10b..3bfbfce 100644 --- a/tree.ml +++ b/tree.ml @@ -4,6 +4,7 @@ (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) +INCLUDE "debug.ml" module type BINARY = sig type node_content @@ -12,6 +13,7 @@ sig type t val parse_xml_uri : string -> t val parse_xml_string : string -> t + val tag_pool : t -> Tag.pool val string : t -> string val descr : t -> descr val left : t -> t @@ -43,16 +45,15 @@ struct external int_of_node : 'a node -> int = "%identity" - external parse_xml_uri : string -> t = "caml_call_shredder_uri" - let parse_xml_uri uri = parse_xml_uri uri - - external parse_xml_string : string -> t = "caml_call_shredder_string" - let parse_xml_string uri = parse_xml_string uri + external parse_xml_uri : string -> int -> bool -> bool -> t = "caml_call_shredder_uri" + + external parse_xml_string : string -> int -> bool -> bool -> t = "caml_call_shredder_string" + module Text = struct - type t (* pointer to the text collection *) + (* Todo *) external nullt : unit -> [`Text ] node = "caml_xml_tree_nullt" let nil = nullt () @@ -95,10 +96,8 @@ struct external is_leaf : t -> [`Tree] node -> bool = "caml_xml_tree_is_leaf" - external tag : t -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag" - external tag_id : t -> [`Tree ] node -> unit = "caml_xml_tree_tag_id" - - external text_collection : t -> Text.t = "caml_xml_tree_text_collection" +(* external tag : t -> [`Tree ] node -> T = "caml_xml_tree_tag"*) + external tag_id : t -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag_id" let is_last t n = equal nil (next_sibling t n) @@ -113,7 +112,6 @@ struct external is_ancestor : t -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" let print_skel t = - let textcol = text_collection t in let rec aux id = if (is_nil id) then Printf.eprintf "#\n" @@ -121,14 +119,14 @@ struct begin Printf.eprintf "Node %i has tag '%s' DocOrder=%i, DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s)\n%!" (int_of_node id) - (Tag.to_string (tag t id)) + (Tag.to_string (tag_id t id)) (node_xml_id t id) (int_of_node (prev_text t id)) - (Text.get_text textcol (prev_text t id)) + (Text.get_text t (prev_text t id)) (int_of_node (my_text t id)) - (Text.get_text textcol (my_text t id)) + (Text.get_text t (my_text t id)) (int_of_node (next_text t id)) - (Text.get_text textcol (next_text t id)); + (Text.get_text t (next_text t id)); aux(first_child t id); aux(next_sibling t id); end @@ -136,17 +134,16 @@ struct aux (root t) let traversal t = - let textcol = text_collection t in let rec aux id = if not (is_nil id) then begin (* ignore (tag t id); - ignore (Text.get_text textcol (prev_text t id)); + ignore (Text.get_text t (prev_text t id)); if (is_leaf t id) - then ignore (Text.get_text textcol (my_text t id)); + then ignore (Text.get_text t (my_text t id)); if (is_last t id) - then ignore (Text.get_text textcol (next_text t id)); *) + then ignore (Text.get_text t (next_text t id)); *) aux (first_child t id); aux (next_sibling t id); end @@ -168,8 +165,7 @@ struct type doc = t - type t = { doc : doc; - text : Text.t; + type t = { doc : doc; node : descr } let dump { doc=t } = Tree.print_skel t @@ -179,12 +175,24 @@ struct open Tree let node_of_t t = { doc= t; - text = text_collection t; node = Node(NC (root t)) } - let parse_xml_uri str = node_of_t (parse_xml_uri str) - let parse_xml_string str = node_of_t (parse_xml_string str) + let parse_xml_uri str = node_of_t + (MM((parse_xml_uri str + !Options.sample_factor + !Options.index_empty_texts + !Options.disable_text_collection),__LOCATION__)) + + let parse_xml_string str = node_of_t + (MM((parse_xml_string str + !Options.sample_factor + !Options.index_empty_texts + !Options.disable_text_collection),__LOCATION__)) + + + external pool : doc -> Tag.pool = "%identity" + let tag_pool t = pool t.doc let compare a b = match a.node,b.node with | Node(NC i),Node(NC j) -> compare i j @@ -201,7 +209,7 @@ struct let equal a b = (compare a b) == 0 let string t = match t.node with - | String i -> Text.get_text t.text i + | String i -> Text.get_text t.doc i | _ -> assert false let norm (n : [`Tree ] node ) = if is_nil n then Nil else Node (NC n) @@ -231,13 +239,13 @@ struct match n.node with | Node (NC t) when is_leaf n.doc t -> let txt = my_text n.doc t in - if Text.is_empty n.text txt + if Text.is_empty n.doc txt then Nil else Node(SC (txt,Tree.nil)) | Node (NC t) -> let fs = first_child n.doc t in let txt = prev_text n.doc fs in - if Text.is_empty n.text txt + if Text.is_empty n.doc txt then norm fs else Node (SC (txt, fs)) | Node(SC (i,_)) -> String i @@ -253,7 +261,7 @@ struct | Node(NC t) -> let ns = next_sibling n.doc t in let txt = next_text n.doc t in - if Text.is_empty n.text txt + if Text.is_empty n.doc txt then norm ns else Node (SC (txt, ns)) | Nil | String _ -> failwith "next_sibling" @@ -271,14 +279,14 @@ struct let tag = function { node=Node(SC _) } -> Tag.pcdata - | { doc=d; node=Node(NC n)} -> tag d n - | _ -> failwith "Tag" + | { doc=d; node=Node(NC n)} -> tag_id d n + | _ -> failwith "tag" - let tag_id = +(* let tag_id = function { node=Node(SC _) } -> () | { doc=d; node=Node(NC n)} -> tag_id d n | _ -> () - +*) let string_below t id = let pid = parent_doc t.doc id in match t.node with @@ -287,7 +295,7 @@ struct | _ -> false let contains t s = - Array.fold_left (fun a i -> DocIdSet.add i a) DocIdSet.empty (Text.contains t.text s) + Array.fold_left (fun a i -> DocIdSet.add i a) DocIdSet.empty (Text.contains t.doc s) let contains_old t s = let regexp = Str.regexp_string s in @@ -367,20 +375,22 @@ struct let rec aux n = match n.node with | Nil -> () - | String i -> () (*ignore(Text.get_text t.text i) *) + | String i -> () (*ignore(Text.get_text t.doc i) *) | Node(_) -> (* tag_id n; *) aux (first_child n); aux (next_sibling n) in aux t + + let print_stats _ = () end end - - +IFDEF DEBUG +THEN module DEBUGTREE = struct @@ -438,13 +448,13 @@ module DEBUGTREE match n.node with | Node (NC t) when is_leaf_ n.doc t -> let txt = my_text_ n.doc t in - if is_empty_ n.text txt + if is_empty_ n.doc txt then Nil else Node(SC (txt,XML.Tree.nil)) | Node (NC t) -> let fs = first_child_ n.doc t in let txt = prev_text_ n.doc fs in - if is_empty_ n.text txt + if is_empty_ n.doc txt then norm fs else Node (SC (txt, fs)) | Node(SC (i,_)) -> String i @@ -459,8 +469,12 @@ module DEBUGTREE | Node (SC (_,ns)) -> norm ns | Node(NC t) -> let ns = next_sibling_ n.doc t in - let txt = next_text_ n.doc t in - if is_empty_ n.text txt + let txt = + if XML.Tree.is_nil ns then + next_text_ n.doc t + else prev_text_ n.doc ns + in + if is_empty_ n.doc txt then norm ns else Node (SC (txt, ns)) | Nil | String _ -> failwith "next_sibling" @@ -472,7 +486,6 @@ module DEBUGTREE | { doc=d; node=Node(SC (i,_) )} -> text_xml_id_ d i | _ -> failwith "id" - (* Wrapper around critical function *) let string t = time ("TextCollection.GetText()") (string) t let left = first_child @@ -564,4 +577,6 @@ module DEBUGTREE end module Binary = DEBUGTREE - +ELSE +module Binary = XML.Binary +END (* IFDEF DEBUG *)