X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=tree.ml;h=3bfbfceeac181d6976aba24b50b3f8da1b0cc83c;hb=63ca35af9ef5c0b18b3d3217536f3353f77f5465;hp=c3a225545a56fafd9bae7294cf1ef68e6b07fa11;hpb=b821684aac2e3114c2eb28188020d7a09b5de2a5;p=SXSI%2Fxpathcomp.git diff --git a/tree.ml b/tree.ml index c3a2255..3bfbfce 100644 --- a/tree.ml +++ b/tree.ml @@ -4,6 +4,7 @@ (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) +INCLUDE "debug.ml" module type BINARY = sig type node_content @@ -12,6 +13,7 @@ sig type t val parse_xml_uri : string -> t val parse_xml_string : string -> t + val tag_pool : t -> Tag.pool val string : t -> string val descr : t -> descr val left : t -> t @@ -43,12 +45,11 @@ struct external int_of_node : 'a node -> int = "%identity" - external parse_xml_uri : string -> t = "caml_call_shredder_uri" - let parse_xml_uri uri = parse_xml_uri uri - - external parse_xml_string : string -> t = "caml_call_shredder_string" - let parse_xml_string uri = parse_xml_string uri + external parse_xml_uri : string -> int -> bool -> bool -> t = "caml_call_shredder_uri" + + external parse_xml_string : string -> int -> bool -> bool -> t = "caml_call_shredder_string" + module Text = struct @@ -95,8 +96,8 @@ struct external is_leaf : t -> [`Tree] node -> bool = "caml_xml_tree_is_leaf" - external tag : t -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag" - external tag_id : t -> [`Tree ] node -> unit = "caml_xml_tree_tag_id" +(* external tag : t -> [`Tree ] node -> T = "caml_xml_tree_tag"*) + external tag_id : t -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag_id" let is_last t n = equal nil (next_sibling t n) @@ -118,7 +119,7 @@ struct begin Printf.eprintf "Node %i has tag '%s' DocOrder=%i, DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s)\n%!" (int_of_node id) - (Tag.to_string (tag t id)) + (Tag.to_string (tag_id t id)) (node_xml_id t id) (int_of_node (prev_text t id)) (Text.get_text t (prev_text t id)) @@ -177,8 +178,21 @@ struct node = Node(NC (root t)) } - let parse_xml_uri str = node_of_t (parse_xml_uri str) - let parse_xml_string str = node_of_t (parse_xml_string str) + let parse_xml_uri str = node_of_t + (MM((parse_xml_uri str + !Options.sample_factor + !Options.index_empty_texts + !Options.disable_text_collection),__LOCATION__)) + + let parse_xml_string str = node_of_t + (MM((parse_xml_string str + !Options.sample_factor + !Options.index_empty_texts + !Options.disable_text_collection),__LOCATION__)) + + + external pool : doc -> Tag.pool = "%identity" + let tag_pool t = pool t.doc let compare a b = match a.node,b.node with | Node(NC i),Node(NC j) -> compare i j @@ -265,14 +279,14 @@ struct let tag = function { node=Node(SC _) } -> Tag.pcdata - | { doc=d; node=Node(NC n)} -> tag d n - | _ -> failwith "Tag" + | { doc=d; node=Node(NC n)} -> tag_id d n + | _ -> failwith "tag" - let tag_id = +(* let tag_id = function { node=Node(SC _) } -> () | { doc=d; node=Node(NC n)} -> tag_id d n | _ -> () - +*) let string_below t id = let pid = parent_doc t.doc id in match t.node with @@ -367,14 +381,16 @@ struct aux (first_child n); aux (next_sibling n) in aux t + + let print_stats _ = () end end - - +IFDEF DEBUG +THEN module DEBUGTREE = struct @@ -453,7 +469,11 @@ module DEBUGTREE | Node (SC (_,ns)) -> norm ns | Node(NC t) -> let ns = next_sibling_ n.doc t in - let txt = next_text_ n.doc t in + let txt = + if XML.Tree.is_nil ns then + next_text_ n.doc t + else prev_text_ n.doc ns + in if is_empty_ n.doc txt then norm ns else Node (SC (txt, ns)) @@ -466,7 +486,6 @@ module DEBUGTREE | { doc=d; node=Node(SC (i,_) )} -> text_xml_id_ d i | _ -> failwith "id" - (* Wrapper around critical function *) let string t = time ("TextCollection.GetText()") (string) t let left = first_child @@ -558,4 +577,6 @@ module DEBUGTREE end module Binary = DEBUGTREE - +ELSE +module Binary = XML.Binary +END (* IFDEF DEBUG *)