X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=tree.ml;h=9cab2c791f45cf72d13fe8881c6043296495f73f;hb=6b03c8ef3dac4b1de06ca577e8e0ee07c6c3eae7;hp=8ebcdccfdb58bd6b63b04eba96824d2cf61c39a6;hpb=95367aa932a9e179976e59ea326542c50905f5b3;p=SXSI%2Fxpathcomp.git diff --git a/tree.ml b/tree.ml index 8ebcdcc..9cab2c7 100644 --- a/tree.ml +++ b/tree.ml @@ -4,6 +4,7 @@ (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) +INCLUDE "debug.ml" module type BINARY = sig type node_content @@ -12,6 +13,9 @@ sig type t val parse_xml_uri : string -> t val parse_xml_string : string -> t + val save : t -> string -> unit + val load : ?sample:int -> string -> t + val tag_pool : t -> Tag.pool val string : t -> string val descr : t -> descr val left : t -> t @@ -43,12 +47,12 @@ struct external int_of_node : 'a node -> int = "%identity" - external parse_xml_uri : string -> t = "caml_call_shredder_uri" - let parse_xml_uri uri = parse_xml_uri uri - - external parse_xml_string : string -> t = "caml_call_shredder_string" - let parse_xml_string uri = parse_xml_string uri - + external parse_xml_uri : string -> int -> bool -> bool -> t = "caml_call_shredder_uri" + external parse_xml_string : string -> int -> bool -> bool -> t = "caml_call_shredder_string" + + external save_tree : t -> string -> unit = "caml_xml_tree_save" + external load_tree : string -> int -> t = "caml_xml_tree_load" + module Text = struct @@ -95,8 +99,8 @@ struct external is_leaf : t -> [`Tree] node -> bool = "caml_xml_tree_is_leaf" - external tag : t -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag" - external tag_id : t -> [`Tree ] node -> unit = "caml_xml_tree_tag_id" +(* external tag : t -> [`Tree ] node -> T = "caml_xml_tree_tag"*) + external tag_id : t -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag_id" let is_last t n = equal nil (next_sibling t n) @@ -118,7 +122,7 @@ struct begin Printf.eprintf "Node %i has tag '%s' DocOrder=%i, DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s)\n%!" (int_of_node id) - (Tag.to_string (tag t id)) + (Tag.to_string (tag_id t id)) (node_xml_id t id) (int_of_node (prev_text t id)) (Text.get_text t (prev_text t id)) @@ -177,8 +181,26 @@ struct node = Node(NC (root t)) } - let parse_xml_uri str = node_of_t (parse_xml_uri str) - let parse_xml_string str = node_of_t (parse_xml_string str) + let parse_xml_uri str = node_of_t + (MM((parse_xml_uri str + !Options.sample_factor + !Options.index_empty_texts + !Options.disable_text_collection),__LOCATION__)) + + let parse_xml_string str = node_of_t + (MM((parse_xml_string str + !Options.sample_factor + !Options.index_empty_texts + !Options.disable_text_collection),__LOCATION__)) + + + let save t str = save_tree t.doc str + + let load ?(sample=64) str = node_of_t (load_tree str sample) + + + external pool : doc -> Tag.pool = "%identity" + let tag_pool t = pool t.doc let compare a b = match a.node,b.node with | Node(NC i),Node(NC j) -> compare i j @@ -265,14 +287,14 @@ struct let tag = function { node=Node(SC _) } -> Tag.pcdata - | { doc=d; node=Node(NC n)} -> tag d n - | _ -> failwith "Tag" + | { doc=d; node=Node(NC n)} -> tag_id d n + | _ -> failwith "tag" - let tag_id = +(* let tag_id = function { node=Node(SC _) } -> () | { doc=d; node=Node(NC n)} -> tag_id d n | _ -> () - +*) let string_below t id = let pid = parent_doc t.doc id in match t.node with @@ -367,14 +389,16 @@ struct aux (first_child n); aux (next_sibling n) in aux t + + let print_stats _ = () end end - - +IFDEF DEBUG +THEN module DEBUGTREE = struct @@ -561,3 +585,6 @@ module DEBUGTREE end module Binary = DEBUGTREE +ELSE +module Binary = XML.Binary +END (* IFDEF DEBUG *)