X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=tree.ml;h=6c3cc1bf483f0a650010a1524a75269c2263562e;hb=ff0d1111e34d2aba9b6454e0c95b67bd38ba392c;hp=ecd8e3b9ac317fe628e5e46f63eed1e64c9ac559;hpb=eebef30070a951d852ce5811b289d8131a5300eb;p=SXSI%2Fxpathcomp.git diff --git a/tree.ml b/tree.ml index ecd8e3b..6c3cc1b 100644 --- a/tree.ml +++ b/tree.ml @@ -4,6 +4,7 @@ (* Copyright NICTA 2008 *) (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) +INCLUDE "debug.ml" module type BINARY = sig type node_content @@ -12,6 +13,8 @@ sig type t val parse_xml_uri : string -> t val parse_xml_string : string -> t + val save : t -> string -> unit + val load : ?sample:int -> string -> t val tag_pool : t -> Tag.pool val string : t -> string val descr : t -> descr @@ -23,11 +26,16 @@ sig val print_xml_fast : out_channel -> t -> unit val compare : t -> t -> int val equal : t -> t -> bool - module DocIdSet : Set.S with type elt = string_content + module DocIdSet : + sig + include Set.S + end + with type elt = string_content val string_below : t -> string_content -> bool val contains : t -> string -> DocIdSet.t val contains_old : t -> string -> bool val dump : t -> unit + val get_string : t -> string_content -> string end module XML = @@ -44,15 +52,17 @@ struct external int_of_node : 'a node -> int = "%identity" - external parse_xml_uri : string -> int -> bool -> bool -> t = "caml_call_shredder_uri" - - + external parse_xml_uri : string -> int -> bool -> bool -> t = "caml_call_shredder_uri" external parse_xml_string : string -> int -> bool -> bool -> t = "caml_call_shredder_string" + external save_tree : t -> string -> unit = "caml_xml_tree_save" + external load_tree : string -> int -> t = "caml_xml_tree_load" + module Text = struct - + let equal : [`Text] node -> [`Text] node -> bool = equal + (* Todo *) external nullt : unit -> [`Text ] node = "caml_xml_tree_nullt" let nil = nullt () @@ -75,7 +85,7 @@ struct module Tree = struct - + let equal : [`Tree ] node -> [`Tree] node -> bool = equal external serialize : t -> string -> unit = "caml_xml_tree_serialize" external unserialize : string -> t = "caml_xml_tree_unserialize" @@ -116,7 +126,7 @@ struct then Printf.eprintf "#\n" else begin - Printf.eprintf "Node %i has tag '%s' DocOrder=%i, DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s)\n%!" + Printf.eprintf "Node %i has tag '%s' DocOrder=%i, DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s) parent_doc(my_text)=%i\n%!" (int_of_node id) (Tag.to_string (tag_id t id)) (node_xml_id t id) @@ -125,7 +135,9 @@ struct (int_of_node (my_text t id)) (Text.get_text t (my_text t id)) (int_of_node (next_text t id)) - (Text.get_text t (next_text t id)); + (Text.get_text t (next_text t id)) + (int_of_node(parent_doc t (my_text t id))); + aux(first_child t id); aux(next_sibling t id); end @@ -168,22 +180,33 @@ struct node : descr } let dump { doc=t } = Tree.print_skel t - module DocIdSet = Set.Make (struct type t = string_content - let compare = (-) end) - - + module DocIdSet = struct + include Set.Make (struct type t = string_content + let compare = (-) end) + + end + let get_string t (i:string_content) = Text.get_text t.doc i open Tree let node_of_t t = { doc= t; node = Node(NC (root t)) } let parse_xml_uri str = node_of_t - (parse_xml_uri str - !Options.sample_factor !Options.index_empty_texts !Options.disable_text_collection) + (MM((parse_xml_uri str + !Options.sample_factor + !Options.index_empty_texts + !Options.disable_text_collection),__LOCATION__)) let parse_xml_string str = node_of_t - (parse_xml_string str - !Options.sample_factor !Options.index_empty_texts !Options.disable_text_collection) + (MM((parse_xml_string str + !Options.sample_factor + !Options.index_empty_texts + !Options.disable_text_collection),__LOCATION__)) + + + let save t str = save_tree t.doc str + + let load ?(sample=64) str = node_of_t (load_tree str sample) external pool : doc -> Tag.pool = "%identity" @@ -283,11 +306,13 @@ struct | _ -> () *) let string_below t id = - let pid = parent_doc t.doc id in + let strid = parent_doc t.doc id in match t.node with - | Node(NC(i)) -> (is_ancestor t.doc i pid) - | Node(SC(i,_)) -> (is_ancestor t.doc (parent_doc t.doc i) pid) + | Node(NC(i)) -> + (Tree.equal i strid) || (is_ancestor t.doc i strid) + | Node(SC(i,_)) -> Text.equal i id | _ -> false + let contains t s = Array.fold_left (fun a i -> DocIdSet.add i a) DocIdSet.empty (Text.contains t.doc s) @@ -311,7 +336,9 @@ struct let rec loop ?(print_right=true) t = match t.node with | Nil -> () | String (s) -> output_string outc (string t) - | Node _ when Tag.equal (tag t) Tag.pcdata -> loop (left t); loop (right t) + | Node _ when Tag.equal (tag t) Tag.pcdata -> + loop (left t); + if print_right then loop (right t) | Node (_) -> let tg = Tag.to_string (tag t) in @@ -384,8 +411,8 @@ end - - +IFDEF DEBUG +THEN module DEBUGTREE = struct @@ -572,3 +599,6 @@ module DEBUGTREE end module Binary = DEBUGTREE +ELSE +module Binary = XML.Binary +END (* IFDEF DEBUG *)