X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=src%2Ftree.ml;h=fae8ebaea4dac30eea66c0b4b73db01f99acd189;hb=4309f9456521bffcab5ff79abe1ed826744c3a57;hp=9c6eff3adb19a51e9afcdbd0cb9f21a702da79b3;hpb=1d6a3a063ccce5c746801045601b5d96bb2804b6;p=SXSI%2Fxpathcomp.git diff --git a/src/tree.ml b/src/tree.ml index 9c6eff3..fae8eba 100644 --- a/src/tree.ml +++ b/src/tree.ml @@ -7,7 +7,6 @@ INCLUDE "debug.ml" INCLUDE "utils.ml" - external init_lib : unit -> unit = "sxsi_cpp_init" exception CPlusPlusError of string @@ -20,7 +19,6 @@ type node = [ `Tree ] Node.t type tree - external register_tag : tree -> string -> Tag.t = "caml_xml_tree_register_tag" external tag_name : tree -> Tag.t -> string = "caml_xml_tree_get_tag_name" @@ -53,11 +51,11 @@ module TreeBuilder = struct type t external create : unit -> t = "caml_xml_tree_builder_create" - external open_document : t -> bool -> int -> bool -> int -> unit = "caml_xml_tree_builder_open_document" + external open_document : t -> int -> bool -> int -> unit = "caml_xml_tree_builder_open_document" external close_document : t -> tree = "caml_xml_tree_builder_close_document" - external open_tag : t -> string -> unit = "caml_xml_tree_builder_new_open_tag" - external close_tag : t -> string -> unit = "caml_xml_tree_builder_new_closing_tag" - external text : t -> string -> unit = "caml_xml_tree_builder_new_text" + external open_tag : t -> string -> unit = "caml_xml_tree_builder_open_tag" + external close_tag : t -> string -> unit = "caml_xml_tree_builder_close_tag" + external text : t -> string -> unit = "caml_xml_tree_builder_text" let is_whitespace s = let rec loop len i = @@ -75,7 +73,8 @@ struct (fun parser_ -> incr event_counter; if !event_counter land 0xffffff == 0 then - Printf.eprintf "Current position: %i\n%!" (Expat.get_current_byte_index parser_)) + Logger.print Format.err_formatter "Current position: %i@\n@?" (Expat.get_current_byte_index parser_)) + let do_text b t = if Buffer.length t > 0 then begin @@ -97,7 +96,6 @@ struct close_tag b atname let start_element_handler parser_ b t tag attr_list = - display_count parser_; do_text b t; open_tag b tag; match attr_list with @@ -109,12 +107,10 @@ struct let end_element_handler parser_ b t tag = - display_count parser_; do_text b t; close_tag b tag let character_data_handler parser_ _ t text = - display_count parser_; Buffer.add_string t text let create_parser () = @@ -124,18 +120,17 @@ struct let finalize () = do_text build buf; close_tag build ""; - Printf.eprintf "Finished Parsing\n%!"; - Printf.eprintf "Started Index construction\n%!"; + Logger.print Format.err_formatter "Finished parsing@\n"; + Logger.print Format.err_formatter "Starting index construction@\n"; let r = close_document build in - Printf.eprintf "Finished Index construction\n%!"; + Logger.print Format.err_formatter "Finished index construction@\n"; r in Expat.set_start_element_handler parser_ (start_element_handler parser_ build buf); Expat.set_end_element_handler parser_ (end_element_handler parser_ build buf); Expat.set_character_data_handler parser_ (character_data_handler parser_ build buf); - Printf.eprintf "Started Parsing\n%!"; - open_document build !Options.index_empty_texts !Options.sample_factor - !Options.disable_text_collection !Options.text_index_type; + Logger.print Format.err_formatter "Started parsing@\n"; + open_document build !Options.sample_factor !Options.disable_text_collection !Options.text_index_type; open_tag build ""; parser_, finalize @@ -170,10 +165,22 @@ end type bit_vector = string external bool_of_int : int -> bool = "%identity" +external int_of_bool : bool -> int = "%identity" let bit_vector_unsafe_get v i = bool_of_int (((Char.code (String.unsafe_get v (i lsr 3))) lsr (i land 7)) land 1) +let chr (c:int) : char = Obj.magic (c land 0xff) +let bit_vector_unsafe_set v i b = + let j = i lsr 3 in + let c = Char.code v.[j] in + let bit = int_of_bool b in + let mask = bit lsl (i land 7) in + if b then v.[j] <- chr (c lor mask) else v.[j] <- (chr (c land (lnot mask))) + +let bit_vector_create n = + let len = if n <= 0 then 0 else (n - 1) / 8 + 1 in + String.make len '\000' type t = { doc : tree; @@ -205,26 +212,25 @@ external nullt : unit -> 'a Node.t = "caml_xml_tree_nullt" let nil : [`Tree ] Node.t = Node.nil let root : [`Tree ] Node.t = Node.null -type unordered_set +type tag_list -external unordered_set_alloc : int -> unordered_set = "caml_unordered_set_alloc" -external unordered_set_length : unordered_set -> int = "caml_unordered_set_length" -external unordered_set_insert : unordered_set -> int -> unit = "caml_unordered_set_set" "noalloc" +external tag_list_alloc : int -> tag_list = "caml_tag_list_alloc" +external tag_list_set : tag_list -> int -> Tag.t -> unit = "caml_tag_list_set" "noalloc" module HPtset = Hashtbl.Make(Ptset.Int) let vector_htbl = HPtset.create MED_H_SIZE -let unordered_set_of_set s = +let tag_list_of_set s = try HPtset.find vector_htbl s with - Not_found -> - let v = unordered_set_alloc (Ptset.Int.cardinal s) in - let _ = Ptset.Int.iter (fun e -> unordered_set_insert v e) s in - HPtset.add vector_htbl s v; v - -let ptset_to_vector = unordered_set_of_set + Not_found -> + let v = tag_list_alloc (Ptset.Int.cardinal s + 1) in + let i = ref 0 in + let () = Ptset.Int.iter (fun e -> tag_list_set v !i e; incr i) s in + let () = tag_list_set v !i Tag.nullt in + HPtset.add vector_htbl s v; v (** tree interface *) @@ -240,7 +246,7 @@ let first_element t n = tree_first_element t.doc n external tree_tagged_child : tree -> [`Tree] Node.t -> Tag.t -> [`Tree] Node.t = "caml_xml_tree_tagged_child" "noalloc" let tagged_child t n tag = tree_tagged_child t.doc n tag -external tree_select_child : tree -> [`Tree ] Node.t -> unordered_set -> [`Tree] Node.t = "caml_xml_tree_select_child" "noalloc" +external tree_select_child : tree -> [`Tree ] Node.t -> tag_list -> [`Tree] Node.t = "caml_xml_tree_select_child" "noalloc" let select_child t n tag_set = tree_select_child t.doc n tag_set external tree_last_child : tree -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_last_child" "noalloc" @@ -253,14 +259,13 @@ let next_sibling t n = tree_next_sibling t.doc n external tree_next_element : tree -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_next_element" "noalloc" let next_element t n = tree_next_element t.doc n -external tree_next_node_before : tree -> [`Tree] Node.t -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_next_node_before" "noalloc" -let next_node_before t n ctx = tree_next_node_before t.doc n ctx -external tree_tagged_following_sibling : tree -> [`Tree] Node.t -> Tag.t -> [`Tree] Node.t = "caml_xml_tree_tagged_following_sibling" "noalloc" -let tagged_following_sibling t n tag = tree_tagged_following_sibling t.doc n tag +external tree_tagged_sibling : tree -> [`Tree] Node.t -> Tag.t -> [`Tree] Node.t = "caml_xml_tree_tagged_sibling" "noalloc" +let tagged_sibling t n tag = tree_tagged_sibling t.doc n tag + -external tree_select_following_sibling : tree -> [`Tree ] Node.t -> unordered_set -> [`Tree] Node.t = "caml_xml_tree_select_following_sibling" "noalloc" -let select_following_sibling t n tag_set = tree_select_following_sibling t.doc n tag_set +external tree_select_sibling : tree -> [`Tree ] Node.t -> tag_list -> [`Tree] Node.t = "caml_xml_tree_select_sibling" "noalloc" +let select_sibling t n tag_set = tree_select_sibling t.doc n tag_set external tree_prev_sibling : tree -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_prev_sibling" "noalloc" let prev_sibling t n = tree_prev_sibling t.doc n @@ -273,23 +278,18 @@ let tagged_descendant t n tag = tree_tagged_descendant t.doc n tag external tree_tagged_next : tree -> [`Tree ] Node.t -> Tag.t -> [`Tree ] Node.t = "caml_xml_tree_tagged_next" "noalloc" let tagged_next t n tag = tree_tagged_next t.doc n tag -external tree_select_descendant : tree -> [`Tree ] Node.t -> unordered_set -> [`Tree] Node.t = "caml_xml_tree_select_descendant" "noalloc" +external tree_select_descendant : tree -> [`Tree ] Node.t -> tag_list -> [`Tree] Node.t = "caml_xml_tree_select_descendant" "noalloc" let select_descendant t n tag_set = tree_select_descendant t.doc n tag_set external tree_tagged_following_before : tree -> [`Tree ] Node.t -> Tag.t -> [`Tree ] Node.t -> [`Tree ] Node.t = "caml_xml_tree_tagged_following_before" "noalloc" let tagged_following_before t n tag ctx = tree_tagged_following_before t.doc n tag ctx -external tree_select_following_before : tree -> [`Tree ] Node.t -> unordered_set -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_select_following_before" "noalloc" +external tree_select_following_before : tree -> [`Tree ] Node.t -> tag_list -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_select_following_before" "noalloc" let select_following_before t n tag_set ctx = tree_select_following_before t.doc n tag_set ctx external tree_parent : tree -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_parent" "noalloc" let parent t n = tree_parent t.doc n -external tree_binary_parent : tree -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_binary_parent" - "noalloc" -let binary_parent t n = tree_binary_parent t.doc n - - external tree_tag : tree -> [`Tree] Node.t -> Tag.t = "caml_xml_tree_tag" "noalloc" let tag t n = tree_tag t.doc n @@ -313,31 +313,22 @@ let tags t tag = open Format let dump_tag_table t = - eprintf "Child tags:\n%!"; - Array.iteri - (fun tag set -> eprintf "%s: %a\n%!" - (Tag.to_string tag) TagSet.print (TagSet.inj_positive set)) - t.children; - eprintf "-----------------------------\n%!"; - eprintf "Descendant tags:\n%!"; - Array.iteri - (fun tag set -> eprintf "%s: %a\n%!" - (Tag.to_string tag) TagSet.print (TagSet.inj_positive set)) - t.descendants; - eprintf "-----------------------------\n%!"; - eprintf "Sibling tags:\n%!"; - Array.iteri - (fun tag set -> eprintf "%s: %a\n%!" - (Tag.to_string tag) TagSet.print (TagSet.inj_positive set)) - t.siblings; - eprintf "-----------------------------\n%!"; - eprintf "Following tags:\n%!"; - Array.iteri - (fun tag set -> eprintf "%s: %a\n%!" - (Tag.to_string tag) TagSet.print (TagSet.inj_positive set)) - t.followings; - eprintf "-----------------------------\n%!" - + let tag = ref 0 in + let printer ppf set = + Logger.print ppf "%s: %a" + (Tag.to_string !tag) TagSet.print (TagSet.inj_positive set); + incr tag + in + let set_printer msg set = + tag := 0; + Logger.print err_formatter "%s :@\n" msg; + Pretty.pp_print_array ~sep:pp_force_newline printer err_formatter set; + Logger.print err_formatter "-----------------------------@\n"; + in + set_printer "Child tags" t.children; + set_printer "Descendant tags" t.descendants; + set_printer "Sibling tags" t.siblings; + set_printer "Following tags" t.followings external tree_subtree_tags : tree -> [`Tree] Node.t -> Tag.t -> int = "caml_xml_tree_subtree_tags" "noalloc" let subtree_tags t n tag = tree_subtree_tags t.doc n tag @@ -377,7 +368,7 @@ let stats t = in let a,b = loop true root 0 0 0 in - Printf.eprintf "Average depth: %f, number of leaves %i\n%!" ((float_of_int a)/. (float_of_int b)) b + Logger.print err_formatter "Average depth: %f, number of leaves %i@\n@?" ((float_of_int a)/. (float_of_int b)) b ;; module TagS = @@ -546,9 +537,9 @@ let is_node t = t != nil let is_root t = t == root let node_of_t t = - eprintf "Initializing tag structure\n%!"; + Logger.print err_formatter "Initializing tag structure@\n"; let _ = Tag.init (mk_tag_ops t) in - eprintf "Starting tag table construction\n%!"; + Logger.print err_formatter "Starting tag table construction@\n"; let f, n, c, d = time collect_labels t ~msg:"Building tag relationship table" in let c = Array.map TagS.to_ptset c in let n = Array.map TagS.to_ptset n in @@ -583,7 +574,7 @@ let version_string = "3" let pos fd = Unix.lseek fd 0 Unix.SEEK_CUR -let pr_pos fd = Printf.eprintf "At position %i\n%!" (pos fd) +let pr_pos fd = Logger.print err_formatter "At position %i@\n" (pos fd) let write fd s = let sl = String.length s in @@ -738,15 +729,9 @@ let mk_pred query s = in let bv = results.bv in memo := begin fun _ n -> - let b = - bit_vector_unsafe_get bv (Node.to_int n) - in - D_TRACE_(Printf.eprintf "Result of memoized call to query %s is %b for node %i\n" s b (Node.to_int n)); - b + bit_vector_unsafe_get bv (Node.to_int n) end; - let b = bit_vector_unsafe_get bv (Node.to_int node) in - D_TRACE_(Printf.eprintf "Result is %b for node %i\n" b (Node.to_int node)); - b + bit_vector_unsafe_get bv (Node.to_int node) end; Predicate.make memo