X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=src%2Ftree.ml;h=fae8ebaea4dac30eea66c0b4b73db01f99acd189;hb=4309f9456521bffcab5ff79abe1ed826744c3a57;hp=66bf1593bcbd3ca71ea09d46238784a76a943609;hpb=a145e1cff02534a93be2544303551c7ea94f0083;p=SXSI%2Fxpathcomp.git diff --git a/src/tree.ml b/src/tree.ml index 66bf159..fae8eba 100644 --- a/src/tree.ml +++ b/src/tree.ml @@ -51,11 +51,11 @@ module TreeBuilder = struct type t external create : unit -> t = "caml_xml_tree_builder_create" - external open_document : t -> bool -> int -> bool -> int -> unit = "caml_xml_tree_builder_open_document" + external open_document : t -> int -> bool -> int -> unit = "caml_xml_tree_builder_open_document" external close_document : t -> tree = "caml_xml_tree_builder_close_document" - external open_tag : t -> string -> unit = "caml_xml_tree_builder_new_open_tag" - external close_tag : t -> string -> unit = "caml_xml_tree_builder_new_closing_tag" - external text : t -> string -> unit = "caml_xml_tree_builder_new_text" + external open_tag : t -> string -> unit = "caml_xml_tree_builder_open_tag" + external close_tag : t -> string -> unit = "caml_xml_tree_builder_close_tag" + external text : t -> string -> unit = "caml_xml_tree_builder_text" let is_whitespace s = let rec loop len i = @@ -96,7 +96,6 @@ struct close_tag b atname let start_element_handler parser_ b t tag attr_list = - display_count parser_; do_text b t; open_tag b tag; match attr_list with @@ -108,12 +107,10 @@ struct let end_element_handler parser_ b t tag = - display_count parser_; do_text b t; close_tag b tag let character_data_handler parser_ _ t text = - display_count parser_; Buffer.add_string t text let create_parser () = @@ -133,8 +130,7 @@ struct Expat.set_end_element_handler parser_ (end_element_handler parser_ build buf); Expat.set_character_data_handler parser_ (character_data_handler parser_ build buf); Logger.print Format.err_formatter "Started parsing@\n"; - open_document build !Options.index_empty_texts !Options.sample_factor - !Options.disable_text_collection !Options.text_index_type; + open_document build !Options.sample_factor !Options.disable_text_collection !Options.text_index_type; open_tag build ""; parser_, finalize @@ -184,7 +180,7 @@ let bit_vector_unsafe_set v i b = let bit_vector_create n = let len = if n <= 0 then 0 else (n - 1) / 8 + 1 in - String.make len '\000' + String.make len '\000' type t = { doc : tree; @@ -216,26 +212,25 @@ external nullt : unit -> 'a Node.t = "caml_xml_tree_nullt" let nil : [`Tree ] Node.t = Node.nil let root : [`Tree ] Node.t = Node.null -type unordered_set +type tag_list -external unordered_set_alloc : int -> unordered_set = "caml_unordered_set_alloc" -external unordered_set_length : unordered_set -> int = "caml_unordered_set_length" -external unordered_set_insert : unordered_set -> int -> unit = "caml_unordered_set_set" "noalloc" +external tag_list_alloc : int -> tag_list = "caml_tag_list_alloc" +external tag_list_set : tag_list -> int -> Tag.t -> unit = "caml_tag_list_set" "noalloc" module HPtset = Hashtbl.Make(Ptset.Int) let vector_htbl = HPtset.create MED_H_SIZE -let unordered_set_of_set s = +let tag_list_of_set s = try HPtset.find vector_htbl s with - Not_found -> - let v = unordered_set_alloc (Ptset.Int.cardinal s) in - let _ = Ptset.Int.iter (fun e -> unordered_set_insert v e) s in - HPtset.add vector_htbl s v; v - -let ptset_to_vector = unordered_set_of_set + Not_found -> + let v = tag_list_alloc (Ptset.Int.cardinal s + 1) in + let i = ref 0 in + let () = Ptset.Int.iter (fun e -> tag_list_set v !i e; incr i) s in + let () = tag_list_set v !i Tag.nullt in + HPtset.add vector_htbl s v; v (** tree interface *) @@ -251,7 +246,7 @@ let first_element t n = tree_first_element t.doc n external tree_tagged_child : tree -> [`Tree] Node.t -> Tag.t -> [`Tree] Node.t = "caml_xml_tree_tagged_child" "noalloc" let tagged_child t n tag = tree_tagged_child t.doc n tag -external tree_select_child : tree -> [`Tree ] Node.t -> unordered_set -> [`Tree] Node.t = "caml_xml_tree_select_child" "noalloc" +external tree_select_child : tree -> [`Tree ] Node.t -> tag_list -> [`Tree] Node.t = "caml_xml_tree_select_child" "noalloc" let select_child t n tag_set = tree_select_child t.doc n tag_set external tree_last_child : tree -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_last_child" "noalloc" @@ -264,14 +259,13 @@ let next_sibling t n = tree_next_sibling t.doc n external tree_next_element : tree -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_next_element" "noalloc" let next_element t n = tree_next_element t.doc n -external tree_next_node_before : tree -> [`Tree] Node.t -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_next_node_before" "noalloc" -let next_node_before t n ctx = tree_next_node_before t.doc n ctx -external tree_tagged_following_sibling : tree -> [`Tree] Node.t -> Tag.t -> [`Tree] Node.t = "caml_xml_tree_tagged_following_sibling" "noalloc" -let tagged_following_sibling t n tag = tree_tagged_following_sibling t.doc n tag +external tree_tagged_sibling : tree -> [`Tree] Node.t -> Tag.t -> [`Tree] Node.t = "caml_xml_tree_tagged_sibling" "noalloc" +let tagged_sibling t n tag = tree_tagged_sibling t.doc n tag + -external tree_select_following_sibling : tree -> [`Tree ] Node.t -> unordered_set -> [`Tree] Node.t = "caml_xml_tree_select_following_sibling" "noalloc" -let select_following_sibling t n tag_set = tree_select_following_sibling t.doc n tag_set +external tree_select_sibling : tree -> [`Tree ] Node.t -> tag_list -> [`Tree] Node.t = "caml_xml_tree_select_sibling" "noalloc" +let select_sibling t n tag_set = tree_select_sibling t.doc n tag_set external tree_prev_sibling : tree -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_prev_sibling" "noalloc" let prev_sibling t n = tree_prev_sibling t.doc n @@ -284,23 +278,18 @@ let tagged_descendant t n tag = tree_tagged_descendant t.doc n tag external tree_tagged_next : tree -> [`Tree ] Node.t -> Tag.t -> [`Tree ] Node.t = "caml_xml_tree_tagged_next" "noalloc" let tagged_next t n tag = tree_tagged_next t.doc n tag -external tree_select_descendant : tree -> [`Tree ] Node.t -> unordered_set -> [`Tree] Node.t = "caml_xml_tree_select_descendant" "noalloc" +external tree_select_descendant : tree -> [`Tree ] Node.t -> tag_list -> [`Tree] Node.t = "caml_xml_tree_select_descendant" "noalloc" let select_descendant t n tag_set = tree_select_descendant t.doc n tag_set external tree_tagged_following_before : tree -> [`Tree ] Node.t -> Tag.t -> [`Tree ] Node.t -> [`Tree ] Node.t = "caml_xml_tree_tagged_following_before" "noalloc" let tagged_following_before t n tag ctx = tree_tagged_following_before t.doc n tag ctx -external tree_select_following_before : tree -> [`Tree ] Node.t -> unordered_set -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_select_following_before" "noalloc" +external tree_select_following_before : tree -> [`Tree ] Node.t -> tag_list -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_select_following_before" "noalloc" let select_following_before t n tag_set ctx = tree_select_following_before t.doc n tag_set ctx external tree_parent : tree -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_parent" "noalloc" let parent t n = tree_parent t.doc n -external tree_binary_parent : tree -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_binary_parent" - "noalloc" -let binary_parent t n = tree_binary_parent t.doc n - - external tree_tag : tree -> [`Tree] Node.t -> Tag.t = "caml_xml_tree_tag" "noalloc" let tag t n = tree_tag t.doc n @@ -326,7 +315,7 @@ open Format let dump_tag_table t = let tag = ref 0 in let printer ppf set = - Logger.print ppf "%s: %a" + Logger.print ppf "%s: %a" (Tag.to_string !tag) TagSet.print (TagSet.inj_positive set); incr tag in