X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=tree.ml;h=f21015df218da2570ac04f2e4286f72532fe1aa4;hb=cf6d366b25132eea7b0f1966c11d034d748af0fa;hp=1ff20824e5ded8881830c062d4a7c950f12f0ad2;hpb=be1caa5c46009c13241cc48ed34a36ee2936ef87;p=SXSI%2Fxpathcomp.git diff --git a/tree.ml b/tree.ml index 1ff2082..f21015d 100644 --- a/tree.ml +++ b/tree.ml @@ -20,6 +20,11 @@ type tree type 'a node = private int type node_kind = [`Text | `Tree ] +type t = { + doc : tree; + ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t; +} + external inode : 'a node -> int = "%identity" external nodei : int -> 'a node = "%identity" let compare_node x y = (inode x) - (inode y) @@ -30,7 +35,7 @@ external parse_xml_uri : string -> int -> bool -> bool -> tree = "caml_call_shre external parse_xml_string : string -> int -> bool -> bool -> tree = "caml_call_shredder_string" external tree_print_xml_fast3 : tree -> [`Tree ] node -> Unix.file_descr ->unit = "caml_xml_tree_print" external tree_save : tree -> Unix.file_descr -> unit = "caml_xml_tree_save" -external tree_load : Unix.file_descr -> tree = "caml_xml_tree_load" +external tree_load : Unix.file_descr -> bool -> int -> tree = "caml_xml_tree_load" external nullt : unit -> 'a node = "caml_xml_tree_nullt" @@ -48,7 +53,10 @@ external text_is_contains : tree -> string -> bool = "caml_text_collection_is_co external text_count_contains : tree -> string -> int = "caml_text_collection_count_contains" external text_count : tree -> string -> int = "caml_text_collection_count" external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains" -external text_unsorted_contains : tree -> string -> unit = "caml_text_collection_unsorted_contains" +external text_startswith : tree -> string -> [`Text ] node array = "caml_text_collection_startswith" +external text_endswith : tree -> string -> [`Text ] node array = "caml_text_collection_endswith" +external text_equals : tree -> string -> [`Text ] node array = "caml_text_collection_equals" +external text_unsorted_contains : tree -> string -> [`Text ] node array = "caml_text_collection_unsorted_contains" external text_get_cached_text : tree -> [`Text] node -> string = "caml_text_collection_get_cached_text" external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" @@ -63,10 +71,10 @@ external tree_parent_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tr external tree_first_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" "noalloc" external tree_closing : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_closing" "noalloc" external tree_is_open : tree -> [`Tree] node -> bool = "caml_xml_tree_is_open" "noalloc" -external tree_first_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_element" "noalloc" +external tree_first_element : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_element" "noalloc" external tree_tagged_child : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_child" "noalloc" external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" "noalloc" -external tree_next_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_element" "noalloc" +external tree_next_element : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_element" "noalloc" external tree_tagged_sibling : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_sibling" "noalloc" external tree_prev_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling" "noalloc" @@ -79,8 +87,7 @@ external tree_tag_id : tree -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag_id" let tree_is_last t n = equal_node nil (tree_next_sibling t n) - -(*external tree_prev_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_prev_text" "noalloc" *) + external tree_my_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text" "noalloc" external tree_my_text_unsafe : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text_unsafe" "noalloc" @@ -100,6 +107,7 @@ external tree_node_xml_id : tree -> [`Tree ] node -> int = "caml_xml_tree_node_x external tree_is_ancestor : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" "noalloc" external tree_tagged_desc : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_desc" "noalloc" external tree_tagged_foll_below : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_below" "noalloc" +external tree_tagged_foll_before : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_before" "noalloc" external tree_subtree_tags : tree -> [`Tree ] node -> Tag.t -> int = "caml_xml_tree_subtree_tags" "noalloc" @@ -112,6 +120,7 @@ external tree_select_child : tree -> [`Tree ] node -> unordered_set -> [`Tree] n external tree_select_foll_sibling : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_foll_sibling" "noalloc" external tree_select_desc : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_desc" "noalloc" external tree_select_foll_below : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_foll_below" "noalloc" +external tree_select_foll_before : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_foll_before" "noalloc" module HPtset = Hashtbl.Make(Ptset.Int) @@ -128,10 +137,7 @@ let ptset_to_vector s = HPtset.add vector_htbl s v; v -type t = { - doc : tree; - ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t; -} + let subtree_size t i = tree_subtree_size t.doc i let subtree_elements t i = tree_subtree_elements t.doc i let text_size t = text_size t.doc @@ -219,10 +225,14 @@ let in_array _ i = with Not_found -> false -let init_contains t s = - let a = text_contains t.doc s +let init_textfun f t s = + let a = match f with + | `CONTAINS -> text_contains t.doc s + | `STARTSWITH -> text_startswith t.doc s + | `ENDSWITH -> text_endswith t.doc s + | `EQUALS -> text_equals t.doc s in - Array.fast_sort (compare) a; + (*Array.fast_sort (compare) a; *) contains_array := a; Array.iter (fun x -> Hashtbl.add contains_index x true) !contains_array @@ -377,7 +387,7 @@ let save t str = close_out out_c ;; -let load ?(sample=64) str = +let load ?(sample=64) ?(load_text=true) str = let fd = Unix.openfile str [ Unix.O_RDONLY ] 0o644 in let in_c = Unix.in_channel_of_descr fd in let _ = set_binary_mode_in in_c true in @@ -404,7 +414,7 @@ let load ?(sample=64) str = let _ = Printf.eprintf "\nLoading tag table : " in let ntable = time (load_table) () in ignore(Unix.lseek fd (pos_in in_c) Unix.SEEK_SET); - let tree = { doc = tree_load fd; + let tree = { doc = tree_load fd load_text sample; ttable = ntable;} in close_in in_c; tree @@ -442,7 +452,7 @@ let is_binary_ancestor t n1 n2 = let parent t n = tree_parent t.doc n let first_child t = (); fun n -> tree_first_child t.doc n -let first_element t = (); fun n -> tree_first_element t.doc n +let first_element t = (); fun n -> tree_first_element t n (* these function will be called in two times: first partial application on the tag, then application of the tag and the tree, then application of @@ -456,7 +466,7 @@ let select_child t = fun ts -> fun n -> tree_select_child t.doc n v let next_sibling t = (); fun n -> tree_next_sibling t.doc n -let next_element t = (); fun n -> tree_next_element t.doc n +let next_element t = (); fun n -> tree_next_element t n let tagged_sibling t tag = (); fun n -> tree_tagged_sibling t.doc n tag @@ -465,7 +475,7 @@ let select_sibling t = fun ts -> fun n -> tree_select_foll_sibling t.doc n v let next_sibling_ctx t = (); fun n _ -> tree_next_sibling t.doc n -let next_element_ctx t = (); fun n _ -> tree_next_element t.doc n +let next_element_ctx t = (); fun n _ -> tree_next_element t n let tagged_sibling_ctx t tag = (); fun n _ -> tree_tagged_sibling t.doc n tag let select_sibling_ctx t = fun ts ->