X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=tree.ml;h=f21015df218da2570ac04f2e4286f72532fe1aa4;hb=cf6d366b25132eea7b0f1966c11d034d748af0fa;hp=26dc7704f87e98e88fa7c9af3aa0e43fb72f4a11;hpb=329088598ab63bc2d67ff0dfc4f54e90f5d4f283;p=SXSI%2Fxpathcomp.git diff --git a/tree.ml b/tree.ml index 26dc770..f21015d 100644 --- a/tree.ml +++ b/tree.ml @@ -35,7 +35,7 @@ external parse_xml_uri : string -> int -> bool -> bool -> tree = "caml_call_shre external parse_xml_string : string -> int -> bool -> bool -> tree = "caml_call_shredder_string" external tree_print_xml_fast3 : tree -> [`Tree ] node -> Unix.file_descr ->unit = "caml_xml_tree_print" external tree_save : tree -> Unix.file_descr -> unit = "caml_xml_tree_save" -external tree_load : Unix.file_descr -> tree = "caml_xml_tree_load" +external tree_load : Unix.file_descr -> bool -> int -> tree = "caml_xml_tree_load" external nullt : unit -> 'a node = "caml_xml_tree_nullt" @@ -53,7 +53,10 @@ external text_is_contains : tree -> string -> bool = "caml_text_collection_is_co external text_count_contains : tree -> string -> int = "caml_text_collection_count_contains" external text_count : tree -> string -> int = "caml_text_collection_count" external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains" -external text_unsorted_contains : tree -> string -> unit = "caml_text_collection_unsorted_contains" +external text_startswith : tree -> string -> [`Text ] node array = "caml_text_collection_startswith" +external text_endswith : tree -> string -> [`Text ] node array = "caml_text_collection_endswith" +external text_equals : tree -> string -> [`Text ] node array = "caml_text_collection_equals" +external text_unsorted_contains : tree -> string -> [`Text ] node array = "caml_text_collection_unsorted_contains" external text_get_cached_text : tree -> [`Text] node -> string = "caml_text_collection_get_cached_text" external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" @@ -222,10 +225,14 @@ let in_array _ i = with Not_found -> false -let init_contains t s = - let a = text_contains t.doc s +let init_textfun f t s = + let a = match f with + | `CONTAINS -> text_contains t.doc s + | `STARTSWITH -> text_startswith t.doc s + | `ENDSWITH -> text_endswith t.doc s + | `EQUALS -> text_equals t.doc s in - Array.fast_sort (compare) a; + (*Array.fast_sort (compare) a; *) contains_array := a; Array.iter (fun x -> Hashtbl.add contains_index x true) !contains_array @@ -380,7 +387,7 @@ let save t str = close_out out_c ;; -let load ?(sample=64) str = +let load ?(sample=64) ?(load_text=true) str = let fd = Unix.openfile str [ Unix.O_RDONLY ] 0o644 in let in_c = Unix.in_channel_of_descr fd in let _ = set_binary_mode_in in_c true in @@ -407,7 +414,7 @@ let load ?(sample=64) str = let _ = Printf.eprintf "\nLoading tag table : " in let ntable = time (load_table) () in ignore(Unix.lseek fd (pos_in in_c) Unix.SEEK_SET); - let tree = { doc = tree_load fd; + let tree = { doc = tree_load fd load_text sample; ttable = ntable;} in close_in in_c; tree