(* Bindings to C stubs: each [external] gives an OCaml name and type to the
   C symbol named by the trailing string literal. *)
external parse_xml_string : string -> int -> bool -> bool -> tree = "caml_call_shredder_string"
external tree_print_xml_fast3 : tree -> [`Tree ] node -> Unix.file_descr ->unit = "caml_xml_tree_print"
external tree_save : tree -> Unix.file_descr -> unit = "caml_xml_tree_save"
(* tree_load gains two extra arguments (a bool and an int) in this change.
   The only visible caller, [load], passes its [load_text] and [sample]
   values in those positions. *)
-external tree_load : Unix.file_descr -> tree = "caml_xml_tree_load"
+external tree_load : Unix.file_descr -> bool -> int -> tree = "caml_xml_tree_load"
external nullt : unit -> 'a node = "caml_xml_tree_nullt"
(* Text-collection queries. The [string] argument is presumably the search
   pattern -- TODO confirm against the C stubs. *)
external text_count_contains : tree -> string -> int = "caml_text_collection_count_contains"
external text_count : tree -> string -> int = "caml_text_collection_count"
external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains"
(* This change adds the startswith / endswith / equals queries, and changes
   text_unsorted_contains from returning unit to returning an array of text
   nodes, like text_contains. The C symbol names are unchanged, so the C side
   must return a matching value -- NOTE(review): confirm the stub was updated. *)
-external text_unsorted_contains : tree -> string -> unit = "caml_text_collection_unsorted_contains"
+external text_startswith : tree -> string -> [`Text ] node array = "caml_text_collection_startswith"
+external text_endswith : tree -> string -> [`Text ] node array = "caml_text_collection_endswith"
+external text_equals : tree -> string -> [`Text ] node array = "caml_text_collection_equals"
+external text_unsorted_contains : tree -> string -> [`Text ] node array = "caml_text_collection_unsorted_contains"
external text_get_cached_text : tree -> [`Text] node -> string = "caml_text_collection_get_cached_text"
external tree_root : tree -> [`Tree] node = "caml_xml_tree_root"
with
Not_found -> false
(* Runs a text query on [t.doc] with the string [s] and caches the result:
   the returned node array is stored in [contains_array], and every node in
   it is added to [contains_index] bound to [true].
   This change renames init_contains to init_textfun and adds the selector
   [f], which picks the underlying query: `CONTAINS, `STARTSWITH, `ENDSWITH
   or `EQUALS. Existing callers of init_contains need the new name and the
   extra first argument. *)
-let init_contains t s =
- let a = text_contains t.doc s
+let init_textfun f t s =
+ let a = match f with
+ | `CONTAINS -> text_contains t.doc s
+ | `STARTSWITH -> text_startswith t.doc s
+ | `ENDSWITH -> text_endswith t.doc s
+ | `EQUALS -> text_equals t.doc s
in
(* The in-place sort of the result is disabled by this change.
   NOTE(review): presumably the C side already returns the nodes in order, or
   no consumer of contains_array depends on ordering -- confirm. *)
- Array.fast_sort (compare) a;
+ (*Array.fast_sort (compare) a; *)
contains_array := a;
(* NOTE(review): assuming the standard Hashtbl, add does not remove earlier
   bindings, so nodes from a previous call remain in contains_index unless
   the table is reset elsewhere. *)
Array.iter (fun x -> Hashtbl.add contains_index x true) !contains_array
close_out out_c
;;
(* [load ?sample ?load_text str] opens the file [str] read-only, obtains the
   tag table by running [load_table] under [time], then has [tree_load] read
   the tree from the same file descriptor. Returns a record whose [doc] is
   the loaded tree and whose [ttable] is the tag table.
   This change adds [?load_text] (default true) and forwards both [load_text]
   and [sample] (default 64) to [tree_load].
   NOTE(review): neither [fd] nor [in_c] is closed if [load_table] or
   [tree_load] raises, so the descriptor leaks on failure. *)
-let load ?(sample=64) str =
+let load ?(sample=64) ?(load_text=true) str =
(* The 0o644 permission argument only matters when a file is created, which
   O_RDONLY alone never does. *)
let fd = Unix.openfile str [ Unix.O_RDONLY ] 0o644 in
let in_c = Unix.in_channel_of_descr fd in
let _ = set_binary_mode_in in_c true in
let _ = Printf.eprintf "\nLoading tag table : " in
(* presumably load_table reads from in_c -- its definition is not visible
   here; verify. *)
let ntable = time (load_table) () in
(* Move the descriptor offset to the logical position of the channel, so the
   next read on [fd] starts where the channel reader stopped, not where its
   buffer read ahead to. *)
ignore(Unix.lseek fd (pos_in in_c) Unix.SEEK_SET);
- let tree = { doc = tree_load fd;
+ let tree = { doc = tree_load fd load_text sample;
ttable = ntable;}
(* Closing the channel also closes the underlying descriptor [fd]. *)
in close_in in_c;
tree