external parse_xml_string : string -> int -> bool -> bool -> tree = "caml_call_shredder_string"
external tree_print_xml_fast3 : tree -> [`Tree ] node -> Unix.file_descr ->unit = "caml_xml_tree_print"
external parse_xml_string : string -> int -> bool -> bool -> tree = "caml_call_shredder_string"
external tree_print_xml_fast3 : tree -> [`Tree ] node -> Unix.file_descr ->unit = "caml_xml_tree_print"
-external tree_save : tree -> Unix.file_descr -> unit = "caml_xml_tree_save"
-external tree_load : Unix.file_descr -> bool -> int -> tree = "caml_xml_tree_load"
-
+external tree_save : tree -> Unix.file_descr -> string -> unit = "caml_xml_tree_save"
+external tree_load : Unix.file_descr -> string -> bool -> int -> tree = "caml_xml_tree_load"
+
external nullt : unit -> 'a node = "caml_xml_tree_nullt"
let nil : [`Tree ] node = nodei ~-1
let nulldoc : [`Text ] node = nodei ~-1
let root : [`Tree ] node = nodei 0
external nullt : unit -> 'a node = "caml_xml_tree_nullt"
let nil : [`Tree ] node = nodei ~-1
let nulldoc : [`Text ] node = nodei ~-1
let root : [`Tree ] node = nodei 0
-external text_get_text : tree -> [`Text] node -> string = "caml_text_collection_get_text"
-external text_is_empty : tree -> [`Text ] node -> bool = "caml_text_collection_empty_text"
+external text_get_text : tree -> [`Text] node -> string = "caml_text_collection_get_text"
+external text_is_empty : tree -> [`Text ] node -> bool = "caml_text_collection_empty_text"
-external text_is_prefix : tree -> string -> bool = "caml_text_collection_is_prefix"
-external text_is_suffix : tree -> string -> bool = "caml_text_collection_is_suffix"
-external text_is_equal : tree -> string -> bool = "caml_text_collection_is_equal"
-external text_is_contains : tree -> string -> bool = "caml_text_collection_is_contains"
-external text_is_lessthan : tree -> string -> bool = "caml_text_collection_is_lessthan"
+external text_is_prefix : tree -> string -> bool = "caml_text_collection_is_prefix"
+external text_is_suffix : tree -> string -> bool = "caml_text_collection_is_suffix"
+external text_is_equal : tree -> string -> bool = "caml_text_collection_is_equal"
+external text_is_contains : tree -> string -> bool = "caml_text_collection_is_contains"
+external text_is_lessthan : tree -> string -> bool = "caml_text_collection_is_lessthan"
external text_count : tree -> string -> int = "caml_text_collection_count"
external text_count_prefix : tree -> string -> int = "caml_text_collection_count_prefix"
external text_count : tree -> string -> int = "caml_text_collection_count"
external text_count_prefix : tree -> string -> int = "caml_text_collection_count_prefix"
external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains"
external text_lessthan : tree -> string -> [`Text ] node array = "caml_text_collection_lessthan"
external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains"
external text_lessthan : tree -> string -> [`Text ] node array = "caml_text_collection_lessthan"
external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" "noalloc"
external tree_size : tree -> int = "caml_xml_tree_size" "noalloc"
external tree_subtree_size : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_size" "noalloc"
external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" "noalloc"
external tree_size : tree -> int = "caml_xml_tree_size" "noalloc"
external tree_subtree_size : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_size" "noalloc"
external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" "noalloc"
external tree_next_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_element" "noalloc"
external tree_prev_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling" "noalloc"
external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" "noalloc"
external tree_next_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_element" "noalloc"
external tree_prev_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling" "noalloc"
-external benchmark_fcns : tree -> unit = "caml_benchmark_fcns" "noalloc"
+external benchmark_fcns : tree -> int = "caml_benchmark_fcns" "noalloc"
+external benchmark_fene : tree -> int = "caml_benchmark_fene" "noalloc"
+external benchmark_iter : tree -> int = "caml_benchmark_iter" "noalloc"
external benchmark_lcps : tree -> unit = "caml_benchmark_lcps" "noalloc"
let benchmark_lcps t = benchmark_lcps t.doc
external benchmark_lcps : tree -> unit = "caml_benchmark_lcps" "noalloc"
let benchmark_lcps t = benchmark_lcps t.doc
let subtree_size t i = tree_subtree_size t.doc i
let subtree_elements t i = tree_subtree_elements t.doc i
let text_size t = text_size t.doc
let subtree_size t i = tree_subtree_size t.doc i
let subtree_elements t i = tree_subtree_elements t.doc i
let text_size t = text_size t.doc
type t = Ptset.Int.t*Ptset.Int.t
let equal (x,y) (z,t) = x == z && y == t
let equal a b = equal a b || equal b a
type t = Ptset.Int.t*Ptset.Int.t
let equal (x,y) (z,t) = x == z && y == t
let equal a b = equal a b || equal b a
type t = Tag.t*Ptset.Int.t
let equal (x,y) (z,t) = (x == z)&&(y == t)
let hash (x,y) = HASHINT2(x,Uid.to_int (Ptset.Int.uid y))
type t = Tag.t*Ptset.Int.t
let equal (x,y) (z,t) = (x == z)&&(y == t)
let hash (x,y) = HASHINT2(x,Uid.to_int (Ptset.Int.uid y))
type t = Tag.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t
let equal (a1,b1,c1,d1,e1) (a2,b2,c2,d2,e2) = a1==a2 &&
b1 == b2 && c1 == c2 && d1 == d2 && e1 == e2
type t = Tag.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t
let equal (a1,b1,c1,d1,e1) (a2,b2,c2,d2,e2) = a1==a2 &&
b1 == b2 && c1 == c2 && d1 == d2 && e1 == e2
HASHINT4(HASHINT2(a,Uid.to_int (Ptset.Int.uid b)),
Uid.to_int (Ptset.Int.uid c),
Uid.to_int (Ptset.Int.uid d),
HASHINT4(HASHINT2(a,Uid.to_int (Ptset.Int.uid b)),
Uid.to_int (Ptset.Int.uid c),
Uid.to_int (Ptset.Int.uid d),
| Not_found -> let s = Ptset.Int.union s1 s2
in
MemUnion.add h_union (s1,s2) s;s
| Not_found -> let s = Ptset.Int.union s1 s2
in
MemUnion.add h_union (s1,s2) s;s
try MemAdd.find h_add (t,s)
with
| Not_found -> let r = Ptset.Int.add t s in
MemAdd.add h_add (t,s) r;r
try MemAdd.find h_add (t,s)
with
| Not_found -> let r = Ptset.Int.add t s in
MemAdd.add h_add (t,s) r;r
- let update t sc sb ss sa =
- let schild,sbelow,ssibling,safter =
+ let update t sc sb ss sa =
+ let schild,sbelow,ssibling,safter =
(Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty)
in
(Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty)
in
- Hashtbl.replace h t
- (pt_cup sc schild,pt_cup sbelow sb, pt_cup ssibling ss, pt_cup safter sa)
+ Hashtbl.replace h t
+ (pt_cup sc schild,pt_cup sbelow sb, pt_cup ssibling ss, pt_cup safter sa)
if id == nil
then Ptset.Int.empty,Ptset.Int.empty,acc_after else
let sibling2,desc2,after2 = loop true (tree_next_sibling tree id) acc_after in
let child1,desc1,after1 = loop false (tree_first_child tree id) after2 in
let tag = tree_tag tree id in
update tag child1 desc1 sibling2 after2;
if id == nil
then Ptset.Int.empty,Ptset.Int.empty,acc_after else
let sibling2,desc2,after2 = loop true (tree_next_sibling tree id) acc_after in
let child1,desc1,after1 = loop false (tree_first_child tree id) after2 in
let tag = tree_tag tree id in
update tag child1 desc1 sibling2 after2;
pt_add tag (pt_cup desc1 desc2),
if right then pt_cup after1 (pt_cup desc1 desc2) else acc_after )
in
pt_add tag (pt_cup desc1 desc2),
if right then pt_cup after1 (pt_cup desc1 desc2) else acc_after )
in
-let init_textfun f t s =
- let a = match f with
- | `CONTAINS -> text_contains t.doc s
- | `STARTSWITH -> text_prefix t.doc s
- | `ENDSWITH -> text_suffix t.doc s
- | `EQUALS -> text_equals t.doc s
+let init_textfun f t s =
+ let a = match f with
+ | `CONTAINS -> text_contains t.doc s
+ | `STARTSWITH -> text_prefix t.doc s
+ | `ENDSWITH -> text_suffix t.doc s
+ | `EQUALS -> text_equals t.doc s
in
(*Array.fast_sort (compare) a; *)
contains_array := a;
Array.iter (fun x -> Hashtbl.add contains_index x true) !contains_array
in
(*Array.fast_sort (compare) a; *)
contains_array := a;
Array.iter (fun x -> Hashtbl.add contains_index x true) !contains_array
let count_contains t s = text_count_contains t.doc s
let init_naive_contains t s =
let i,j = tree_doc_ids t.doc (tree_root t.doc)
in
let regexp = Str.regexp_string s in
let count_contains t s = text_count_contains t.doc s
let init_naive_contains t s =
let i,j = tree_doc_ids t.doc (tree_root t.doc)
in
let regexp = Str.regexp_string s in
else loop (nodei ((inode n)+1)) acc l
in
let acc,l = loop i [] 0 in
else loop (nodei ((inode n)+1)) acc l
in
let acc,l = loop i [] 0 in
let l = Array.length !contains_array in
let i,j = tree_doc_ids tree.doc t in
let id = if l == 0 then i else (array_find !contains_array i j) in
tree_parent_node tree.doc id
let l = Array.length !contains_array in
let i,j = tree_doc_ids tree.doc t in
let id = if l == 0 then i else (array_find !contains_array i j) in
tree_parent_node tree.doc id
- let inf = nodei((inode(snd(tree_doc_ids tree.doc t)))+1) in
- let _,j = tree_doc_ids tree.doc root in
+ let inf = nodei((inode(snd(tree_doc_ids tree.doc t)))+1) in
+ let _,j = tree_doc_ids tree.doc root in
in (*
let _ = Hashtbl.iter (fun t (c,d,ns,f) ->
Printf.eprintf "Tag %s has:\n" (Tag.to_string t);
in (*
let _ = Hashtbl.iter (fun t (c,d,ns,f) ->
Printf.eprintf "Tag %s has:\n" (Tag.to_string t);
let parse_xml_uri str = parse parse_xml_uri str
let parse_xml_string str = parse parse_xml_string str
let size t = tree_size t.doc;;
let parse_xml_uri str = parse parse_xml_uri str
let parse_xml_string str = parse parse_xml_string str
let size t = tree_size t.doc;;
let sl = String.length s in
let ssl = Printf.sprintf "%020i" sl in
ignore (Unix.write fd ssl 0 20);
let sl = String.length s in
let ssl = Printf.sprintf "%020i" sl in
ignore (Unix.write fd ssl 0 20);
(* we need to move the fd to the correct position *)
flush out_c;
ignore (Unix.lseek fd (pos_out out_c) Unix.SEEK_SET);
(* we need to move the fd to the correct position *)
flush out_c;
ignore (Unix.lseek fd (pos_out out_c) Unix.SEEK_SET);
let fd = Unix.openfile str [ Unix.O_RDONLY ] 0o644 in
let in_c = Unix.in_channel_of_descr fd in
let _ = set_binary_mode_in in_c true in
let fd = Unix.openfile str [ Unix.O_RDONLY ] 0o644 in
let in_c = Unix.in_channel_of_descr fd in
let _ = set_binary_mode_in in_c true in
(let ms = input_line in_c in if ms <> magic_string then failwith "Invalid index file");
(let vs = input_line in_c in if vs <> version_string then failwith "Invalid version file");
let table : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t =
(let ms = input_line in_c in if ms <> magic_string then failwith "Invalid index file");
(let vs = input_line in_c in if vs <> version_string then failwith "Invalid version file");
let table : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t =
- Hashtbl.iter (fun k (s1,s2,s3,s4) ->
+ Hashtbl.iter (fun k (s1,s2,s3,s4) ->
let ss1 = Ptset.Int.fold (Ptset.Int.add) s1 Ptset.Int.empty
and ss2 = Ptset.Int.fold (Ptset.Int.add) s2 Ptset.Int.empty
and ss3 = Ptset.Int.fold (Ptset.Int.add) s3 Ptset.Int.empty
let ss1 = Ptset.Int.fold (Ptset.Int.add) s1 Ptset.Int.empty
and ss2 = Ptset.Int.fold (Ptset.Int.add) s2 Ptset.Int.empty
and ss3 = Ptset.Int.fold (Ptset.Int.add) s3 Ptset.Int.empty
let _ = Printf.eprintf "\nLoading tag table : " in
let ntable = time (load_table) () in
ignore(Unix.lseek fd (pos_in in_c) Unix.SEEK_SET);
let _ = Printf.eprintf "\nLoading tag table : " in
let ntable = time (load_table) () in
ignore(Unix.lseek fd (pos_in in_c) Unix.SEEK_SET);
let parent t n = tree_parent t.doc n
let first_child t = let doc = t.doc in ();fun n -> tree_first_child doc n
let first_element t = let doc = t.doc in (); fun n -> tree_first_element doc n
let parent t n = tree_parent t.doc n
let first_child t = let doc = t.doc in ();fun n -> tree_first_child doc n
let first_element t = let doc = t.doc in (); fun n -> tree_first_element doc n
(* these function will be called in two times: first partial application
on the tag, then application of the tag and the tree, then application of
the other arguments. We use the trick to let the compiler optimize application
(* these function will be called in two times: first partial application
on the tag, then application of the tag and the tree, then application of
the other arguments. We use the trick to let the compiler optimize application
let next_sibling t = let doc = t.doc in (); fun n -> tree_next_sibling doc n
let next_element t = let doc = t.doc in (); fun n -> tree_next_element doc n
let next_sibling t = let doc = t.doc in (); fun n -> tree_next_sibling doc n
let next_element t = let doc = t.doc in (); fun n -> tree_next_element doc n
fun n -> tree_select_following_sibling t.doc n v
let next_sibling_below t = (); fun n _ -> tree_next_sibling t.doc n
fun n -> tree_select_following_sibling t.doc n v
let next_sibling_below t = (); fun n _ -> tree_next_sibling t.doc n
let v = (ptset_to_vector ts) in ();
fun n _ -> tree_select_following_sibling t.doc n v
let id t n = tree_node_xml_id t.doc n
let v = (ptset_to_vector ts) in ();
fun n _ -> tree_select_following_sibling t.doc n v
let id t n = tree_node_xml_id t.doc n
- let next t = nodei ( (inode t) + 1 )
- let next2 t = nodei ( (inode t) + 2 )
- let next3 t = nodei ( (inode t) + 3 )
-
+ let next t = nodei ( (inode t) + 1 )
+ let next2 t = nodei ( (inode t) + 2 )
+ let next3 t = nodei ( (inode t) + 3 )
+
let att_str t = try Hashtbl.find h_att t with
Not_found -> let s = Tag.to_string t in
let attname = String.sub s 3 ((String.length s) -3) in
let att_str t = try Hashtbl.find h_att t with
Not_found -> let s = Tag.to_string t in
let attname = String.sub s 3 ((String.length s) -3) in
begin
output_string outc (text_get_text tree (tree_my_text_unsafe tree t));
loop (next2 t) (* skip closing $ *)
end
else
let tagstr = tag_str tag in
begin
output_string outc (text_get_text tree (tree_my_text_unsafe tree t));
loop (next2 t) (* skip closing $ *)
end
else
let tagstr = tag_str tag in
output_string outc tagstr in
let t' = next t in
if tree_is_open tree t' then
let _ = push tagstr in
let tag' = tree_tag tree t' in
output_string outc tagstr in
let t' = next t in
if tree_is_open tree t' then
let _ = push tagstr in
let tag' = tree_tag tree t' in
output_string outc ">"; loop t'' else (output_string outc ">";loop_tag t' tag')
else (* closing with no content *)
let _ = output_string outc "/>" in
output_string outc ">"; loop t'' else (output_string outc ">";loop_tag t' tag')
else (* closing with no content *)
let _ = output_string outc "/>" in
let att_str t = try Hashtbl.find h_att t with
Not_found -> let s = Tag.to_string t in
let attname = String.sub s 3 ((String.length s) -3) in
Hashtbl.add h_att t attname;attname
in fun outc tree t ->
let att_str t = try Hashtbl.find h_att t with
Not_found -> let s = Tag.to_string t in
let attname = String.sub s 3 ((String.length s) -3) in
Hashtbl.add h_att t attname;attname
in fun outc tree t ->
if (tag tree l) == Tag.attribute then
begin
loop_attributes (first_child tree l);
if (next_sibling tree l) == nil then output_string outc "/>"
if (tag tree l) == Tag.attribute then
begin
loop_attributes (first_child tree l);
if (next_sibling tree l) == nil then output_string outc "/>"
let a,_,_,_ = Hashtbl.find t.ttable tag in a
let a,_,_,_ = Hashtbl.find t.ttable tag in a
let _,a,_,_ = Hashtbl.find t.ttable tag in a
let _,a,_,_ = Hashtbl.find t.ttable tag in a
let _,_,a,_ = Hashtbl.find t.ttable tag in a
let _,_,a,_ = Hashtbl.find t.ttable tag in a
let _,_,_,a = Hashtbl.find t.ttable tag in a
let tags t tag = Hashtbl.find t.ttable tag
let _,_,_,a = Hashtbl.find t.ttable tag in a
let tags t tag = Hashtbl.find t.ttable tag
let rec loop t n =
if t != nil then
let tag = (tree_tag tree.doc t ) in
let tagstr = Tag.to_string tag in
let tab = String.make n ' ' in
let rec loop t n =
if t != nil then
let tag = (tree_tag tree.doc t ) in
let tagstr = Tag.to_string tag in
let tab = String.make n ' ' in
tab tagstr (text_get_text tree.doc (tree_my_text tree.doc t)) tagstr
else begin
Format.fprintf fmt "%s<%s>\n" tab tagstr;
tab tagstr (text_get_text tree.doc (tree_my_text tree.doc t)) tagstr
else begin
Format.fprintf fmt "%s<%s>\n" tab tagstr;
let test_prefix t s = Array.length (text_prefix t.doc s)
let test_suffix t s = Array.length (text_suffix t.doc s)
let test_prefix t s = Array.length (text_prefix t.doc s)
let test_suffix t s = Array.length (text_suffix t.doc s)