type 'a node = private int
type node_kind = [`Text | `Tree ]
-type t = {
- doc : tree;
- ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t;
+type t = {
+ doc : tree;
+ children : Ptset.Int.t array;
+ siblings : Ptset.Int.t array;
+ descendants: Ptset.Int.t array;
+ followings: Ptset.Int.t array;
}
-external inode : 'a node -> int = "%identity"
-external nodei : int -> 'a node = "%identity"
+external inode : 'a node -> int = "%identity"
+external nodei : int -> 'a node = "%identity"
let compare_node x y = (inode x) - (inode y)
let equal_node : 'a node -> 'a node -> bool = (==)
-
-external parse_xml_uri : string -> int -> bool -> bool -> tree = "caml_call_shredder_uri"
+
+external parse_xml_uri : string -> int -> bool -> bool -> tree = "caml_call_shredder_uri"
external parse_xml_string : string -> int -> bool -> bool -> tree = "caml_call_shredder_string"
external tree_print_xml_fast3 : tree -> [`Tree ] node -> Unix.file_descr ->unit = "caml_xml_tree_print"
external tree_save : tree -> Unix.file_descr -> string -> unit = "caml_xml_tree_save"
external tree_load : Unix.file_descr -> string -> bool -> int -> tree = "caml_xml_tree_load"
-
+
external nullt : unit -> 'a node = "caml_xml_tree_nullt"
let nil : [`Tree ] node = nodei ~-1
let nulldoc : [`Text ] node = nodei ~-1
let root : [`Tree ] node = nodei 0
-external text_get_text : tree -> [`Text] node -> string = "caml_text_collection_get_text"
-external text_is_empty : tree -> [`Text ] node -> bool = "caml_text_collection_empty_text"
+external text_get_text : tree -> [`Text] node -> string = "caml_text_collection_get_text"
+external text_is_empty : tree -> [`Text ] node -> bool = "caml_text_collection_empty_text"
let text_is_empty t n = (equal_node nulldoc n) || text_is_empty t n
-external text_is_prefix : tree -> string -> bool = "caml_text_collection_is_prefix"
-external text_is_suffix : tree -> string -> bool = "caml_text_collection_is_suffix"
-external text_is_equal : tree -> string -> bool = "caml_text_collection_is_equal"
-external text_is_contains : tree -> string -> bool = "caml_text_collection_is_contains"
-external text_is_lessthan : tree -> string -> bool = "caml_text_collection_is_lessthan"
+external text_is_prefix : tree -> string -> bool = "caml_text_collection_is_prefix"
+external text_is_suffix : tree -> string -> bool = "caml_text_collection_is_suffix"
+external text_is_equal : tree -> string -> bool = "caml_text_collection_is_equal"
+external text_is_contains : tree -> string -> bool = "caml_text_collection_is_contains"
+external text_is_lessthan : tree -> string -> bool = "caml_text_collection_is_lessthan"
external text_count : tree -> string -> int = "caml_text_collection_count"
external text_count_prefix : tree -> string -> int = "caml_text_collection_count_prefix"
external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains"
external text_lessthan : tree -> string -> [`Text ] node array = "caml_text_collection_lessthan"
-
+
external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" "noalloc"
external tree_size : tree -> int = "caml_xml_tree_size" "noalloc"
+external tree_num_tags : tree -> int = "caml_xml_tree_num_tags" "noalloc"
external tree_subtree_size : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_size" "noalloc"
external tree_subtree_elements : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_elements" "noalloc"
external tree_subtree_tags : tree -> [`Tree] node -> Tag.t -> int = "caml_xml_tree_subtree_elements" "noalloc"
external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" "noalloc"
external tree_next_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_element" "noalloc"
external tree_prev_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling" "noalloc"
-external tree_tagged_child : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_child" "noalloc"
+external tree_tagged_child : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_child" "noalloc"
type unordered_set
external unordered_set_alloc : int -> unordered_set = "caml_unordered_set_alloc"
external tree_is_open : tree -> [`Tree] node -> bool = "caml_xml_tree_is_open" "noalloc"
-external benchmark_jump : tree -> Tag.t -> unit = "caml_benchmark_jump" "noalloc"
+external benchmark_jump : tree -> Tag.t -> int = "caml_benchmark_jump" "noalloc"
let benchmark_jump t s = benchmark_jump t.doc s
external benchmark_fcns : tree -> int = "caml_benchmark_fcns" "noalloc"
+external benchmark_fene : tree -> int = "caml_benchmark_fene" "noalloc"
+external benchmark_iter : tree -> int = "caml_benchmark_iter" "noalloc"
let benchmark_fcns t = benchmark_fcns t.doc
+let benchmark_fene t = benchmark_fene t.doc
+
+let benchmark_iter t = benchmark_iter t.doc
+
external benchmark_lcps : tree -> unit = "caml_benchmark_lcps" "noalloc"
let benchmark_lcps t = benchmark_lcps t.doc
let vector_htbl = HPtset.create MED_H_SIZE
let ptset_to_vector s =
- try
+ try
HPtset.find vector_htbl s
with
Not_found ->
let _ = Ptset.Int.iter (fun e -> unordered_set_insert v e) s in
HPtset.add vector_htbl s v; v
-
+
let subtree_size t i = tree_subtree_size t.doc i
let subtree_elements t i = tree_subtree_elements t.doc i
let text_size t = text_size t.doc
-module MemUnion = Hashtbl.Make (struct
- type t = Ptset.Int.t*Ptset.Int.t
- let equal (x,y) (z,t) = x == z && y == t
- let equal a b = equal a b || equal b a
- let hash (x,y) = (* commutative hash *)
- let x = Uid.to_int (Ptset.Int.uid x)
- and y = Uid.to_int (Ptset.Int.uid y)
- in
- if x <= y then HASHINT2(x,y) else HASHINT2(y,x)
- end)
-
-module MemAdd = Hashtbl.Make (
- struct
- type t = Tag.t*Ptset.Int.t
- let equal (x,y) (z,t) = (x == z)&&(y == t)
- let hash (x,y) = HASHINT2(x,Uid.to_int (Ptset.Int.uid y))
- end)
-
-module MemUpdate = struct
-include Hashtbl.Make (
- struct
- type t = Tag.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t
- let equal (a1,b1,c1,d1,e1) (a2,b2,c2,d2,e2) = a1==a2 &&
- b1 == b2 && c1 == c2 && d1 == d2 && e1 == e2
- let hash (a,b,c,d,e) =
- HASHINT4(HASHINT2(a,Uid.to_int (Ptset.Int.uid b)),
- Uid.to_int (Ptset.Int.uid c),
- Uid.to_int (Ptset.Int.uid d),
- Uid.to_int (Ptset.Int.uid e))
- end)
-end
-
-let collect_tags tree =
- let _ = Printf.eprintf "Collecting Tags\n%!" in
- let h_union = MemUnion.create BIG_H_SIZE in
- let pt_cup s1 s2 =
- try
- MemUnion.find h_union (s1,s2)
- with
- | Not_found -> let s = Ptset.Int.union s1 s2
- in
- MemUnion.add h_union (s1,s2) s;s
- in
- let h_add = MemAdd.create BIG_H_SIZE in
- let pt_add t s =
- try MemAdd.find h_add (t,s)
- with
- | Not_found -> let r = Ptset.Int.add t s in
- MemAdd.add h_add (t,s) r;r
- in
- let h = Hashtbl.create BIG_H_SIZE in
- let update t sc sb ss sa =
- let schild,sbelow,ssibling,safter =
- try
- Hashtbl.find h t
- with
- | Not_found ->
- (Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty)
- in
- Hashtbl.replace h t
- (pt_cup sc schild,pt_cup sbelow sb, pt_cup ssibling ss, pt_cup safter sa)
+let rec fold_siblings tree f node acc =
+ if node == nil then acc else fold_siblings tree f (tree_next_sibling tree node) (f node acc)
+module TS =
+ struct
+ type t = bool array
+ let create n = Array.create n false
+ let add e a = a.(e) <- true; a
+ let merge a b =
+ for i = 0 to Array.length a - 1 do
+ a.(i) <- a.(i) || b.(i)
+ done
+ let clear a =
+ for i = 0 to Array.length a - 1 do
+ a.(i) <- false;
+ done
+
+ let to_ptset a =
+ let r = ref Ptset.Int.empty in
+ for i = 0 to Array.length a - 1 do
+ r := Ptset.Int.add i !r;
+ done;
+ !r
+ end
+
+
+let collect_children_siblings tree =
+ let ntags = (tree_num_tags tree) in
+ let () = Printf.eprintf ">>>length: %i\n%!" ntags in
+ let table_c = Array.init (tree_num_tags tree) (fun _ -> TS.create ntags) in
+ let table_n = Array.init (tree_num_tags tree) (fun _ -> TS.create ntags) in
+ let acc_tag n s = TS.add (tree_tag tree n) s in
+ let count = ref 0 in
+ let size = tree_subtree_size tree root in
+ let tmp = TS.create ntags in
+ let rec loop node =
+ if node == nil then ()
+ else
+ let () = if !count mod 10000 == 0 then
+ Printf.eprintf "Node %i / %i\n%!" !count size;
+ in
+ let () = if !count mod 1000000 == 0 then Gc.compact() in
+ let () = count := !count + 1 in
+ let tag = tree_tag tree node in
+ let () = TS.clear tmp in
+ let children =
+ fold_siblings tree
+ acc_tag
+ (tree_first_child tree node) tmp
+ in
+ let () = TS.merge table_c.(tag) children in
+ let () = TS.clear tmp in
+ let siblings =
+ fold_siblings tree
+ acc_tag
+ (tree_next_sibling tree node) tmp
+ in
+ TS.merge table_n.(tag) siblings;
+ loop (tree_first_child tree node);
+ loop (tree_next_sibling tree node)
in
- let rec loop right id acc_after =
- if id == nil
- then Ptset.Int.empty,Ptset.Int.empty,acc_after else
- let sibling2,desc2,after2 = loop true (tree_next_sibling tree id) acc_after in
- let child1,desc1,after1 = loop false (tree_first_child tree id) after2 in
- let tag = tree_tag tree id in
- update tag child1 desc1 sibling2 after2;
- ( pt_add tag sibling2,
- pt_add tag (pt_cup desc1 desc2),
- if right then pt_cup after1 (pt_cup desc1 desc2) else acc_after )
+ loop root;
+ ( Array.map TS.to_ptset table_c,
+ Array.map TS.to_ptset table_n )
+
+let collect_children_siblings tree =
+ let table_c = Array.create (tree_num_tags tree) Ptset.Int.empty in
+ let table_n = Array.copy table_c in
+ let rec loop node =
+ if node == nil then Ptset.Int.empty
+ else
+ let children = loop (tree_first_child tree node) in
+ let tag = tree_tag tree node in
+ let () = table_c.(tag) <- Ptset.Int.union table_c.(tag) children in
+ let siblings = loop (tree_next_sibling tree node) in
+ Ptset.Int.add tag siblings
in
- let _ = loop false (tree_root tree) Ptset.Int.empty in
- let _ = Printf.eprintf "Finished\n%!" in
- h
+ ignore (loop root);
+ table_c, table_n
+let collect_descendants tree =
+ let table_d = Array.create (tree_num_tags tree) Ptset.Int.empty in
+ let rec loop node =
+ if node == nil then Ptset.Int.empty
+ else
+ let d1 = loop (tree_first_child tree node) in
+ let d2 = loop (tree_next_sibling tree node) in
+ let tag = tree_tag tree node in
+ table_d.(tag) <- Ptset.Int.union table_d.(tag) d1;
+ Ptset.Int.add tag (Ptset.Int.union d1 d2)
+ in
+ ignore (loop root);
+ table_d
+
+let collect_followings tree =
+ let table_f = Array.create (tree_num_tags tree) Ptset.Int.empty in
+ let rec loop node acc =
+ if node == nil then acc else
+ let f1 = loop (tree_next_sibling tree node) acc in
+ let f2 = loop (tree_first_child tree node) f1 in
+ let tag = tree_tag tree node in
+ table_f.(tag) <- Ptset.Int.union table_f.(tag) f1;
+ Ptset.Int.add tag (Ptset.Int.union f1 f2)
+ in
+ ignore (loop root Ptset.Int.empty);
+ table_f
+
+let collect_tags tree =
+ let c,n = time (collect_children_siblings) tree ~msg:"Collecting child and sibling tags" in
+ let d = time collect_descendants tree ~msg:"Collecting descendant tags" in
+ let f = time collect_followings tree ~msg:"Collecting following tags" in
+ c,n,d,f
+
let contains_array = ref [| |]
-let contains_index = Hashtbl.create 4096
+let contains_index = Hashtbl.create 4096
let in_array _ i =
try
Hashtbl.find contains_index i
with
Not_found -> false
-let init_textfun f t s =
- let a = match f with
- | `CONTAINS -> text_contains t.doc s
- | `STARTSWITH -> text_prefix t.doc s
- | `ENDSWITH -> text_suffix t.doc s
- | `EQUALS -> text_equals t.doc s
+let init_textfun f t s =
+ let a = match f with
+ | `CONTAINS -> text_contains t.doc s
+ | `STARTSWITH -> text_prefix t.doc s
+ | `ENDSWITH -> text_suffix t.doc s
+ | `EQUALS -> text_equals t.doc s
in
(*Array.fast_sort (compare) a; *)
contains_array := a;
Array.iter (fun x -> Hashtbl.add contains_index x true) !contains_array
-
+
let count_contains t s = text_count_contains t.doc s
let init_naive_contains t s =
let i,j = tree_doc_ids t.doc (tree_root t.doc)
in
let regexp = Str.regexp_string s in
- let matching arg =
+ let matching arg =
try
let _ = Str.search_forward regexp arg 0;
in true
with _ -> false
in
- let rec loop n acc l =
+ let rec loop n acc l =
if n >= j then acc,l
else
let s = text_get_text t.doc n
in
- if matching s
- then loop (nodei ((inode n)+1)) (n::acc) (l+1)
+ if matching s
+ then loop (nodei ((inode n)+1)) (n::acc) (l+1)
else loop (nodei ((inode n)+1)) acc l
in
let acc,l = loop i [] 0 in
else loop (idx+1) x y
in
if a.(0) > j || a.(l-1) < i then nulldoc
- else loop !last_idx i j
-
-let text_below tree t =
+ else loop !last_idx i j
+
+let text_below tree t =
let l = Array.length !contains_array in
let i,j = tree_doc_ids tree.doc t in
let id = if l == 0 then i else (array_find !contains_array i j) in
tree_parent_node tree.doc id
-
+
let text_next tree t root =
let l = Array.length !contains_array in
- let inf = nodei((inode(snd(tree_doc_ids tree.doc t)))+1) in
- let _,j = tree_doc_ids tree.doc root in
+ let inf = nodei((inode(snd(tree_doc_ids tree.doc t)))+1) in
+ let _,j = tree_doc_ids tree.doc root in
let id = if l == 0 then if inf > j then nulldoc else inf
else array_find !contains_array inf j
- in
+ in
tree_parent_node tree.doc id
module DocIdSet = struct
include Set.Make (struct type t = [`Text] node
let compare = compare_node end)
-
+
end
let is_nil t = t == nil
let node_of_t t =
let _ = Tag.init (Obj.magic t) in
- let table = collect_tags t
- in (*
- let _ = Hashtbl.iter (fun t (c,d,ns,f) ->
- Printf.eprintf "Tag %s has:\n" (Tag.to_string t);
- Printf.eprintf "Child tags: ";
- Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) c;
- Printf.eprintf "\nDescendant tags: ";
- Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) d;
- Printf.eprintf "\nNextSibling tags: ";
- Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) ns;
- Printf.eprintf "\nFollowing tags: ";
- Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) f;
- Printf.eprintf "\n\n%!";) table
+ let c,n,d,f = collect_tags t
in
-
- *)
- { doc= t;
- ttable = table;
+ { doc= t;
+ children = c;
+ siblings = n;
+ descendants = d;
+ followings = f
+
}
let finalize _ = Printf.eprintf "Release the string list !\n%!"
let parse f str =
node_of_t
- (f str
- !Options.sample_factor
+ (f str
+ !Options.sample_factor
!Options.index_empty_texts
!Options.disable_text_collection)
-
+
let parse_xml_uri str = parse parse_xml_uri str
let parse_xml_string str = parse parse_xml_string str
let size t = tree_size t.doc;;
-
+
external pool : tree -> Tag.pool = "%identity"
let magic_string = "SXSI_INDEX"
-let version_string = "2"
+let version_string = "3"
let pos fd =
Unix.lseek fd 0 Unix.SEEK_CUR
let pr_pos fd = Printf.eprintf "At position %i\n%!" (pos fd)
-let write fd s =
+let write fd s =
let sl = String.length s in
let ssl = Printf.sprintf "%020i" sl in
ignore (Unix.write fd ssl 0 20);
let buffer = String.create size in
let _ = really_read fd buffer 0 size in
buffer
-
+
+let save_tag_table channel t =
+ let t = Array.map (fun s -> Array.of_list (Ptset.Int.elements s)) t in
+ Marshal.to_channel channel t []
let save t str =
let fd = Unix.openfile str [ Unix.O_WRONLY;Unix.O_TRUNC;Unix.O_CREAT] 0o644 in
output_char out_c '\n';
output_string out_c version_string;
output_char out_c '\n';
- Marshal.to_channel out_c t.ttable [ ];
+ save_tag_table out_c t.children;
+ save_tag_table out_c t.siblings;
+ save_tag_table out_c t.descendants;
+ save_tag_table out_c t.followings;
(* we need to move the fd to the correct position *)
flush out_c;
ignore (Unix.lseek fd (pos_out out_c) Unix.SEEK_SET);
tree_save t.doc fd str;
close_out out_c
;;
+let load_tag_table channel =
+ let table : int array array = Marshal.from_channel channel in
+ Array.map (fun a -> Ptset.Int.from_list (Array.to_list a)) table
-let load ?(sample=64) ?(load_text=true) str =
+let load ?(sample=64) ?(load_text=true) str =
let fd = Unix.openfile str [ Unix.O_RDONLY ] 0o644 in
let in_c = Unix.in_channel_of_descr fd in
let _ = set_binary_mode_in in_c true in
- let load_table () =
+ let load_table () =
(let ms = input_line in_c in if ms <> magic_string then failwith "Invalid index file");
(let vs = input_line in_c in if vs <> version_string then failwith "Invalid version file");
- let table : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t =
- Marshal.from_channel in_c
- in
- let ntable = Hashtbl.create (Hashtbl.length table) in
- Hashtbl.iter (fun k (s1,s2,s3,s4) ->
- let ss1 = Ptset.Int.fold (Ptset.Int.add) s1 Ptset.Int.empty
- and ss2 = Ptset.Int.fold (Ptset.Int.add) s2 Ptset.Int.empty
- and ss3 = Ptset.Int.fold (Ptset.Int.add) s3 Ptset.Int.empty
- and ss4 = Ptset.Int.fold (Ptset.Int.add) s4 Ptset.Int.empty
- in Hashtbl.add ntable k (ss1,ss2,ss3,ss4)
- ) table;
- Hashtbl.clear table;
- (* The in_channel read a chunk of fd, so we might be after
- the start of the XMLTree save file. Reset to the correct
- position *)
- ntable
+ let c = load_tag_table in_c in
+ let s = load_tag_table in_c in
+ let d = load_tag_table in_c in
+ let f = load_tag_table in_c in
+ c,s,d,f
in
let _ = Printf.eprintf "\nLoading tag table : " in
- let ntable = time (load_table) () in
+ let c,s,d,f = time (load_table) () in
ignore(Unix.lseek fd (pos_in in_c) Unix.SEEK_SET);
let tree = { doc = tree_load fd str load_text sample;
- ttable = ntable;}
+ children = c;
+ siblings = s;
+ descendants = d;
+ followings = f
+ }
in close_in in_c;
tree
-
+
let tag_pool t = pool t.doc
-
+
let compare = compare_node
let equal a b = a == b
-
+
let nts = function
-1 -> "Nil"
| i -> Printf.sprintf "Node (%i)" i
-
+
let dump_node t = nts (inode t)
let is_left t n = tree_is_first_child t.doc n
-let is_below_right t n1 n2 =
- tree_is_ancestor t.doc (tree_parent t.doc n1) n2
+let is_below_right t n1 n2 =
+ tree_is_ancestor t.doc (tree_parent t.doc n1) n2
&& not (tree_is_ancestor t.doc n1 n2)
let is_binary_ancestor t n1 n2 =
n2 > n1 && n2 < fin
(* (is_below_right t n1 n2) ||
(tree_is_ancestor t.doc n1 n2) *)
-
+
let parent t n = tree_parent t.doc n
let first_child t = let doc = t.doc in ();fun n -> tree_first_child doc n
let first_element t = let doc = t.doc in (); fun n -> tree_first_element doc n
-
+let first_element t n = tree_first_element t.doc n
(* these function will be called in two times: first partial application
on the tag, then application of the tag and the tree, then application of
the other arguments. We use the trick to let the compiler optimize application
let next_sibling t = let doc = t.doc in (); fun n -> tree_next_sibling doc n
let next_element t = let doc = t.doc in (); fun n -> tree_next_element doc n
+let next_element t n = tree_next_element t.doc n
let tagged_following_sibling t tag = (); fun n -> tree_tagged_following_sibling t.doc n tag
let tagged_following_sibling_below t tag = (); fun n _ -> tree_tagged_following_sibling t.doc n tag
-let select_following_sibling_below t = fun ts ->
+let select_following_sibling_below t = fun ts ->
let v = (ptset_to_vector ts) in ();
fun n _ -> tree_select_following_sibling t.doc n v
let id t n = tree_node_xml_id t.doc n
-
+
let tag t n = if n == nil then Tag.nullt else tree_tag t.doc n
-let tagged_descendant t tag =
- let doc = t.doc in (); fun n -> tree_tagged_descendant doc n tag
+let tagged_descendant t tag =
+ let doc = t.doc in (); fun n -> tree_tagged_descendant doc n tag
-let select_descendant t = fun ts ->
+let select_descendant t = fun ts ->
let v = (ptset_to_vector ts) in ();
fun n -> tree_select_descendant t.doc n v
else loop (idx+1) x y
in
if a.(0) > j || a.(l-1) < i then nil
- else loop !last_idx i j
+ else loop !last_idx i j
let stack = ref []
let init_stack () = stack := []
let push x = stack:= x::!stack
- let peek () = match !stack with
+ let peek () = match !stack with
p::_ -> p
| _ -> failwith "peek"
let pop () = match !stack with
p::r -> stack:=r; p
| _ -> failwith "pop"
- let next t = nodei ( (inode t) + 1 )
- let next2 t = nodei ( (inode t) + 2 )
- let next3 t = nodei ( (inode t) + 3 )
-
+ let next t = nodei ( (inode t) + 1 )
+ let next2 t = nodei ( (inode t) + 2 )
+ let next3 t = nodei ( (inode t) + 3 )
+
let print_xml_fast2 =
let _ = init_stack () in
- let h = Hashtbl.create MED_H_SIZE in
+ let h = Hashtbl.create MED_H_SIZE in
let tag_str t = try Hashtbl.find h t with
Not_found -> let s = Tag.to_string t in
Hashtbl.add h t s;s
in
- let h_att = Hashtbl.create MED_H_SIZE in
+ let h_att = Hashtbl.create MED_H_SIZE in
let att_str t = try Hashtbl.find h_att t with
Not_found -> let s = Tag.to_string t in
let attname = String.sub s 3 ((String.length s) -3) in
if t <= fin then
if tree_is_open tree t then
(* opening tag *)
- if tag == Tag.pcdata then
+ if tag == Tag.pcdata then
begin
output_string outc (text_get_text tree (tree_my_text_unsafe tree t));
loop (next2 t) (* skip closing $ *)
end
else
let tagstr = tag_str tag in
- let _ = output_char outc '<';
+ let _ = output_char outc '<';
output_string outc tagstr in
let t' = next t in
if tree_is_open tree t' then
let _ = push tagstr in
let tag' = tree_tag tree t' in
- if tag' == Tag.attribute then let t'' = loop_attr (next t') 0 in
+ if tag' == Tag.attribute then let t'' = loop_attr (next t') 0 in
output_string outc ">"; loop t'' else (output_string outc ">";loop_tag t' tag')
else (* closing with no content *)
let _ = output_string outc "/>" in
loop (next t);
end
and loop t = loop_tag t (tree_tag tree t)
- and loop_attr t n =
- if tree_is_open tree t then
+ and loop_attr t n =
+ if tree_is_open tree t then
let attname = att_str (tree_tag tree t) in
output_char outc ' ';
output_string outc attname;
in loop t
let print_xml_fast =
- let h = Hashtbl.create MED_H_SIZE in
+ let h = Hashtbl.create MED_H_SIZE in
let tag_str t = try Hashtbl.find h t with
Not_found -> let s = Tag.to_string t in
Hashtbl.add h t s;s
in
- let h_att = Hashtbl.create MED_H_SIZE in
+ let h_att = Hashtbl.create MED_H_SIZE in
let att_str t = try Hashtbl.find h_att t with
Not_found -> let s = Tag.to_string t in
let attname = String.sub s 3 ((String.length s) -3) in
Hashtbl.add h_att t attname;attname
in fun outc tree t ->
- let rec loop ?(print_right=true) t =
- if t != nil
- then
+ let rec loop ?(print_right=true) t =
+ if t != nil
+ then
let tagid = tree_tag tree.doc t in
if tagid==Tag.pcdata
- then
+ then
begin
let tid = tree_my_text_unsafe tree.doc t in
output_string outc (text_get_text tree.doc tid);
end
else
let tagstr = tag_str tagid in
- let l = first_child tree t
- and r = next_sibling tree t
+ let l = first_child tree t
+ and r = next_sibling tree t
in
output_char outc '<';
output_string outc tagstr;
if l == nil then output_string outc "/>"
- else
+ else
if (tag tree l) == Tag.attribute then
begin
loop_attributes (first_child tree l);
if (next_sibling tree l) == nil then output_string outc "/>"
- else
- begin
- output_char outc '>';
+ else
+ begin
+ output_char outc '>';
loop (next_sibling tree l);
output_string outc "</";
output_string outc tagstr;
end
else
begin
- output_char outc '>';
+ output_char outc '>';
loop l;
output_string outc "</";
output_string outc tagstr;
output_char outc '>';
end;
if print_right then loop r
- and loop_attributes a =
+ and loop_attributes a =
if a != nil
then
let attname = att_str (tag tree a) in
loop_attributes (next_sibling tree a)
in
loop ~print_right:false t
-
-
- let print_xml_fast outc tree t =
+
+
+ let print_xml_fast outc tree t =
if (tag tree t) = Tag.document_node then
print_xml_fast outc tree (first_child tree t)
- else print_xml_fast outc tree t
-
-let tags_children t tag =
- let a,_,_,_ = Hashtbl.find t.ttable tag in a
-let tags_below t tag =
- let _,a,_,_ = Hashtbl.find t.ttable tag in a
-let tags_siblings t tag =
- let _,_,a,_ = Hashtbl.find t.ttable tag in a
-let tags_after t tag =
- let _,_,_,a = Hashtbl.find t.ttable tag in a
+ else print_xml_fast outc tree t
+let tags_children t tag = t.children.(tag)
-let tags t tag = Hashtbl.find t.ttable tag
+let tags_below t tag = t.descendants.(tag)
+let tags_siblings t tag = t.siblings.(tag)
-let rec binary_parent t n =
- let r =
+let tags_after t tag = t.followings.(tag)
+
+
+
+let tags t tag =
+ t.children.(tag),
+ t.descendants.(tag),
+ t.siblings.(tag),
+ t.followings.(tag)
+
+
+let rec binary_parent t n =
+ let r =
if tree_is_first_child t.doc n
then tree_parent t.doc n
else tree_prev_sibling t.doc n
let get_text t n =
let tid = tree_my_text t.doc n in
- if tid == nulldoc then "" else
+ if tid == nulldoc then "" else
text_get_text t.doc tid
-let dump_tree fmt tree =
+let dump_tree fmt tree =
let rec loop t n =
if t != nil then
let tag = (tree_tag tree.doc t ) in
let tagstr = Tag.to_string tag in
let tab = String.make n ' ' in
- if tag == Tag.pcdata || tag == Tag.attribute_data
- then
- Format.fprintf fmt "%s<%s>%s</%s>\n"
+ if tag == Tag.pcdata || tag == Tag.attribute_data
+ then
+ Format.fprintf fmt "%s<%s>%s</%s>\n"
tab tagstr (text_get_text tree.doc (tree_my_text tree.doc t)) tagstr
else begin
Format.fprintf fmt "%s<%s>\n" tab tagstr;
loop root 0
;;
-
+
let print_xml_fast3 t = tree_print_xml_fast3 t.doc
-let stats t =
+let stats t =
let tree = t.doc in
- let rec loop left node acc_d total_d num_leaves =
+ let rec loop left node acc_d total_d num_leaves =
if node == nil then
(acc_d+total_d,if left then num_leaves+1 else num_leaves)
else
let test_prefix t s = Array.length (text_prefix t.doc s)
let test_suffix t s = Array.length (text_suffix t.doc s)
-let test_contains t s = Array.length (text_contains t.doc s)
+let test_contains t s = Array.length (text_contains t.doc s)
let test_equals t s = Array.length (text_equals t.doc s)