external text_get_cached_text : tree -> [`Text] node -> string = "caml_text_collection_get_cached_text"
external tree_root : tree -> [`Tree] node = "caml_xml_tree_root"
-
+external tree_subtree_size : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_size"
+
let tree_is_nil x = equal_node x nil
external tree_parent : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_parent" "noalloc"
doc : tree;
ttable : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t;
}
-
+let subtree_size t i = tree_subtree_size t.doc i
let text_size t = text_size t.doc
module MemUnion = Hashtbl.Make (struct
Hashtbl.replace h t
(pt_cup sc schild,pt_cup sbelow sb, pt_cup ssibling ss, pt_cup safter sa)
in
- let rec loop_right id acc_sibling acc_after=
+ let rec loop_right id acc_after =
if id == nil
- then (acc_sibling,acc_after)
+ then Ptset.Int.empty,Ptset.Int.empty,acc_after
else
- let sibling2,after2 = loop_right (tree_next_sibling tree id) acc_sibling acc_after in
- let child1,below1 = loop_left (tree_first_child tree id) after2 in
- let tag = tree_tag_id tree id in
- update tag child1 below1 sibling2 after2;
- (pt_add tag sibling2, (pt_add tag (pt_cup after2 below1)))
+ let sibling2,desc2,after2 = loop_right (tree_next_sibling tree id) acc_after in
+ let child1,desc1,after1 = loop_left (tree_first_child tree id) after2 in
+ let tag = tree_tag_id tree id in
+ update tag child1 desc1 sibling2 after2;
+ ( pt_add tag sibling2,
+ pt_add tag (pt_cup desc1 desc2),
+ pt_cup after1 (pt_cup desc1 desc2) )
and loop_left id acc_after =
- if id == nil
- then (Ptset.Int.empty,Ptset.Int.empty)
+ if id == nil
+ then Ptset.Int.empty,Ptset.Int.empty,acc_after
else
- let sibling2,after2 = loop_right (tree_next_sibling tree id) Ptset.Int.empty acc_after in
- let child1,below1 = loop_left (tree_first_child tree id) after2 in
- let tag = tree_tag_id tree id in
- update tag child1 below1 sibling2 after2;
- (pt_add tag sibling2,(pt_add tag (pt_cup after2 below1)))
+ let sibling2,desc2,after2 = loop_right (tree_next_sibling tree id) acc_after in
+ let child1,desc1,after1 = loop_left (tree_first_child tree id) after2 in
+ let tag = tree_tag_id tree id in
+ update tag child1 desc1 sibling2 after2;
+ (pt_add tag sibling2,
+ pt_add tag (pt_cup desc1 desc2),
+ acc_after )
in
let _ = loop_left (tree_root tree) Ptset.Int.empty in h
let _ = List.fold_left (fun cpt e -> a.(cpt) <- e; (cpt-1)) (l-1) acc
in
contains_array := a
+
+let last_idx = ref 0
+
+let array_find a i j =
+ let l = Array.length a in
+ let rec loop idx x y =
+ if x > y || idx >= l then nulldoc
+ else
+ if a.(idx) >= x then if a.(idx) > y then nulldoc else (last_idx := idx;a.(idx))
+ else loop (idx+1) x y
+ in
+ if a.(0) > j || a.(l-1) < i then nulldoc
+ else loop !last_idx i j
+let text_below tree t =
+ let l = Array.length !contains_array in
+ let i,j = tree_doc_ids tree.doc t in
+ let id = if l == 0 then i else (array_find !contains_array i j) in
+ tree_parent_doc tree.doc id
+
+let text_next tree t root =
+ let l = Array.length !contains_array in
+ let inf = nodei((inode(snd(tree_doc_ids tree.doc t)))+1) in
+ let _,j = tree_doc_ids tree.doc root in
+ let id = if l == 0 then if inf > j then nulldoc else inf
+ else array_find !contains_array inf j
+ in
+ tree_parent_doc tree.doc id
+
module DocIdSet = struct
let node_of_t t =
let _ = Tag.init (Obj.magic t) in
let table = collect_tags t
+ in (*
+ let _ = Hashtbl.iter (fun t (c,d,ns,f) ->
+ Printf.eprintf "Tag %s has:\n" (Tag.to_string t);
+ Printf.eprintf "Child tags: ";
+ Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) c;
+ Printf.eprintf "\nDescendant tags: ";
+ Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) d;
+ Printf.eprintf "\nNextSibling tags: ";
+ Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) ns;
+ Printf.eprintf "\nFollowing tags: ";
+ Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) f;
+ Printf.eprintf "\n\n%!";) table
in
+
+ *)
{ doc= t;
ttable = table;
}
external pool : tree -> Tag.pool = "%identity"
let magic_string = "SXSI_INDEX"
-let version_string = "1"
+let version_string = "2"
let pos fd =
Unix.lseek fd 0 Unix.SEEK_CUR
let fd = Unix.openfile str [ Unix.O_RDONLY ] 0o644 in
let in_c = Unix.in_channel_of_descr fd in
let _ = set_binary_mode_in in_c true in
+ let load_table () =
(let ms = input_line in_c in if ms <> magic_string then failwith "Invalid index file");
(let vs = input_line in_c in if vs <> version_string then failwith "Invalid version file");
let table : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t =
(* The in_channel read a chunk of fd, so we might be after
the start of the XMLTree save file. Reset to the correct
position *)
- ignore(Unix.lseek fd (pos_in in_c) Unix.SEEK_SET);
- let tree = { doc = tree_load fd;
- ttable = ntable;}
- in close_in in_c;
- tree
+ ntable
+ in
+ let _ = Printf.eprintf "\nLoading tag table : " in
+ let ntable = time (load_table) () in
+ ignore(Unix.lseek fd (pos_in in_c) Unix.SEEK_SET);
+ let tree = { doc = tree_load fd;
+ ttable = ntable;}
+ in close_in in_c;
+ tree
let dump_node t = nts (inode t)
-
let is_left t n = tree_is_first_child t.doc n
-let is_below_right t n1 n2 = tree_is_ancestor t.doc (tree_parent t.doc n1) n2
-
+let is_below_right t n1 n2 =
+ tree_is_ancestor t.doc (tree_parent t.doc n1) n2
+ && not (tree_is_ancestor t.doc n1 n2)
+
let parent t n = tree_parent t.doc n
let first_child t = (); fun n -> tree_first_child t.doc n
then
begin
let tid = tree_my_text tree.doc t in
- let _ = Printf.eprintf "my_text %i returned %i\n%!" (inode t) (inode tid)
- in
output_string outc (text_get_cached_text tree.doc tid);
if print_right
then loop (next_sibling tree t);
let attname = String.sub s 3 ((String.length s) -3) in
let fsa = first_child tree a in
let tid = tree_my_text tree.doc fsa in
- let _ = Printf.eprintf "my_text %i returned %i\n%!" (inode fsa) (inode tid)
- in
output_char outc ' ';
output_string outc attname;
output_string outc "=\"";
let tags t tag = Hashtbl.find t.ttable tag
-let binary_parent t n =
+let rec binary_parent t n =
+ let r =
if tree_is_first_child t.doc n
then tree_parent t.doc n
else tree_prev_sibling t.doc n
+ in if tree_tag_id t.doc r = Tag.pcdata then
+ binary_parent t r
+ else r
let doc_ids t n = tree_doc_ids t.doc n