-
-
- let print_xml_fast outc t =
- let rec loop ?(print_right=true) t = match t.node with
- | Nil -> ()
- | String (s) -> output_string outc (string t)
- | Node _ when Tag.equal (tag t) Tag.pcdata -> loop (left t); loop (right t)
-
- | Node (_) ->
- let tg = Tag.to_string (tag t) in
- let l = left t
- and r = right t
- in
- output_char outc '<';
- output_string outc tg;
- ( match l.node with
- Nil -> output_string outc "/>"
- | String _ -> assert false
- | Node(_) when Tag.equal (tag l) Tag.attribute ->
- (loop_attributes (left l);
- match (right l).node with
- | Nil -> output_string outc "/>"
- | _ ->
- output_char outc '>';
- loop (right l);
- output_string outc "</";
- output_string outc tg;
- output_char outc '>' )
- | _ ->
- output_char outc '>';
- loop l;
- output_string outc "</";
- output_string outc tg;
- output_char outc '>'
- );if print_right then loop r
- and loop_attributes a =
-
- match a.node with
- | Node(_) ->
- let value =
- match (left a).node with
- | Nil -> ""
- | _ -> string (left(left a))
- in
- output_char outc ' ';
- output_string outc (Tag.to_string (tag a));
- output_string outc "=\"";
- output_string outc value;
- output_char outc '"';
- loop_attributes (right a)
- | _ -> ()
- in
- loop ~print_right:false t
+end
+(* [is_nil t] holds when [t] is physically the [nil] value. *)
+let is_nil t = nil == t
+
+(* [is_node t] holds when [t] is anything other than the [nil] value. *)
+let is_node t = not (t == nil)
+(* [is_root t] holds when [t] is physically the [root] value. *)
+let is_root t = root == t
+
+(* [node_of_t t] initialises the [Tag] module from [t], computes the tag
+   table of [t] with [collect_tags], and packs both into a record.
+   [Tag.init] runs before [collect_tags]. *)
+let node_of_t t =
+  (* [Obj.magic] coerces the tree to the argument type [Tag.init] expects. *)
+  ignore (Tag.init (Obj.magic t));
+  let tags = collect_tags t in
+  (* Disabled debugging aid: dumps, for every tag, its child, descendant,
+     next-sibling and following tag sets on stderr.
+  let _ = Hashtbl.iter (fun t (c,d,ns,f) ->
+    Printf.eprintf "Tag %s has:\n" (Tag.to_string t);
+    Printf.eprintf "Child tags: ";
+    Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) c;
+    Printf.eprintf "\nDescendant tags: ";
+    Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) d;
+    Printf.eprintf "\nNextSibling tags: ";
+    Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) ns;
+    Printf.eprintf "\nFollowing tags: ";
+    Ptset.Int.iter (fun t -> Printf.eprintf "%s "(Tag.to_string t)) f;
+    Printf.eprintf "\n\n%!";) tags
+  in
+  *)
+  { doc = t; ttable = tags }
+
+(* [finalize x] ignores its argument and prints a fixed trace message on
+   stderr, flushing it immediately. *)
+let finalize = fun _ ->
+  Printf.eprintf "Release the string list !\n%!"
+;;
+
+(* [parse f str] runs the low-level parser [f] on [str] with the current
+   values of the three global options, then wraps the result with
+   [node_of_t]. *)
+let parse f str =
+  let sample_factor = !Options.sample_factor
+  and index_empty_texts = !Options.index_empty_texts
+  and disable_text_collection = !Options.disable_text_collection in
+  let doc = f str sample_factor index_empty_texts disable_text_collection in
+  node_of_t doc
+
+(* The two definitions below are not recursive: because they use plain [let],
+   the name on the right-hand side refers to the earlier binding of the same
+   name (defined outside this excerpt), which is handed to [parse] as the
+   low-level parser. The new bindings then shadow the earlier ones. *)
+let parse_xml_uri str = parse parse_xml_uri str
+let parse_xml_string str = parse parse_xml_string str
+
+
+(* [pool] reinterprets a [tree] as a [Tag.pool] without any conversion: the
+   [%identity] primitive returns its argument unchanged, so this is purely a
+   change of static type. *)
+external pool : tree -> Tag.pool = "%identity"
+
+(* Header of an index file: [save] writes these two strings, one per line, at
+   the start of the file, and [load] rejects a file whose first two lines
+   differ from them. *)
+let magic_string = "SXSI_INDEX"
+let version_string = "2"
+
+(* [pos fd] returns the current offset of the file descriptor [fd]; seeking by
+   zero bytes from the current position leaves the offset unchanged. *)
+let pos fd =
+  let whence = Unix.SEEK_CUR in
+  Unix.lseek fd 0 whence
+
+(* [pr_pos fd] prints the current offset of [fd] on stderr and flushes. *)
+let pr_pos fd =
+  let here = pos fd in
+  Printf.eprintf "At position %i\n%!" here
+
+(* [write fd s] writes a length-prefixed record on [fd]: first the length of
+   [s] as a zero-padded 20-character decimal number, then the bytes of [s].
+   The byte counts returned by [Unix.write] are discarded. *)
+let write fd s =
+  let len = String.length s in
+  let header = Printf.sprintf "%020i" len in
+  let _ = Unix.write fd header 0 20 in
+  let _ = Unix.write fd s 0 len in
+  ()
+
+(* [really_read fd buffer start length] fills [buffer] from offset [start]
+   with exactly [length] bytes read from [fd], calling [Unix.read] again
+   after every short read. Does nothing when [length <= 0].
+   Raises [End_of_file] if [Unix.read] returns 0 before all bytes arrived. *)
+let rec really_read fd buffer start length =
+  if length > 0 then begin
+    let got = Unix.read fd buffer start length in
+    if got = 0 then raise End_of_file
+    else really_read fd buffer (start + got) (length - got)
+  end;;
+
+(* [read fd] reads one length-prefixed record from [fd]: a 20-character
+   decimal length followed by that many bytes, which are returned.
+   This is the format produced by [write]. *)
+let read fd =
+  let header = String.create 20 in
+  really_read fd header 0 20;
+  let size = int_of_string header in
+  let payload = String.create size in
+  really_read fd payload 0 size;
+  payload
+
+
+(* [save t str] writes the index [t] to the file named [str], creating or
+   truncating it.
+   Layout: the magic string and the version string, one per line, then the
+   marshalled tag table [t.ttable], then the tree data written by
+   [tree_save] directly on the file descriptor.
+   If anything raises while writing, the channel (and with it the
+   descriptor) is closed before the exception is re-raised, so no
+   descriptor is leaked. *)
+let save t str =
+  let fd =
+    Unix.openfile str [ Unix.O_WRONLY; Unix.O_TRUNC; Unix.O_CREAT ] 0o644
+  in
+  let out_c = Unix.out_channel_of_descr fd in
+  try
+    set_binary_mode_out out_c true;
+    output_string out_c magic_string;
+    output_char out_c '\n';
+    output_string out_c version_string;
+    output_char out_c '\n';
+    Marshal.to_channel out_c t.ttable [ ];
+    (* [tree_save] bypasses the channel and writes on [fd] itself: push the
+       buffered data out and put the descriptor where the channel stopped. *)
+    flush out_c;
+    ignore (Unix.lseek fd (pos_out out_c) Unix.SEEK_SET);
+    tree_save t.doc fd;
+    close_out out_c
+  with e ->
+    (* Error path: release the descriptor, ignoring secondary failures. *)
+    close_out_noerr out_c;
+    raise e
+;;
+
+(* [load ?sample str] reads back an index written by [save] from the file
+   named [str] and returns the record holding the document and its tag
+   table.
+   The file is expected to start with the magic string and the version
+   string, one per line, followed by the marshalled tag table and then the
+   tree data consumed by [tree_load].
+   [sample] is kept for interface compatibility; it is not used here.
+   Raises [Failure] when the magic or the version line does not match.
+   Whatever exception interrupts the load, the channel (and with it the
+   descriptor) is closed before the exception is re-raised. *)
+let load ?(sample=64) str =
+  let fd = Unix.openfile str [ Unix.O_RDONLY ] 0o644 in
+  let in_c = Unix.in_channel_of_descr fd in
+  try
+    set_binary_mode_in in_c true;
+    let load_table () =
+      (let ms = input_line in_c in
+       if ms <> magic_string then failwith "Invalid index file");
+      (let vs = input_line in_c in
+       if vs <> version_string then failwith "Invalid version file");
+      (* NOTE(review): [Marshal.from_channel] performs no type checking; only
+         index files from a trusted source should be loaded. *)
+      let table : (Tag.t,(Ptset.Int.t*Ptset.Int.t*Ptset.Int.t*Ptset.Int.t)) Hashtbl.t =
+        Marshal.from_channel in_c
+      in
+      let ntable = Hashtbl.create (Hashtbl.length table) in
+      (* Each unmarshalled set is rebuilt element by element with
+         [Ptset.Int.add] instead of being reused as is. Presumably this
+         restores invariants of [Ptset] that marshalling does not preserve;
+         TODO confirm against the [Ptset] implementation. *)
+      let rebuild s = Ptset.Int.fold Ptset.Int.add s Ptset.Int.empty in
+      Hashtbl.iter
+        (fun k (s1,s2,s3,s4) ->
+          Hashtbl.add ntable k (rebuild s1, rebuild s2, rebuild s3, rebuild s4))
+        table;
+      Hashtbl.clear table;
+      ntable
+    in
+    Printf.eprintf "\nLoading tag table : ";
+    let ntable = time (load_table) () in
+    (* The in_channel reads [fd] by chunks, so the descriptor may already be
+       past the start of the tree data. Put it back where the channel
+       logically stopped before handing it to [tree_load]. *)
+    ignore (Unix.lseek fd (pos_in in_c) Unix.SEEK_SET);
+    let tree = { doc = tree_load fd;
+                 ttable = ntable; }
+    in
+    close_in in_c;
+    tree
+  with e ->
+    (* Error path: release the descriptor, ignoring secondary failures. *)
+    close_in_noerr in_c;
+    raise e
+
+
+
+
+(* [tag_pool t] views the document of [t] as a tag pool. *)
+let tag_pool t =
+  let d = t.doc in
+  pool d
+
+(* Node ordering; an alias for [compare_node]. *)
+let compare = compare_node
+
+(* [equal a b] is physical equality of [a] and [b]. *)
+let equal a b = (==) a b
+
+(* [nts i] renders the integer [i] for debugging: "Nil" for -1, otherwise
+   the integer wrapped as a node label. *)
+let nts i =
+  if i = (-1) then "Nil"
+  else Printf.sprintf "Node (%i)" i
+
+(* [dump_node t] is the debugging string [nts] gives for [inode t]. *)
+let dump_node t =
+  let i = inode t in
+  nts i
+
+(* [is_left t n] is [tree_is_first_child] applied to the document of [t] and
+   the node [n]. *)
+let is_left t n =
+  let d = t.doc in
+  tree_is_first_child d n
+
+(* [is_below_right t n1 n2] holds when, according to [tree_is_ancestor], the
+   parent of [n1] is an ancestor of [n2] while [n1] itself is not.
+   The second test only runs when the first one succeeds. *)
+let is_below_right t n1 n2 =
+  if tree_is_ancestor t.doc (tree_parent t.doc n1) n2
+  then not (tree_is_ancestor t.doc n1 n2)
+  else false
+
+(* [parent t n] is [tree_parent] applied to the document of [t] and [n]. *)
+let parent t n =
+  let d = t.doc in
+  tree_parent d n
+
+(* [first_child t] returns a function mapping a node [n] to
+   [tree_first_child t.doc n]. The [();] in front of [fun] is deliberate: it
+   keeps the application to [t] separate from the application to [n], so
+   [first_child t] can be computed once and the result reused. *)
+let first_child t = (); fun n -> tree_first_child t.doc n
+(* Same staging as [first_child], delegating to [tree_first_element]. *)
+let first_element t = (); fun n -> tree_first_element t.doc n
+
+(* These functions are meant to be called in two stages: first a partial
+   application to the tree (and to the tag or tag set, where there is one),
+   then an application to the remaining node argument. The [(); fun n -> ...]
+   form makes this staging explicit so that the compiler can optimize the
+   applications.
+*)
+
+(* [tagged_child t tag] returns a function mapping a node [n] to
+   [tree_tagged_child t.doc n tag]. The [();] in front of [fun] deliberately
+   separates the application to [t] and [tag] from the application to [n]. *)
+let tagged_child t tag = () ; fun n -> tree_tagged_child t.doc n tag
+
+(* [select_child t ts] returns a function mapping a node [n] to
+   [tree_select_child t.doc n v], where [v] is [ptset_to_vector ts].
+   The conversion to a vector happens once, when [ts] is supplied, and the
+   same vector is reused for every [n] the returned function is applied to. *)
+let select_child t = fun ts ->
+ let v = ptset_to_vector ts in ();
+ fun n -> tree_select_child t.doc n v
+
+(* [next_sibling t] returns a function mapping a node [n] to
+   [tree_next_sibling t.doc n]. The [();] in front of [fun] deliberately
+   separates the application to [t] from the application to [n]. *)
+let next_sibling t = (); fun n -> tree_next_sibling t.doc n
+(* Same staging as [next_sibling], delegating to [tree_next_element]. *)
+let next_element t = (); fun n -> tree_next_element t.doc n
+
+(* [tagged_sibling t tag] returns a function mapping a node [n] to
+   [tree_tagged_sibling t.doc n tag]. The [();] in front of [fun]
+   deliberately separates the application to [t] and [tag] from the
+   application to [n]. *)
+let tagged_sibling t tag = (); fun n -> tree_tagged_sibling t.doc n tag