CAMLparam1(unit);
CAMLreturn (NULLT);
}
+
+extern "C" CAMLprim value caml_xml_tree_save(value tree,value filename){
+ CAMLparam2(tree,filename);
+ XMLTREE(tree)->Save((unsigned char *) String_val(filename));
+ CAMLreturn (Val_unit);
+}
+
+extern "C" CAMLprim value caml_xml_tree_load(value filename,value samplerate){
+ CAMLparam2(filename,samplerate);
+ CAMLlocal1(doc);
+ XMLTree * tree;
+ tree = XMLTree::Load((unsigned char *) String_val(filename),Int_val(samplerate));
+ caml_init_ops();
+ doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2);
+ memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*));
+ CAMLreturn(doc);
+}
let total_time () = List.fold_left (+.) 0. !l;;
-let main filename query output =
- (* Just a trick to allow the C++ code to print debugging stuff first *)
- let v = time (fun () -> let v = Tree.Binary.parse_xml_uri filename;
- in Printf.eprintf "Parsing document : %!";v
- ) ()
- in
+let main v query output =
+ (*
+ (* Just a trick to allow the C++ code to print debugging stuff first *)
+ let v = time (fun () -> let v = Tree.Binary.parse_xml_uri filename;
+ in Printf.eprintf "Parsing document : %!";v
+ ) ()
+ in
+ *)
let _ = Tag.init (Tree.Binary.tag_pool v) in
Printf.eprintf "Parsing query : ";
let query = try
Options.parse_cmdline();;
-main !Options.input_file !Options.query !Options.output_file;;
+let v =
+ if (Filename.check_suffix !Options.input_file ".srx")
+ then
+ begin
+ Printf.eprintf "Loading from file : ";
+ time (Tree.Binary.load ~sample:!Options.sample_factor )
+ (Filename.chop_suffix !Options.input_file ".srx");
+ end
+ else
+ let v =
+ time (fun () -> let v = Tree.Binary.parse_xml_uri !Options.input_file;
+ in Printf.eprintf "Parsing document : %!";v
+ ) ()
+ in
+ if !Options.save_file <> ""
+ then begin
+ Printf.eprintf "Writing file to disk : ";
+ time (Tree.Binary.save v) !Options.save_file;
+ end;
+ v
+in
+ main v !Options.query !Options.output_file;;
IFDEF DEBUG
THEN
let query = ref ""
let input_file = ref ""
let output_file = ref None
-
+let save_file = ref ""
let usage_msg = Printf.sprintf "%s <input.xml> 'query' [output]" Sys.argv.(0)
-let anon_fun = let pos = ref 0 in
+
+let pos = ref 0
+let anon_fun =
fun s -> match !pos with
| 0 -> input_file:= s;incr pos
| 1 -> query := s; incr pos
let spec = [ "-f", Arg.Set_int(sample_factor),"sample factor [default=64]";
"-i", Arg.Set(index_empty_texts),"index empty texts [default=false]";
- "-d", Arg.Set(disable_text_collection),"Disable text collection[default=false]"; ]
+ "-d", Arg.Set(disable_text_collection),"Disable text collection[default=false]";
+ "-s", Arg.Set_string(save_file),"Save the intermediate representation into file.srx";
+ ]
+
+let parse_cmdline() =
+ let _ = Arg.parse spec anon_fun usage_msg
+ in
+ if (!pos > 3 || !pos < 2)
+ then begin Arg.usage spec usage_msg; exit 1 end
-let parse_cmdline() = Arg.parse spec anon_fun usage_msg
+
val query : string ref
val input_file : string ref
val output_file : string option ref
-
+val save_file : string ref
type t
val parse_xml_uri : string -> t
val parse_xml_string : string -> t
+ val save : t -> string -> unit
+ val load : ?sample:int -> string -> t
val tag_pool : t -> Tag.pool
val string : t -> string
val descr : t -> descr
external int_of_node : 'a node -> int = "%identity"
- external parse_xml_uri : string -> int -> bool -> bool -> t = "caml_call_shredder_uri"
-
-
+ external parse_xml_uri : string -> int -> bool -> bool -> t = "caml_call_shredder_uri"
external parse_xml_string : string -> int -> bool -> bool -> t = "caml_call_shredder_string"
+ external save_tree : t -> string -> unit = "caml_xml_tree_save"
+ external load_tree : string -> int -> t = "caml_xml_tree_load"
+
module Text =
struct
!Options.disable_text_collection),__LOCATION__))
+ let save t str = save_tree t.doc str
+
+ let load ?(sample=64) str = node_of_t (load_tree str sample)
+
+
external pool : doc -> Tag.pool = "%identity"
let tag_pool t = pool t.doc
type t
val parse_xml_uri : string -> t
val parse_xml_string : string -> t
+ val save : t -> string -> unit
+ val load : ?sample:int -> string -> t
val tag_pool : t -> Tag.pool
val string : t -> string
val descr : t -> descr