From: kim Date: Thu, 29 Jan 2009 03:47:19 +0000 (+0000) Subject: Add serialization functions X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;h=9abf8a6f78264fbf4eec1676b4a26018967c97e6;p=SXSI%2Fxpathcomp.git Add serialization functions git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@88 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index 38d660b..723f9d9 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -271,3 +271,20 @@ extern "C" CAMLprim value caml_xml_tree_nullt(value unit){ CAMLparam1(unit); CAMLreturn (NULLT); } + +extern "C" CAMLprim value caml_xml_tree_save(value tree,value filename){ + CAMLparam2(tree,filename); + XMLTREE(tree)->Save((unsigned char *) String_val(filename)); + CAMLreturn (Val_unit); +} + +extern "C" CAMLprim value caml_xml_tree_load(value filename,value samplerate){ + CAMLparam2(filename,samplerate); + CAMLlocal1(doc); + XMLTree * tree; + tree = XMLTree::Load((unsigned char *) String_val(filename),Int_val(samplerate)); + caml_init_ops(); + doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2); + memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*)); + CAMLreturn(doc); +} diff --git a/main.ml b/main.ml index 217b2e2..c53edf2 100644 --- a/main.ml +++ b/main.ml @@ -22,12 +22,14 @@ let time f x = let total_time () = List.fold_left (+.) 0. !l;; -let main filename query output = - (* Just a trick to allow the C++ code to print debugging stuff first *) - let v = time (fun () -> let v = Tree.Binary.parse_xml_uri filename; - in Printf.eprintf "Parsing document : %!";v - ) () - in +let main v query output = + (* + (* Just a trick to allow the C++ code to print debugging stuff first *) + let v = time (fun () -> let v = Tree.Binary.parse_xml_uri filename; + in Printf.eprintf "Parsing document : %!";v + ) () + in + *) let _ = Tag.init (Tree.Binary.tag_pool v) in Printf.eprintf "Parsing query : "; let query = try @@ -63,7 +65,28 @@ let main filename query output = Options.parse_cmdline();; -main !Options.input_file !Options.query !Options.output_file;; +let v = + if (Filename.check_suffix !Options.input_file ".srx") + then + begin + Printf.eprintf "Loading from file : "; + time (Tree.Binary.load ~sample:!Options.sample_factor ) + (Filename.chop_suffix !Options.input_file ".srx"); + end + else + let v = + time (fun () -> let v = Tree.Binary.parse_xml_uri !Options.input_file; + in Printf.eprintf "Parsing document : %!";v + ) () + in + if !Options.save_file <> "" + then begin + Printf.eprintf "Writing file to disk : "; + time (Tree.Binary.save v) !Options.save_file; + end; + v +in + main v !Options.query !Options.output_file;; IFDEF DEBUG THEN diff --git a/options.ml b/options.ml index 6d6dc86..ffa5fa7 100644 --- a/options.ml +++ b/options.ml @@ -5,11 +5,13 @@ let disable_text_collection = ref false let query = ref "" let input_file = ref "" let output_file = ref None - +let save_file = ref "" let usage_msg = Printf.sprintf "%s 'query' [output]" Sys.argv.(0) -let anon_fun = let pos = ref 0 in + +let pos = ref 0 +let anon_fun = fun s -> match !pos with | 0 -> input_file:= s;incr pos | 1 -> query := s; incr pos @@ -18,8 +20,16 @@ let anon_fun = let pos = ref 0 in let spec = [ "-f", Arg.Set_int(sample_factor),"sample factor [default=64]"; "-i", Arg.Set(index_empty_texts),"index empty texts [default=false]"; - "-d", Arg.Set(disable_text_collection),"Disable text collection[default=false]"; ] + "-d", Arg.Set(disable_text_collection),"Disable text collection[default=false]"; + "-s", Arg.Set_string(save_file),"Save the intermediate representation into file.srx"; + ] + +let parse_cmdline() = + let _ = Arg.parse spec anon_fun usage_msg + in + if (!pos > 3 || !pos < 2) + then begin Arg.usage spec usage_msg; exit 1 end -let parse_cmdline() = Arg.parse spec anon_fun usage_msg + diff --git a/options.mli b/options.mli index aabd1fd..bd18d57 100644 --- a/options.mli +++ b/options.mli @@ -5,5 +5,5 @@ val disable_text_collection : bool ref val query : string ref val input_file : string ref val output_file : string option ref - +val save_file : string ref diff --git a/tree.ml b/tree.ml index 3bfbfce..9cab2c7 100644 --- a/tree.ml +++ b/tree.ml @@ -13,6 +13,8 @@ sig type t val parse_xml_uri : string -> t val parse_xml_string : string -> t + val save : t -> string -> unit + val load : ?sample:int -> string -> t val tag_pool : t -> Tag.pool val string : t -> string val descr : t -> descr @@ -45,11 +47,12 @@ struct external int_of_node : 'a node -> int = "%identity" - external parse_xml_uri : string -> int -> bool -> bool -> t = "caml_call_shredder_uri" - - + external parse_xml_uri : string -> int -> bool -> bool -> t = "caml_call_shredder_uri" external parse_xml_string : string -> int -> bool -> bool -> t = "caml_call_shredder_string" + external save_tree : t -> string -> unit = "caml_xml_tree_save" + external load_tree : string -> int -> t = "caml_xml_tree_load" + module Text = struct @@ -191,6 +194,11 @@ struct !Options.disable_text_collection),__LOCATION__)) + let save t str = save_tree t.doc str + + let load ?(sample=64) str = node_of_t (load_tree str sample) + + external pool : doc -> Tag.pool = "%identity" let tag_pool t = pool t.doc diff --git a/tree.mli b/tree.mli index f3a4de6..ad244e4 100644 --- a/tree.mli +++ b/tree.mli @@ -12,6 +12,8 @@ sig type t val parse_xml_uri : string -> t val parse_xml_string : string -> t + val save : t -> string -> unit + val load : ?sample:int -> string -> t val tag_pool : t -> Tag.pool val string : t -> string val descr : t -> descr