From 6df6ad6cf27e57872bd5891b49354acb0a5ce6a4 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Kim=20Nguy=E1=BB=85n?= Date: Thu, 16 Feb 2012 14:08:36 +0100 Subject: [PATCH] Remove hard-coded acess to SXSI for tag operations. * Now a data structure can register operations for tags via the Tag module. --- src/OCamlDriver.cpp | 17 ++++++++- src/grammar.ml | 19 +++++++++- src/main.ml | 3 +- src/tag.ml | 89 ++++++++++++++++++++++----------------------- src/tag.mli | 26 +++++++------ src/tree.ml | 36 +++++++++++++++--- src/tree.mli | 4 +- 7 files changed, 125 insertions(+), 69 deletions(-) diff --git a/src/OCamlDriver.cpp b/src/OCamlDriver.cpp index 7a12c7f..be46709 100644 --- a/src/OCamlDriver.cpp +++ b/src/OCamlDriver.cpp @@ -940,11 +940,11 @@ extern "C" value caml_grammar_is_nil(value grammar, value rule) CAMLreturn(Val_bool(GRAMMAR(grammar)->isNil(Long_val(rule)))); } -extern "C" value caml_grammar_get_tag(value grammar, value symbol) +extern "C" value caml_grammar_get_tag(value grammar, value tag) { CAMLparam1(grammar); CAMLlocal1(res); - const char * s = (GRAMMAR(grammar)->getTagName(Long_val(symbol) >> 2)).c_str(); + const char * s = (GRAMMAR(grammar)->getTagName(Long_val(tag))).c_str(); res = caml_copy_string(s); CAMLreturn(res); } @@ -966,3 +966,16 @@ extern "C" value caml_grammar_get_param_pos(value grammar, value rule) CAMLparam1(grammar); CAMLreturn(Val_int(GRAMMAR(grammar)->getParamPos(Long_val(rule)))); } + +extern "C" value caml_grammar_translate_tag(value grammar, value tag) +{ + CAMLparam1(grammar); + CAMLreturn(Val_int(GRAMMAR(grammar)->translateTag(Int_val(tag)))); +} + +extern "C" value caml_grammar_register_tag(value grammar, value str) +{ + CAMLparam2(grammar, str); + char * s = String_val(str); + CAMLreturn(Val_int(GRAMMAR(grammar)->getTagID(s))); +} diff --git a/src/grammar.ml b/src/grammar.ml index 03e9544..cf167fc 100644 --- a/src/grammar.ml +++ b/src/grammar.ml @@ -24,7 +24,17 @@ type partial = Node of tn_symbol * partial array | Leaf of node external is_nil : t -> t_symbol -> bool = "caml_grammar_is_nil" -external get_tag : t -> t_symbol -> string = "caml_grammar_get_tag" +external translate_tag : t -> Tag.t -> Tag.t = "caml_grammar_translate_tag" +external to_string : t -> Tag.t -> string = "caml_grammar_get_tag" +external register_tag : t -> string -> Tag.t = "caml_grammar_register_tag" + + + +let tag_operations t = { + Tag.tag = (fun s -> register_tag t s); + Tag.to_string = (fun s -> to_string t s); + Tag.translate = (fun s -> translate_tag t s); +} external get_symbol_at : t -> symbol -> node -> symbol = "caml_grammar_get_symbol_at" external first_child : t -> symbol -> node -> node = "caml_grammar_first_child" @@ -44,6 +54,12 @@ let is_parameter (n : [< any_type ] Node.t) = n land 3 == 2 +let symbol_tag (n : t_symbol) = (Node.to_int n) lsr 2 +;; +let get_tag g (n : t_symbol) = to_string g (symbol_tag n) + + + external parameter : [< any_type ] Node.t -> p_symbol = "%identity" external terminal : [< any_type ] Node.t -> t_symbol = "%identity" external non_terminal : [< any_type ] Node.t -> n_symbol = "%identity" @@ -158,6 +174,7 @@ let load filename bp = in Unix.close fd; traversal g; + Tag.init (tag_operations g); g diff --git a/src/main.ml b/src/main.ml index cd3d275..cb7585f 100644 --- a/src/main.ml +++ b/src/main.ml @@ -33,7 +33,7 @@ let mk_runtime run auto doc arg count print outfile = let main v query_string output = - Tag.init (Tree.tag_pool v); + Tag.init (Tree.tag_operations v); let query = time ~msg:"Parsing query" XPath.parse query_string in @@ -95,6 +95,7 @@ let document = let g = time ~msg:"Loading grammar" (Grammar.load !Options.input_file) true in begin ignore(g); + Unix.sleep 10; (* Leave monitoring process the time to read the HWM *) exit 0 end else if Filename.check_suffix !Options.input_file ".srx" diff --git a/src/tag.ml b/src/tag.ml index ce1979c..2cc1663 100644 --- a/src/tag.ml +++ b/src/tag.ml @@ -8,26 +8,44 @@ type t = int -type pool type operations = { - tag : pool -> string -> t; - to_string : pool -> t ->string; - nullt : pool -> t; - translate : pool -> t -> t + tag : string -> t; + to_string : t ->string; + translate : t -> t } +type intern = { + mutable tag_ : string -> t; + mutable to_string_ : t ->string; + mutable translate_ : t -> t +} + + + + + let dummy_fun =function _ -> failwith "Tag.ml Uninitialized tag structure" +let ops = { + tag_ = dummy_fun; + to_string_ = dummy_fun; + translate_ = dummy_fun +} + +let init p = + ops.tag_ <- p.tag; + ops.to_string_ <- p.to_string; + ops.translate_ <- p.translate + +let tag s = ops.tag_ s +let to_string t = ops.to_string_ t +let translate s = ops.translate_ s -external null_pool : unit -> pool = "caml_xml_tree_nullt" -external null_tag : unit -> t = "caml_xml_tree_nullt" -external register_tag : pool -> string -> t = "caml_xml_tree_register_tag" -external tag_name : pool -> t -> string = "caml_xml_tree_get_tag_name" -external num_tags : pool -> int = "caml_xml_tree_num_tags" -let nullt = null_tag () + +let nullt = ~-1 let dummy = nullt (* Defined in XMLTree.cpp *) let document_node = 0 @@ -40,44 +58,11 @@ let pcdata_close = 6 let attribute_data_close= 7 -let pool = Weak.create 1 - -let init p = Weak.set pool 0 (Some p) - -let get_pool () = match Weak.get pool 0 with - | Some x -> x - | None -> failwith "Tag.ml: Uninitialized Document" - -let tag s = match s with - | "<$>" -> pcdata - | "<@>" -> attribute - | "" -> document_node - | "<@$>" -> attribute_data - | _ -> register_tag (get_pool()) s - let compare = (-) let equal = (==) let hash x = x - -let to_string t = - if t == pcdata then "<$>" - else if t == attribute_data then "<@$>" - else if t == attribute then "<@>" - else if t == nullt then "" - else tag_name (get_pool()) t - - -let xml_operations = { - tag = (fun _ x -> tag x); - to_string = (fun _ x -> to_string x); - nullt = (fun _ -> nullt); - translate = (fun _ x -> x); -} - - - - +(* let dump_tags () = Format.eprintf "Tags are:\n"; let doc = get_pool() in @@ -85,7 +70,7 @@ let dump_tags () = for i = 0 to ntags - 1 do Format.eprintf "%i, -><%s/>\n%!" i (to_string i) done - +*) let print ppf t = Format.fprintf ppf "%s" (to_string t) @@ -96,3 +81,15 @@ let check t = let dump = print + + + + + + + + + + +(* To move *) + diff --git a/src/tag.mli b/src/tag.mli index 7a11be0..04254e0 100644 --- a/src/tag.mli +++ b/src/tag.mli @@ -1,13 +1,11 @@ type t = int -type pool + type operations = { - tag : pool -> string -> t; - to_string : pool -> t -> string; - nullt : pool -> t; - translate : pool -> t -> t + tag : string -> t; + to_string : t -> string; + translate : t -> t } -val tag : string -> t val document_node : t val attribute : t @@ -17,14 +15,14 @@ val document_node_close : t val attribute_close : t val pcdata_close : t val attribute_data_close : t +val nullt : t +val dummy : t -val init : pool -> (* operations -> *) unit -val to_string : t -> string val compare : t -> t -> int val equal : t -> t -> bool -val nullt : t -val dummy : t + + val dump : Format.formatter -> t -> unit val check : t -> unit (* Check internal invariants *) @@ -32,4 +30,10 @@ val check : t -> unit (* Check internal invariants *) val hash : t -> int val print : Format.formatter -> t -> unit -val dump_tags : unit -> unit +val init : operations -> unit + +val tag : string -> t +val to_string : t -> string +val translate : t -> t + + diff --git a/src/tree.ml b/src/tree.ml index 6bda292..e1374a7 100644 --- a/src/tree.ml +++ b/src/tree.ml @@ -21,6 +21,34 @@ type node = [ `Tree ] Node.t type tree +external register_tag : tree -> string -> Tag.t = "caml_xml_tree_register_tag" + +external tag_name : tree -> Tag.t -> string = "caml_xml_tree_get_tag_name" + +let tag t = (); fun s -> + match s with + | "<$>" -> Tag.pcdata + | "<@>" -> Tag.attribute + | "" -> Tag.document_node + | "<@$>" -> Tag.attribute_data + | _ -> register_tag t s + +let to_string d = (); + fun t -> + if t == Tag.pcdata then "<$>" + else if t == Tag.attribute_data then "<@$>" + else if t == Tag.attribute then "<@>" + else if t == Tag.nullt then "" + else tag_name d t + +let translate x = x + +let mk_tag_ops t = { + Tag.tag = tag t; + Tag.to_string = to_string t; + Tag.translate = translate +} + module TreeBuilder = struct type t @@ -133,7 +161,7 @@ type t = { } - +let tag_operations t = mk_tag_ops t.doc (* external parse_xml_uri : string -> int -> bool -> bool -> int -> tree = "caml_call_shredder_uri" external parse_xml_string : string -> int -> bool -> bool -> int -> tree = "caml_call_shredder_string" @@ -493,7 +521,7 @@ let is_node t = t != nil let is_root t = t == root let node_of_t t = - let _ = Tag.init (Obj.magic t) in + let _ = Tag.init (mk_tag_ops t) in let f, n, c, d = time collect_labels t ~msg:"Building tag relationship table" in let c = Array.map TagS.to_ptset c in let n = Array.map TagS.to_ptset n in @@ -522,8 +550,6 @@ let parse_xml_string str = node_of_t (TreeBuilder.parse_string str) let size t = tree_size t.doc;; -external pool : tree -> Tag.pool = "%identity" - let magic_string = "SXSI_INDEX" let version_string = "3" @@ -615,8 +641,6 @@ let load ?(sample=64) ?(load_text=true) str = -let tag_pool t = pool t.doc - let equal a b = a == b let nts = function diff --git a/src/tree.mli b/src/tree.mli index db15525..d329b5a 100644 --- a/src/tree.mli +++ b/src/tree.mli @@ -7,7 +7,6 @@ val parse_xml_uri : string -> t val parse_xml_string : string -> t val save : t -> string -> unit val load : ?sample:int -> ?load_text:bool -> string -> t -val tag_pool : t -> Tag.pool val nil : node @@ -68,7 +67,8 @@ val closing : t -> node -> node val stats : t -> unit val num_tags : t -> int -val tag_pool : t -> Tag.pool + +val tag_operations : t -> Tag.operations val print_xml : t -> node -> Unix.file_descr -> unit val flush : t -> Unix.file_descr -> unit -- 2.17.1