From c10ce35cd399aff15a49f3b24a31b38cb2191da0 Mon Sep 17 00:00:00 2001 From: kim Date: Tue, 27 Jan 2009 22:33:46 +0000 Subject: [PATCH] Fix stupid bug with Tag indices git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@80 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- OCamlDriver.cpp | 10 ++++- main.ml | 2 +- tag.ml | 98 ++++++++++--------------------------------------- tag.mli | 5 ++- tree.ml | 19 ++++++---- tree.mli | 1 + 6 files changed, 46 insertions(+), 89 deletions(-) diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index c906f0f..f6764fa 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -216,6 +216,15 @@ extern "C" CAMLprim value caml_xml_tree_tag(value tree, value id){ CAMLreturn (caml_copy_string(tag)); } +extern "C" CAMLprim value caml_xml_tree_tag_name(value tree, value tagid){ + CAMLparam2(tree,tagid); + const char* tag; + tag = (const char*) XMLTREE(tree)->GetTagName((TagType) (Int_val(tagid))); + + CAMLreturn (caml_copy_string(tag)); +} + + extern "C" CAMLprim value caml_xml_tree_tag_id(value tree,value id){ CAMLparam2(tree,id); CAMLreturn (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(id)))); @@ -227,7 +236,6 @@ extern "C" CAMLprim value caml_xml_tree_register_tag(value tree,value str){ unsigned char* tag; tag = (unsigned char*) (String_val(str)); id = Val_int(XMLTREE(tree)->RegisterTag(tag)); - free(tag); CAMLreturn (id); } diff --git a/main.ml b/main.ml index 48f81bd..ba93328 100644 --- a/main.ml +++ b/main.ml @@ -23,12 +23,12 @@ let total_time () = List.fold_left (+.) 0. !l;; let main filename query output = - (* Just a trick to allow the C++ code to print debugging stuff first *) let v = time (fun () -> let v = Tree.Binary.parse_xml_uri filename; in Printf.eprintf "Parsing document : %!";v ) () in + let _ = Tag.init (Tree.Binary.tag_pool v) in MM(v,__LOCATION__); Printf.eprintf "Parsing query : "; let query = try diff --git a/tag.ml b/tag.ml index 8c77691..e5c3820 100644 --- a/tag.ml +++ b/tag.ml @@ -5,93 +5,35 @@ (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) (* maybe utf8 string... *) -module T = -struct - type t = string - - (* special tag which denotes attribute nodes, should not be a valid - xml tag name *) - let attribute = "<@>" - (* Likewise for strings *) - let pcdata = "<$>" - - let compare : t -> t -> int = String.compare - - let hash (s:t) = - let rec loop acc = function - | -1 -> acc - | n -> loop (( acc lsl 6 ) + (acc lsl 16) - acc + (Char.code s.[n])) (n-1) - - in - loop 0 ((String.length s)-1) - - let equal x y = compare x y == 0 -end -module HMap = Map.Make (struct type t = int let compare x y = x - y end) -module HTag = -struct type t = int -let attribute = T.hash T.attribute -let pcdata = T.hash T.pcdata +type pool -let pool = ref HMap.empty +external null_pool : unit -> pool = "caml_xml_tree_nullt" +external null_tag : unit -> t = "caml_xml_tree_nullt" +external register_tag : pool -> string -> t = "caml_xml_tree_register_tag" +external tag_name : pool -> t -> string = "caml_xml_tree_tag_name" +let nullt = null_tag () +let pcdata = max_int +let attribute = max_int - 1 -let add_pool s = - let hash = T.hash s in - pool := HMap.add hash s !pool - -let clear_pool () = - pool := HMap.empty; - add_pool ""; - add_pool T.attribute; - add_pool T.pcdata - -let _ = clear_pool () +let pool = ref (null_pool ()) -let init l = - clear_pool () +let init p = pool := p -let tag s = - let hash = T.hash s in - try - let s' = HMap.find hash !pool - in - if s <> s' then - failwith (Printf.sprintf "hash conflict s1=%s, s2=%s, %i" s s' hash) - else hash - with - Not_found -> - add_pool s; - hash +let tag s = match s with + | "<$>" -> pcdata + | "<@>" -> attribute + | _ -> register_tag !pool s -let compare x y = x - y +let compare = (-) let equal = (==) -let hash t = T.hash t -let print fmt t = - Format.fprintf fmt "%s" ( - try - HMap.find t !pool - with - Not_found -> failwith (Printf.sprintf "%i not found!" t)) -let to_string x = HMap.find x !pool +let to_string t = + if t = pcdata then "<$>" + else if t = attribute then "<@>" + else tag_name !pool t -end -module STag = -struct - type t = string - let attribute = T.attribute - let pcdata = T.pcdata - external tag : string -> t = "%identity" - external clear_pool : unit -> unit = "%identity" - let init _ = () - let compare = String.compare - let equal = (=) - let print fmt s = Format.fprintf fmt "%s" s - external to_string : t -> string = "%identity" -end - -include STag +let print ppf t = Format.fprintf ppf "%s" (to_string t) diff --git a/tag.mli b/tag.mli index 41b5485..c7a15cb 100644 --- a/tag.mli +++ b/tag.mli @@ -5,11 +5,12 @@ (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) type t +type pool + val attribute : t val pcdata : t val tag : string -> t -val init : string array -> unit -val clear_pool : unit -> unit +val init : pool -> unit val compare : t -> t -> int val equal : t -> t -> bool val print : Format.formatter -> t -> unit diff --git a/tree.ml b/tree.ml index 8ebcdcc..1b4ce2e 100644 --- a/tree.ml +++ b/tree.ml @@ -12,6 +12,7 @@ sig type t val parse_xml_uri : string -> t val parse_xml_string : string -> t + val tag_pool : t -> Tag.pool val string : t -> string val descr : t -> descr val left : t -> t @@ -95,8 +96,8 @@ struct external is_leaf : t -> [`Tree] node -> bool = "caml_xml_tree_is_leaf" - external tag : t -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag" - external tag_id : t -> [`Tree ] node -> unit = "caml_xml_tree_tag_id" +(* external tag : t -> [`Tree ] node -> T = "caml_xml_tree_tag"*) + external tag_id : t -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag_id" let is_last t n = equal nil (next_sibling t n) @@ -118,7 +119,7 @@ struct begin Printf.eprintf "Node %i has tag '%s' DocOrder=%i, DocID of PrevText,MyText,NextText : (%i = %s,%i = %s,%i = %s)\n%!" (int_of_node id) - (Tag.to_string (tag t id)) + (Tag.to_string (tag_id t id)) (node_xml_id t id) (int_of_node (prev_text t id)) (Text.get_text t (prev_text t id)) @@ -180,6 +181,10 @@ struct let parse_xml_uri str = node_of_t (parse_xml_uri str) let parse_xml_string str = node_of_t (parse_xml_string str) + + external pool : doc -> Tag.pool = "%identity" + let tag_pool t = pool t.doc + let compare a b = match a.node,b.node with | Node(NC i),Node(NC j) -> compare i j | _, Node(NC( _ )) -> 1 @@ -265,14 +270,14 @@ struct let tag = function { node=Node(SC _) } -> Tag.pcdata - | { doc=d; node=Node(NC n)} -> tag d n - | _ -> failwith "Tag" + | { doc=d; node=Node(NC n)} -> tag_id d n + | _ -> failwith "tag" - let tag_id = +(* let tag_id = function { node=Node(SC _) } -> () | { doc=d; node=Node(NC n)} -> tag_id d n | _ -> () - +*) let string_below t id = let pid = parent_doc t.doc id in match t.node with diff --git a/tree.mli b/tree.mli index fd7bad7..6fa19b0 100644 --- a/tree.mli +++ b/tree.mli @@ -12,6 +12,7 @@ sig type t val parse_xml_uri : string -> t val parse_xml_string : string -> t + val tag_pool : t -> Tag.pool val string : t -> string val descr : t -> descr val left : t -> t -- 2.17.1