X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;ds=sidebyside;f=src%2Ftree.ml;fp=src%2Ftree.ml;h=ae256ddfc6baf24c4d381b247e012175fb77a47f;hb=e4e34e687798285e65df74561915f7721b066631;hp=2b8c1aad5d777292f03b5c124c6ff5f3d0ef35de;hpb=431946f30b0733498ffa731cfa97692b2c942208;p=SXSI%2Fxpathcomp.git diff --git a/src/tree.ml b/src/tree.ml index 2b8c1aa..ae256dd 100644 --- a/src/tree.ml +++ b/src/tree.ml @@ -5,7 +5,8 @@ (* Distributed under the terms of the LGPL (see LICENCE) *) (******************************************************************************) INCLUDE "debug.ml" - INCLUDE "utils.ml" +INCLUDE "log.ml" +INCLUDE "utils.ml" external init_lib : unit -> unit = "sxsi_cpp_init" @@ -56,6 +57,7 @@ struct external open_tag : t -> string -> unit = "caml_xml_tree_builder_open_tag" external close_tag : t -> string -> unit = "caml_xml_tree_builder_close_tag" external text : t -> string -> unit = "caml_xml_tree_builder_text" + external trim : string -> string = "caml_trim" let is_whitespace s = let rec loop len i = @@ -184,11 +186,16 @@ let bit_vector_create n = let len = if n <= 0 then 0 else (n - 1) / 8 + 1 in String.make len '\000' +type tag_list + +external tag_list_alloc : int -> tag_list = "caml_tag_list_alloc" +external tag_list_set : tag_list -> int -> Tag.t -> unit = "caml_tag_list_set" "noalloc" + type t = { doc : tree; elements: Ptset.Int.t; attributes: Ptset.Int.t; - attribute_array : Tag.t array; + attribute_array : tag_list; children : Ptset.Int.t array; siblings : Ptset.Int.t array; descendants: Ptset.Int.t array; @@ -214,10 +221,6 @@ external nullt : unit -> 'a Node.t = "caml_xml_tree_nullt" let nil : [`Tree ] Node.t = Node.nil let root : [`Tree ] Node.t = Node.null -type tag_list - -external tag_list_alloc : int -> tag_list = "caml_tag_list_alloc" -external tag_list_set : tag_list -> int -> Tag.t -> unit = "caml_tag_list_set" "noalloc" module HPtset = Hashtbl.Make(Ptset.Int) @@ -339,16 +342,25 @@ let subtree_tags t n tag = tree_subtree_tags t.doc n tag external tree_subtree_size : tree -> [`Tree] Node.t -> int = "caml_xml_tree_subtree_size" "noalloc" let subtree_size t n = tree_subtree_size t.doc n +let rec iter_array_tag i a len tree node acc = + if i == len then acc + else + iter_array_tag (i+1) a len tree node + (acc - (tree_subtree_tags tree node a.(i))) + +external tree_subtree_elements : tree -> [`Tree] Node.t -> tag_list -> int = "caml_xml_tree_subtree_elements" "noalloc" + +let subtree_elements t node = + tree_subtree_elements t.doc node t.attribute_array +(* let subtree_elements t node = let size = tree_subtree_size t.doc node - 1 in - if size == 0 then 0 + if size <= 0 then 0 else let size = size - (tree_subtree_tags t.doc node Tag.pcdata) in - if size < 2 then size else - let acc = ref size in - for i = 0 to Array.length t.attribute_array - 1 do - acc := !acc - tree_subtree_tags t.doc node t.attribute_array.(i) - done; - !acc + if size < 3 then size else + let a = t.attribute_array in + iter_array_tag 0 a (Array.length a) t.doc node size +*) external tree_closing : tree -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_closing" "noalloc" let closing t n = tree_closing t.doc n @@ -557,7 +569,7 @@ let node_of_t t = in { doc= t; attributes = attributes; - attribute_array = Array.of_list (Ptset.Int.elements attributes); + attribute_array = tag_list_of_set attributes; elements = elements; children = c; siblings = n; @@ -652,7 +664,7 @@ let load ?(sample=64) ?(load_text=true) str = in let tree = { doc = xml_tree; attributes = attributes; - attribute_array = Array.of_list (Ptset.Int.elements attributes); + attribute_array = tag_list_of_set attributes; elements = elements; children = c; siblings = s; @@ -721,6 +733,7 @@ let query_fun = function let _pred_cache = Hashtbl.create 17 ;; let mk_pred query s = + LOG ( __ "bottom-up" 3 "Calling mk_pred for '%s'\n" s); let f = query_fun query in let memo = ref (fun _ _ -> failwith "Undefined") in memo := begin fun tree node -> @@ -733,9 +746,13 @@ let mk_pred query s = in let bv = results.bv in memo := begin fun _ n -> - bit_vector_unsafe_get bv (Node.to_int n) + let r = bit_vector_unsafe_get bv (Node.to_int n) in + LOG( __ "bottom-up" 3 "Running predicate on node %i = %b@\n" (Node.to_int n) r); + r end; - bit_vector_unsafe_get bv (Node.to_int node) + let r = bit_vector_unsafe_get bv (Node.to_int node) in + LOG( __ "bottom-up" 3 "Running predicate on node %i = %b@\n" (Node.to_int node) r); + r end; Predicate.make memo