(* Distributed under the terms of the LGPL (see LICENCE) *)
(******************************************************************************)
INCLUDE "debug.ml"
- INCLUDE "utils.ml"
+INCLUDE "log.ml"
+INCLUDE "utils.ml"
external init_lib : unit -> unit = "sxsi_cpp_init"
external open_tag : t -> string -> unit = "caml_xml_tree_builder_open_tag"
external close_tag : t -> string -> unit = "caml_xml_tree_builder_close_tag"
external text : t -> string -> unit = "caml_xml_tree_builder_text"
+ external trim : string -> string = "caml_trim"
let is_whitespace s =
let rec loop len i =
let len = if n <= 0 then 0 else (n - 1) / 8 + 1 in
String.make len '\000'
+type tag_list
+
+external tag_list_alloc : int -> tag_list = "caml_tag_list_alloc"
+external tag_list_set : tag_list -> int -> Tag.t -> unit = "caml_tag_list_set" "noalloc"
+
type t = {
doc : tree;
elements: Ptset.Int.t;
attributes: Ptset.Int.t;
- attribute_array : Tag.t array;
+ attribute_array : tag_list;
children : Ptset.Int.t array;
siblings : Ptset.Int.t array;
descendants: Ptset.Int.t array;
let nil : [`Tree ] Node.t = Node.nil
let root : [`Tree ] Node.t = Node.null
-type tag_list
-
-external tag_list_alloc : int -> tag_list = "caml_tag_list_alloc"
-external tag_list_set : tag_list -> int -> Tag.t -> unit = "caml_tag_list_set" "noalloc"
module HPtset = Hashtbl.Make(Ptset.Int)
external tree_subtree_size : tree -> [`Tree] Node.t -> int = "caml_xml_tree_subtree_size" "noalloc"
let subtree_size t n = tree_subtree_size t.doc n
+let rec iter_array_tag i a len tree node acc =
+ if i == len then acc
+ else
+ iter_array_tag (i+1) a len tree node
+ (acc - (tree_subtree_tags tree node a.(i)))
+
+external tree_subtree_elements : tree -> [`Tree] Node.t -> tag_list -> int = "caml_xml_tree_subtree_elements" "noalloc"
+
+let subtree_elements t node =
+ tree_subtree_elements t.doc node t.attribute_array
+(*
let subtree_elements t node =
let size = tree_subtree_size t.doc node - 1 in
- if size == 0 then 0
+ if size <= 0 then 0
else let size = size - (tree_subtree_tags t.doc node Tag.pcdata) in
- if size < 2 then size else
- let acc = ref size in
- for i = 0 to Array.length t.attribute_array - 1 do
- acc := !acc - tree_subtree_tags t.doc node t.attribute_array.(i)
- done;
- !acc
+ if size < 3 then size else
+ let a = t.attribute_array in
+ iter_array_tag 0 a (Array.length a) t.doc node size
+*)
external tree_closing : tree -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_closing" "noalloc"
let closing t n = tree_closing t.doc n
in
{ doc= t;
attributes = attributes;
- attribute_array = Array.of_list (Ptset.Int.elements attributes);
+ attribute_array = tag_list_of_set attributes;
elements = elements;
children = c;
siblings = n;
in
let tree = { doc = xml_tree;
attributes = attributes;
- attribute_array = Array.of_list (Ptset.Int.elements attributes);
+ attribute_array = tag_list_of_set attributes;
elements = elements;
children = c;
siblings = s;
let _pred_cache = Hashtbl.create 17
;;
let mk_pred query s =
+ LOG ( __ "bottom-up" 3 "Calling mk_pred for '%s'\n" s);
let f = query_fun query in
let memo = ref (fun _ _ -> failwith "Undefined") in
memo := begin fun tree node ->
in
let bv = results.bv in
memo := begin fun _ n ->
- bit_vector_unsafe_get bv (Node.to_int n)
+ let r = bit_vector_unsafe_get bv (Node.to_int n) in
+ LOG( __ "bottom-up" 3 "Running predicate on node %i = %b@\n" (Node.to_int n) r);
+ r
end;
- bit_vector_unsafe_get bv (Node.to_int node)
+ let r = bit_vector_unsafe_get bv (Node.to_int node) in
+ LOG( __ "bottom-up" 3 "Running predicate on node %i = %b@\n" (Node.to_int node) r);
+ r
end;
Predicate.make memo
#include "utils_stub.hpp"
+extern "C" {
+#define CAML_NAME_SPACE
+
+#include <caml/mlvalues.h>
+#include <caml/alloc.h>
+#include <caml/memory.h>
+#include <caml/callback.h>
+#include <caml/fail.h>
+#include <caml/custom.h>
+#include <caml/bigarray.h>
+}
+#include <ctype.h>
+
extern "C" value caml_clz(value i)
{
return Val_long( ((sizeof(unsigned long)*8) - __builtin_clzl(Long_val(i))) - 1);
return Val_long( ( 1 << (sizeof(unsigned long)*8 - __builtin_clzl(Long_val(i)) - 1)));
}
+
+static char * rtrim(char *str)
+{
+ char *ptr;
+ int len;
+
+ len = strlen(str);
+ for (ptr = str + len - 1; ptr >= str && isspace((int)*ptr ); --ptr);
+ ptr[1] = '\0';
+
+ return str;
+}
+
+static char * ltrim(char *str)
+{
+ char *ptr;
+ int len;
+
+ for (ptr = str; *ptr && isspace((int)*ptr); ++ptr);
+
+ len = strlen(ptr);
+ memmove(str, ptr, len + 1);
+
+ return str;
+}
+extern "C" value caml_trim(value s)
+{
+ CAMLparam1(s);
+ CAMLlocal1(res);
+ char * ptr;
+ char * str = String_val(s);
+ ptr = rtrim(str);
+ str = ltrim(ptr);
+ res = caml_copy_string(str);
+ CAMLreturn(res);
+}
+
xml_tree::tag_t*& TAGLIST(value x)
{
return Obj_val<xml_tree::tag_t*>(x);
}
+
+
static void finalize_tag_list(value x)
{
xml_tree::tag_t * t = TAGLIST(x);
TAG(tag))));
}
-NoAlloc extern "C" value caml_xml_tree_subtree_elements(value tree, value node)
+NoAlloc extern "C" value caml_xml_tree_subtree_elements(value tree, value node,
+ value atts)
{
- return (Val_int(XMLTREE(tree)->subtree_elements(TREENODE(node))));
+ return (Val_int(XMLTREE(tree)->subtree_elements(TREENODE(node),
+ TAGLIST(atts))));
}
NoAlloc extern "C" value caml_xml_tree_is_leaf(value tree, value node){
/** Full reporting into a bit vector
*/
-static std::vector<int32_t> sort_results(std::vector<int32_t> v)
+static void pr_vector(std::vector<int32_t> v)
+{
+ for(auto i = v.begin(); i!= v.end(); ++i)
+ fprintf(stderr, "%i ", *i);
+ fprintf(stderr, "\n");
+}
+static std::vector<int32_t> sort_results(std::vector<int32_t> v, xml_tree *t)
{
std::vector<int32_t> res;
std::sort(v.begin(), v.end());
if (i == v.end()) return res;
};
prev = *i;
- res.push_back(prev);
+ res.push_back(t->parent_node(prev));
};
return res;
}
#define BV_QUERY(pref, Pref) \
extern "C" value caml_text_collection_## pref ##_bv(value tree, value str, value dobvv){ \
- CAMLparam3(tree, str, dobvv); \
+ CAMLparam3(tree, str, dobvv); \
CAMLlocal3(res, res_bv, res_array); \
int j; \
uchar * cstr = (uchar *) strdup(String_val(str)); \
std::vector<int32_t> uresults = XMLTREE(tree)->Pref(cstr); \
- std::vector<int32_t> results = sort_results(uresults); \
+ std::vector<int32_t> results = sort_results(uresults, XMLTREE(tree)); \
bool dobv = Bool_val(dobvv); \
res_bv = caml_alloc_string(dobv ? ((XMLTREE(tree)->size() / 4) + 2) : 0); \
unsigned long slen = caml_string_length(res_bv); \
memset(&(Byte(res_bv,0)), 0, slen); \
res_array = caml_alloc_shr(results.size(), 0); \
for (unsigned int i = 0; i < results.size(); ++i) { \
- j = XMLTREE(tree)->parent_node(results[i]); \
+ j = results[i]; \
if (dobv) { \
Byte(res_bv, j >> 3) |= (1 << (j & 7)); \
}; \
caml_initialize(&Field(res_array, i), Val_int(j)); \
}; \
+ fprintf(stderr, "Raw results: %i, Sorted reulsts %i\n", uresults.size(), results.size()); \
free(cstr); \
res = caml_alloc(2, 0); \
Store_field(res, 0, res_bv); \