--- /dev/null
+#include <cassert>
+#include <ctime>
+#include <iostream>
+#include <vector>
+
+#include <caml/alloc.h>
+#include <caml/callback.h>
+#include <caml/memory.h>
+
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+
+#include "tatoo.h"
+
+namespace ml {
+value pack(void *ptr)
+{
+ uintptr_t p = reinterpret_cast<uintptr_t>(ptr);
+ static_assert(sizeof(long) >= sizeof(uintptr_t), "We use long to store pointers");
+ assert(!(p&1));
+ return Val_long(p>>1);
+}
+
+void* unpack(value val) { return reinterpret_cast<void *>(Long_val(val) << 1); }
+
+template<typename T>
+T* unpack(value val) { return reinterpret_cast<T *>(unpack(val)); }
+}
+
+namespace xml {
+xmlNode* node_first_child(xmlNode *node)
+{
+ switch(node->type) {
+ case XML_ELEMENT_NODE:
+ if(node->properties) {
+ return reinterpret_cast<xmlNode *>(node->properties);
+ break;
+ }
+ case XML_ATTRIBUTE_NODE:
+ case XML_DOCUMENT_NODE:
+ case XML_TEXT_NODE:
+ case XML_PI_NODE:
+ case XML_COMMENT_NODE:
+ return node->children;
+ default:
+ assert(false && "Unknown node type");
+ }
+}
+xmlNode* node_next_sibling(xmlNode *node)
+{
+ if(node->next)
+ return node->next;
+ if(node->type == XML_ATTRIBUTE_NODE)
+ return node->parent->children;
+ return NULL;
+}
+} // namespace xml
+
+static value *init_document;
+static value *xpath_compile;
+static value *auto_evaluate;
+
+typedef std::vector<xmlNode *> nodelist;
+
+CAMLprim value node_getFirstChild(value node)
+{
+ CAMLparam1(node);
+ CAMLreturn(ml::pack(xml::node_first_child(ml::unpack<xmlNode>(node))));
+}
+
+CAMLprim value node_getNextSibling(value node)
+{
+ CAMLparam1(node);
+ CAMLreturn(ml::pack(xml::node_next_sibling(ml::unpack<xmlNode>(node))));
+}
+
+CAMLprim value node_getNodeType(value node)
+{
+ CAMLparam1(node);
+ CAMLreturn(Val_int(ml::unpack<xmlNode>(node)->type));
+}
+
+CAMLprim value node_getNodeName(value node)
+{
+ CAMLparam1(node);
+ const xmlChar *name = ml::unpack<xmlNode>(node)->name;
+ static const char *empty = "";
+ CAMLreturn(caml_copy_string(name ? reinterpret_cast<const char *>(name) : empty));
+}
+
+CAMLprim value node_getPreorder(value node)
+{
+ CAMLparam1(node);
+ CAMLreturn(Val_long(reinterpret_cast<uintptr_t>(ml::unpack<xmlNode>(node)->_private)));
+}
+
+CAMLprim value nodelist_getLength(value list)
+{
+ CAMLparam1(list);
+ CAMLreturn(Val_long(ml::unpack<nodelist>(list)->size()));
+}
+
+CAMLprim value nodelist_item(value list, value idx)
+{
+ CAMLparam2(list, idx);
+ CAMLreturn(ml::pack(ml::unpack<nodelist>(list)->operator[](Long_val(idx))));
+}
+
+CAMLprim value nodelist_new(value unit)
+{
+ CAMLparam1(unit);
+ CAMLreturn(ml::pack(new nodelist)); // XXX: lifetime...
+}
+
+CAMLprim value nodelist_add(value list, value node)
+{
+ CAMLparam2(list, node);
+ ml::unpack<nodelist>(list)->push_back(ml::unpack<xmlNode>(node));
+ CAMLreturn(list);
+}
+
+CAMLprim value getNull(value unit) { CAMLparam1(unit); CAMLreturn(ml::pack(NULL)); }
+
+CAMLprim value dereference_object(value obj)
+{
+ CAMLparam1(obj);
+ //jni::env().DeleteGlobalRef(reinterpret_cast<jobject>(obj));
+ CAMLreturn(Val_unit);
+}
+
+static uintptr_t decorate_(xmlNode *node, uintptr_t preorder)
+{
+ if(not node) return preorder;
+
+ assert(!node->_private && "Non-null private data encountered");
+ node->_private = reinterpret_cast<void *>(preorder++);
+
+ preorder = decorate_(xml::node_first_child(node), preorder);
+ preorder = decorate_(xml::node_next_sibling(node), preorder);
+
+ return preorder;
+}
+
+static value decorate(xmlDoc *doc)
+{
+ uintptr_t size = decorate_(reinterpret_cast<xmlNode *>(doc), 0);
+ return caml_callback2(*init_document, ml::pack(doc), Val_long(size));
+}
+
+template<typename Fn, typename... Args>
+auto time(const char *msg, const Fn &fn, Args&&... args) -> decltype(fn(args...))
+{
+ timespec start;
+ clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start);
+ auto ret = fn(args...);
+ timespec end;
+ clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);
+ end.tv_nsec -= start.tv_nsec;
+ end.tv_sec -= start.tv_sec;
+ printf("\n---> %s: %.3f ms\n", msg, end.tv_sec * 1e3 + end.tv_nsec / 1e6);
+ return std::move(ret);
+}
+
+static void process(const char *file, const char *expr)
+{
+ CAMLparam0();
+ CAMLlocal3(docval, aut, res);
+
+ xmlDoc *doc = time("parse", xmlReadFile, file, nullptr, 0);
+
+ docval = time("decorate", decorate, doc);
+
+ aut = time("compile", caml_callback, *xpath_compile, caml_copy_string(expr));
+
+ nodelist nl = { reinterpret_cast<xmlNode *>(doc) };
+ res = time("evaluate", caml_callback3, *auto_evaluate, aut, docval, ml::pack(&nl));
+
+ nl = std::move(*ml::unpack<nodelist>(res));
+ printf("\n---> Number of results: %zd\n", nl.size());
+ /* for(auto node: nl)
+ printf("---> %s\n", node->name);*/
+
+ xmlFreeDoc(doc);
+ CAMLreturn0;
+}
+
+void usage(const char *program)
+{
+ fprintf(stderr,
+ "Usage:\n"
+ " %s <file> <query>\n", program);
+ exit(1);
+}
+
+int main(int argc, char *argv[])
+{
+ LIBXML_TEST_VERSION;
+
+ if(argc != 3)
+ usage(argv[0]);
+
+ caml_startup(argv);
+
+ init_document = caml_named_value("init_document"); assert(init_document);
+ xpath_compile = caml_named_value("xpath_compile"); assert(xpath_compile);
+ auto_evaluate = caml_named_value("auto_evaluate"); assert(auto_evaluate);
+
+ process(argv[1], argv[2]);
+
+ xmlCleanupParser();
+ xmlMemoryDump();
+ return 0;
+}
--- /dev/null
+type +'a jvm_pointer
+
+external dereference_object : 'a jvm_pointer -> unit = "dereference_object"
+external get_null : unit -> 'a jvm_pointer = "getNull"
+
+let null = get_null ()
+
+module Java_tree : sig
+ include Tree.S
+ val init_document : node -> int -> t
+end =
+ struct
+
+ type node = [`Node] jvm_pointer
+
+
+ external dereference_node : node -> unit = "dereference_object"
+
+ external node_get_preorder : node -> int = "node_getPreorder"
+
+
+ (*external node_set_preorder : node -> int -> unit = "node_setPreorder"*)
+
+
+ type t = {
+ root : node;
+ size : int;
+ tag_cache : QName.t array
+ }
+
+ let nil = get_null ()
+
+ let dummy = get_null ()
+
+ let size t = t.size
+
+ let load_xml_file _ = assert false
+ let load_xml_string _ = assert false
+ let print_xml _ _ _ = assert false
+ let root t = t.root
+
+
+
+ external node_get_node_type : node -> int = "node_getNodeType"
+
+ let node_kind_of_int i =
+ Tree.NodeKind.(
+ match i with
+ | 1 -> Element
+ | 2 -> Attribute
+ | 3 | 4 -> Text
+ | 7 -> ProcessingInstruction
+ | 8 -> Comment
+ | 9 -> Document
+ | _ -> failwith ("Unimplemented document kind, please report " ^ string_of_int i)
+ )
+
+
+ external node_get_node_name : node -> string = "node_getNodeName"
+
+ let kind _ node =
+ assert (node != null);
+ node_kind_of_int (node_get_node_type node)
+
+ external node_get_first_child : node -> node = "node_getFirstChild"
+
+ let first_child _ node =
+ if node == nil then nil else
+ node_get_first_child node
+
+ external node_get_next_sibling : node -> node = "node_getNextSibling"
+
+
+ let next_sibling tree node =
+
+ if node == nil then nil else
+ node_get_next_sibling node
+
+ let parent _ _ = assert false
+
+ let data _ _ = assert false
+
+ let tag tree node =
+ if node == nil then QName.nil else
+ let pre = node_get_preorder node in
+ let label = tree.tag_cache.(pre) in
+ if label != QName.nil then label else
+ let label = node_get_node_name node in
+ let rlabel =
+ Tree.NodeKind.(
+ match kind tree node with
+ | Document -> QName.document
+ | Text -> QName.text
+ | Attribute -> QName.attribute (QName.of_string label)
+ | ProcessingInstruction ->
+ QName.processing_instruction (QName.of_string label)
+ | _ -> QName.of_string label
+ )
+ in
+ tree.tag_cache.(pre) <- rlabel; rlabel
+
+ let preorder tree node =
+ if node == nil then -1 else
+ node_get_preorder node
+
+ let finalize t =
+ dereference_object (t.root)
+
+ let init_document node i =
+ let s = { size = i;
+ root = node;
+ tag_cache = Array.create i QName.nil
+ }
+ in
+ Gc.finalise (finalize) s;
+ s
+
+
+ let print_node _ _ = assert false
+ let by_preorder _ _ = assert false
+ end
+
+
+module Java_node_list : Node_list.S with type node = Java_tree.node
+ =
+ struct
+ type node = Java_tree.node
+ type node_list = [`NodeList] jvm_pointer
+ type t = node_list
+ external length : node_list -> int = "nodelist_getLength"
+ external create : unit -> node_list = "nodelist_new"
+ external add : node_list -> node -> node_list = "nodelist_add"
+ external item : node_list -> int -> node = "nodelist_item"
+
+ let add n l =
+ add l n
+
+ let iter f l =
+ for i = 0 to length l - 1 do
+ f (item l i)
+ done
+
+ end
+
+module Runtime = Run.Make(Java_tree)(Java_node_list)
+
+let _ = Callback.register "init_document" Java_tree.init_document
+
+let xpath_compile p =
+ let auto =
+ Xpath.Compile.path
+ (Xpath.Parser.parse (Ulexing.from_utf8_string p))
+ in
+ Ata.print Format.err_formatter auto;
+ Format.pp_print_flush Format.err_formatter ();
+ auto
+
+let _ = Callback.register "xpath_compile" xpath_compile
+
+let auto_evaluate auto tree list =
+ Runtime.eval auto tree list
+
+let _ = Callback.register "auto_evaluate" auto_evaluate