From 85ba2cc3211b4c6ea7da4cbaf9db3bd51765e6f7 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Fri, 3 Jan 2014 16:56:15 +0100 Subject: [PATCH] Ocaml <-> C++ binding --- src/bindings/c++/Makefile | 23 ++++ src/bindings/c++/tatoo-engine.cc | 213 +++++++++++++++++++++++++++++++ src/bindings/c++/tatoo.h | 24 ++++ src/bindings/c++/tatoo_driver.ml | 163 +++++++++++++++++++++++ 4 files changed, 423 insertions(+) create mode 100644 src/bindings/c++/Makefile create mode 100644 src/bindings/c++/tatoo-engine.cc create mode 100644 src/bindings/c++/tatoo.h create mode 100644 src/bindings/c++/tatoo_driver.ml diff --git a/src/bindings/c++/Makefile b/src/bindings/c++/Makefile new file mode 100644 index 0000000..b5f2368 --- /dev/null +++ b/src/bindings/c++/Makefile @@ -0,0 +1,23 @@ +CXX = g++ + +all: tatoo-c++ + +tatoo-engine.o: tatoo-engine.cc tatoo.h + $(CXX) -I `ocamlc -where` `pkg-config libxml-2.0 --cflags` -DNDEBUG -fPIC \ + -DPIC -std=c++0x -c -O3 -o $@ $< -Wall -Wextra -Wno-unused-parameter + +tatoo-c++: tatoo-engine.o tatoo-driver.o + $(CXX) -o $@ -O3 tatoo-driver.o tatoo-engine.o -ldl -lrt `pkg-config libxml-2.0 --libs` \ + -L`ocamlc -where` -lasmrun -lunix -lbigarray + +tatoo_driver.depx tatoo_driver.cmx: tatoo_driver.ml + ../../../remake $@ + +tatoo-driver.o: tatoo_driver.cmx tatoo_driver.depx + ocamlfind ocamlopt -output-obj -o $@ -I ../.. -cc $(CXX) -linkall -linkpkg -package bigarray,ulex,unix $(shell cat tatoo_driver.depx | sed -e 's:src:../..:g' | sed -e 's:depx:cmx:g' ) $< + +test: TatooTest.class libtatoo-java.so + java -cp . -Xss80m -Djava.library.path=. TatooTest ../../../tests/xmark_0.50.xml '/descendant::keyword[ancestor::listitem]/*' + +clean: + rm -f *.cm* *.depx *.o TatooTest.class tatoo-c++ diff --git a/src/bindings/c++/tatoo-engine.cc b/src/bindings/c++/tatoo-engine.cc new file mode 100644 index 0000000..1ff2a49 --- /dev/null +++ b/src/bindings/c++/tatoo-engine.cc @@ -0,0 +1,213 @@ +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include "tatoo.h" + +namespace ml { +value pack(void *ptr) +{ + uintptr_t p = reinterpret_cast(ptr); + static_assert(sizeof(long) >= sizeof(uintptr_t), "We use long to store pointers"); + assert(!(p&1)); + return Val_long(p>>1); +} + +void* unpack(value val) { return reinterpret_cast(Long_val(val) << 1); } + +template +T* unpack(value val) { return reinterpret_cast(unpack(val)); } +} + +namespace xml { +xmlNode* node_first_child(xmlNode *node) +{ + switch(node->type) { + case XML_ELEMENT_NODE: + if(node->properties) { + return reinterpret_cast(node->properties); + break; + } + case XML_ATTRIBUTE_NODE: + case XML_DOCUMENT_NODE: + case XML_TEXT_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + return node->children; + default: + assert(false && "Unknown node type"); + } +} +xmlNode* node_next_sibling(xmlNode *node) +{ + if(node->next) + return node->next; + if(node->type == XML_ATTRIBUTE_NODE) + return node->parent->children; + return NULL; +} +} // namespace xml + +static value *init_document; +static value *xpath_compile; +static value *auto_evaluate; + +typedef std::vector nodelist; + +CAMLprim value node_getFirstChild(value node) +{ + CAMLparam1(node); + CAMLreturn(ml::pack(xml::node_first_child(ml::unpack(node)))); +} + +CAMLprim value node_getNextSibling(value node) +{ + CAMLparam1(node); + CAMLreturn(ml::pack(xml::node_next_sibling(ml::unpack(node)))); +} + +CAMLprim value node_getNodeType(value node) +{ + CAMLparam1(node); + CAMLreturn(Val_int(ml::unpack(node)->type)); +} + +CAMLprim value node_getNodeName(value node) +{ + CAMLparam1(node); + const xmlChar *name = ml::unpack(node)->name; + static const char *empty = ""; + CAMLreturn(caml_copy_string(name ? reinterpret_cast(name) : empty)); +} + +CAMLprim value node_getPreorder(value node) +{ + CAMLparam1(node); + CAMLreturn(Val_long(reinterpret_cast(ml::unpack(node)->_private))); +} + +CAMLprim value nodelist_getLength(value list) +{ + CAMLparam1(list); + CAMLreturn(Val_long(ml::unpack(list)->size())); +} + +CAMLprim value nodelist_item(value list, value idx) +{ + CAMLparam2(list, idx); + CAMLreturn(ml::pack(ml::unpack(list)->operator[](Long_val(idx)))); +} + +CAMLprim value nodelist_new(value unit) +{ + CAMLparam1(unit); + CAMLreturn(ml::pack(new nodelist)); // XXX: lifetime... +} + +CAMLprim value nodelist_add(value list, value node) +{ + CAMLparam2(list, node); + ml::unpack(list)->push_back(ml::unpack(node)); + CAMLreturn(list); +} + +CAMLprim value getNull(value unit) { CAMLparam1(unit); CAMLreturn(ml::pack(NULL)); } + +CAMLprim value dereference_object(value obj) +{ + CAMLparam1(obj); + //jni::env().DeleteGlobalRef(reinterpret_cast(obj)); + CAMLreturn(Val_unit); +} + +static uintptr_t decorate_(xmlNode *node, uintptr_t preorder) +{ + if(not node) return preorder; + + assert(!node->_private && "Non-null private data encountered"); + node->_private = reinterpret_cast(preorder++); + + preorder = decorate_(xml::node_first_child(node), preorder); + preorder = decorate_(xml::node_next_sibling(node), preorder); + + return preorder; +} + +static value decorate(xmlDoc *doc) +{ + uintptr_t size = decorate_(reinterpret_cast(doc), 0); + return caml_callback2(*init_document, ml::pack(doc), Val_long(size)); +} + +template +auto time(const char *msg, const Fn &fn, Args&&... args) -> decltype(fn(args...)) +{ + timespec start; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); + auto ret = fn(args...); + timespec end; + clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); + end.tv_nsec -= start.tv_nsec; + end.tv_sec -= start.tv_sec; + printf("\n---> %s: %.3f ms\n", msg, end.tv_sec * 1e3 + end.tv_nsec / 1e6); + return std::move(ret); +} + +static void process(const char *file, const char *expr) +{ + CAMLparam0(); + CAMLlocal3(docval, aut, res); + + xmlDoc *doc = time("parse", xmlReadFile, file, nullptr, 0); + + docval = time("decorate", decorate, doc); + + aut = time("compile", caml_callback, *xpath_compile, caml_copy_string(expr)); + + nodelist nl = { reinterpret_cast(doc) }; + res = time("evaluate", caml_callback3, *auto_evaluate, aut, docval, ml::pack(&nl)); + + nl = std::move(*ml::unpack(res)); + printf("\n---> Number of results: %zd\n", nl.size()); + /* for(auto node: nl) + printf("---> %s\n", node->name);*/ + + xmlFreeDoc(doc); + CAMLreturn0; +} + +void usage(const char *program) +{ + fprintf(stderr, + "Usage:\n" + " %s \n", program); + exit(1); +} + +int main(int argc, char *argv[]) +{ + LIBXML_TEST_VERSION; + + if(argc != 3) + usage(argv[0]); + + caml_startup(argv); + + init_document = caml_named_value("init_document"); assert(init_document); + xpath_compile = caml_named_value("xpath_compile"); assert(xpath_compile); + auto_evaluate = caml_named_value("auto_evaluate"); assert(auto_evaluate); + + process(argv[1], argv[2]); + + xmlCleanupParser(); + xmlMemoryDump(); + return 0; +} diff --git a/src/bindings/c++/tatoo.h b/src/bindings/c++/tatoo.h new file mode 100644 index 0000000..e9492f1 --- /dev/null +++ b/src/bindings/c++/tatoo.h @@ -0,0 +1,24 @@ +#ifndef TATOO_H +#define TATOO_H +#pragma once + +#include + +extern "C" { + +CAMLprim value node_getFirstChild(value node); +CAMLprim value node_getNextSibling(value node); +CAMLprim value node_getNodeType(value node); +CAMLprim value node_getNodeName(value node); +CAMLprim value node_getPreorder(value node); + +CAMLprim value nodelist_getLength(value list); +CAMLprim value nodelist_item(value list, value idx); +CAMLprim value nodelist_new(value unit); +CAMLprim value nodelist_add(value list, value node); + +CAMLprim value getNull(value unit); +CAMLprim value dereference_object(value obj); +} + +#endif diff --git a/src/bindings/c++/tatoo_driver.ml b/src/bindings/c++/tatoo_driver.ml new file mode 100644 index 0000000..3b4c561 --- /dev/null +++ b/src/bindings/c++/tatoo_driver.ml @@ -0,0 +1,163 @@ +type +'a jvm_pointer + +external dereference_object : 'a jvm_pointer -> unit = "dereference_object" +external get_null : unit -> 'a jvm_pointer = "getNull" + +let null = get_null () + +module Java_tree : sig + include Tree.S + val init_document : node -> int -> t +end = + struct + + type node = [`Node] jvm_pointer + + + external dereference_node : node -> unit = "dereference_object" + + external node_get_preorder : node -> int = "node_getPreorder" + + + (*external node_set_preorder : node -> int -> unit = "node_setPreorder"*) + + + type t = { + root : node; + size : int; + tag_cache : QName.t array + } + + let nil = get_null () + + let dummy = get_null () + + let size t = t.size + + let load_xml_file _ = assert false + let load_xml_string _ = assert false + let print_xml _ _ _ = assert false + let root t = t.root + + + + external node_get_node_type : node -> int = "node_getNodeType" + + let node_kind_of_int i = + Tree.NodeKind.( + match i with + | 1 -> Element + | 2 -> Attribute + | 3 | 4 -> Text + | 7 -> ProcessingInstruction + | 8 -> Comment + | 9 -> Document + | _ -> failwith ("Unimplemented document kind, please report " ^ string_of_int i) + ) + + + external node_get_node_name : node -> string = "node_getNodeName" + + let kind _ node = + assert (node != null); + node_kind_of_int (node_get_node_type node) + + external node_get_first_child : node -> node = "node_getFirstChild" + + let first_child _ node = + if node == nil then nil else + node_get_first_child node + + external node_get_next_sibling : node -> node = "node_getNextSibling" + + + let next_sibling tree node = + + if node == nil then nil else + node_get_next_sibling node + + let parent _ _ = assert false + + let data _ _ = assert false + + let tag tree node = + if node == nil then QName.nil else + let pre = node_get_preorder node in + let label = tree.tag_cache.(pre) in + if label != QName.nil then label else + let label = node_get_node_name node in + let rlabel = + Tree.NodeKind.( + match kind tree node with + | Document -> QName.document + | Text -> QName.text + | Attribute -> QName.attribute (QName.of_string label) + | ProcessingInstruction -> + QName.processing_instruction (QName.of_string label) + | _ -> QName.of_string label + ) + in + tree.tag_cache.(pre) <- rlabel; rlabel + + let preorder tree node = + if node == nil then -1 else + node_get_preorder node + + let finalize t = + dereference_object (t.root) + + let init_document node i = + let s = { size = i; + root = node; + tag_cache = Array.create i QName.nil + } + in + Gc.finalise (finalize) s; + s + + + let print_node _ _ = assert false + let by_preorder _ _ = assert false + end + + +module Java_node_list : Node_list.S with type node = Java_tree.node + = + struct + type node = Java_tree.node + type node_list = [`NodeList] jvm_pointer + type t = node_list + external length : node_list -> int = "nodelist_getLength" + external create : unit -> node_list = "nodelist_new" + external add : node_list -> node -> node_list = "nodelist_add" + external item : node_list -> int -> node = "nodelist_item" + + let add n l = + add l n + + let iter f l = + for i = 0 to length l - 1 do + f (item l i) + done + + end + +module Runtime = Run.Make(Java_tree)(Java_node_list) + +let _ = Callback.register "init_document" Java_tree.init_document + +let xpath_compile p = + let auto = + Xpath.Compile.path + (Xpath.Parser.parse (Ulexing.from_utf8_string p)) + in + Ata.print Format.err_formatter auto; + Format.pp_print_flush Format.err_formatter (); + auto + +let _ = Callback.register "xpath_compile" xpath_compile + +let auto_evaluate auto tree list = + Runtime.eval auto tree list + +let _ = Callback.register "auto_evaluate" auto_evaluate -- 2.17.1