#include <unordered_set>
#include <algorithm>
-#include "XMLDocShredder.h"
+
#include "XMLTree.h"
+#include "XMLTreeBuilder.h"
+#include "Grammar.h"
#include "Utils.h"
+#include "common_stub.hpp"
+
+/* Reports `msg` through sxsi_raise_msg (presumably raises the OCaml-side
+ * exception, as the caml_raise_with_string macro it replaces did -- confirm
+ * in common_stub.hpp). */
+#define CAMLRAISEMSG(msg) (sxsi_raise_msg((char*) (msg)))
+
+/* Typed accessors: each reads the C++ pointer held by OCaml value `x`
+ * through Obj_val<T>. */
+#define XMLTREE(x) (Obj_val<XMLTree*>(x))
+
+#define HSET(x) (Obj_val<TagIdSet*>(x))
+
+#define XMLTREEBUILDER(x) (Obj_val<XMLTreeBuilder*>(x))
+
+#define GRAMMAR(x) (Obj_val<Grammar*>(x))
-extern "C" {
-/* OCaml memory managment */
-#include <caml/mlvalues.h>
-#include <caml/alloc.h>
-#include <caml/memory.h>
-#include <caml/callback.h>
-#include <caml/fail.h>
-#include <caml/custom.h>
-#include <caml/bigarray.h>
- //#include "results.h"
-#include <stdio.h>
-#define CAMLRAISEMSG(msg) (caml_raise_with_string(*cpp_exception,(msg) ))
-#define NOT_IMPLEMENTED(s) (caml_failwith(s))
-#define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x)))
-#define HSET(x) ((TagIdSet*)((* (TagIdSet**) Data_custom_val(x))))
-#define TEXTCOLLECTION(x)
+/* Decode an OCaml int into a treeNode. */
#define TREENODEVAL(i) ((treeNode) (Int_val(i)))
+/* Decode an OCaml int into a TagType. */
#define TAGVAL(i) ((TagType) (Int_val(i)))
+/* Node id returned by caml_xml_tree_root. */
#define XMLTREE_ROOT 0
+/* Expands to nothing; prefixes bindings written without CAMLparam/CAMLreturn. */
#define NoAlloc
-
- static struct custom_operations ops;
- static struct custom_operations set_ops;
- static value * cpp_exception = NULL;
- static bool ops_initialized = false;
-
+extern "C" {
#include <sys/time.h>
#include <sys/resource.h>
-
-
-}
-
-extern "C" void caml_xml_tree_finalize(value tree){
- delete XMLTREE(tree);
- return;
-}
-
-extern "C" void caml_hset_finalize(value hblock){
- delete HSET(hblock);
- return;
+#include <stdio.h>
}
-extern "C" value caml_init_lib (value unit) {
- CAMLparam1(unit);
- struct rlimit rlim;
- if (!ops_initialized){
-
- getrlimit(RLIMIT_STACK, &rlim);
-
- if (rlim.rlim_max == RLIM_INFINITY && rlim.rlim_cur != RLIM_INFINITY) {
- rlim.rlim_cur = RLIM_INFINITY;
- setrlimit(RLIMIT_STACK, &rlim);
- };
-
- ops.identifier = (char*) "XMLTree";
- ops.finalize = caml_xml_tree_finalize;
- set_ops.identifier = (char*) "unordered_set";
- set_ops.finalize = caml_hset_finalize;
-
- cpp_exception = caml_named_value("CPlusPlusError");
- if (cpp_exception == NULL){
- string s = "FATAL: Unregistered exception ";
- s += "CPlusPlusError";
- caml_failwith(s.c_str());
- };
-
- ops_initialized = true;
-
- };
- CAMLreturn(Val_unit);
-
-}
+/** XMLTreeBuilder bindings
+ *
+ */
-extern "C" value caml_shredder_parse(XMLDocShredder *shredder){
- CAMLparam0();
- CAMLlocal1(doc);
- XMLTree * tree;
- shredder->processStartDocument("");
- shredder->parse();
- shredder->processEndDocument();
- doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2);
- tree = (XMLTree *) shredder->getXMLTree();
- memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*));
- CAMLreturn(doc);
-
-}
-
-extern "C" value caml_call_shredder_uri(value uri,value sf, value iet, value dtc, value idtype){
- CAMLparam1(uri);
- CAMLlocal1(doc);
- char *fn = String_val(uri);
- XMLDocShredder * shredder;
- TextCollectionBuilder::index_type_t id;
- switch (Int_val(idtype)){
+/**
+ * Creates a new XMLTreeBuilder and returns it wrapped in a custom block.
+ *
+ * The builder is constructed before the custom block is allocated, so the
+ * block never exists with an unset pointer (same order as caml_xml_tree_load
+ * and caml_xml_tree_builder_close_document). A std::exception thrown by the
+ * constructor is reported through CAMLRAISEMSG, as caml_xml_tree_load does,
+ * instead of escaping into the OCaml runtime.
+ *
+ * @param unit ignored
+ * @return custom block holding the XMLTreeBuilder*
+ */
+extern "C" value caml_xml_tree_builder_create(value unit)
+{
+  CAMLparam1(unit);
+  CAMLlocal1(result);
+  XMLTreeBuilder * builder = NULL;
+  try {
+    builder = new XMLTreeBuilder();
+  }
+  catch (const std::exception& e){ CAMLRAISEMSG(e.what()); }
+  result = sxsi_alloc_custom<XMLTreeBuilder*>();
+  Obj_val<XMLTreeBuilder*>(result) = builder;
+
+  CAMLreturn(result);
+}
+
+/**
+ * Starts a new document on the builder held in vbuilder.
+ *
+ * vet, vsrate and vdtc are decoded as bool, int and bool and forwarded
+ * unchanged to XMLTreeBuilder::OpenDocument. vidxtype selects the text index:
+ * 0 = index_type_default, 1 = index_type_swcsa, 2 = index_type_rlcsa; any
+ * other value reports "Invalid Index Type" through CAMLRAISEMSG.
+ * Reports "OpenDocument" when the call returns NULLT. Returns unit.
+ */
+extern "C" value caml_xml_tree_builder_open_document(value vbuilder,
+ value vet,
+ value vsrate,
+ value vdtc,
+ value vidxtype)
+{
+ CAMLparam5(vbuilder, vet, vsrate, vdtc, vidxtype);
+ bool empty_text = Bool_val(vet);
+ int sample_rate = Int_val(vsrate);
+ bool disable_tc = Bool_val(vdtc);
+ TextCollectionBuilder::index_type_t idx_type;
+ switch (Int_val(vidxtype)){
case 0:
- id = TextCollectionBuilder::index_type_default;
+ idx_type = TextCollectionBuilder::index_type_default;
break;
case 1:
- id = TextCollectionBuilder::index_type_swcsa;
+ idx_type = TextCollectionBuilder::index_type_swcsa;
break;
case 2:
- id = TextCollectionBuilder::index_type_rlcsa;
+ idx_type = TextCollectionBuilder::index_type_rlcsa;
break;
default:
- CAMLRAISEMSG("");
+ CAMLRAISEMSG("Invalid Index Type");
};
+ int res = XMLTREEBUILDER(vbuilder)->OpenDocument(empty_text,
+ sample_rate,
+ disable_tc,
+ idx_type);
+ if (res == NULLT)
+ CAMLRAISEMSG("OpenDocument");
- try {
- shredder = new XMLDocShredder(fn,Int_val(sf),Bool_val(iet),Bool_val(dtc), id);
- doc = caml_shredder_parse(shredder);
- delete shredder;
- }
- catch (const std::exception& e){ CAMLRAISEMSG(e.what()); }
- catch (string msg){ CAMLRAISEMSG(msg.c_str()); }
- catch (char const * msg){ CAMLRAISEMSG(msg); };
- CAMLreturn (doc);
+ CAMLreturn (Val_unit);
+}
+/**
+ * Finishes the current document of the builder held in vbuilder and returns
+ * the XMLTree produced by CloseDocument wrapped in a custom block.
+ * Reports "CloseDocument" through CAMLRAISEMSG when no tree is returned.
+ */
+extern "C" value caml_xml_tree_builder_close_document(value vbuilder)
+{
+  CAMLparam1(vbuilder);
+  CAMLlocal1(wrapped);
+  XMLTreeBuilder * const builder = XMLTREEBUILDER(vbuilder);
+  XMLTree * const built = builder->CloseDocument();
+  if (!built)
+    CAMLRAISEMSG("CloseDocument");
+  wrapped = sxsi_alloc_custom<XMLTree*>();
+  Obj_val<XMLTree*>(wrapped) = built;
+  CAMLreturn (wrapped);
}
-extern "C" value caml_call_shredder_string(value data,value sf, value iet, value dtc, value idtype){
- CAMLparam1(data);
- CAMLlocal1(doc);
- XMLDocShredder * shredder;
- unsigned int ln = caml_string_length(data);
- unsigned char *fn = (unsigned char*) String_val(data);
- TextCollectionBuilder::index_type_t id;
- switch (Int_val(idtype)){
- case 0:
- id = TextCollectionBuilder::index_type_default;
- break;
- case 1:
- id = TextCollectionBuilder::index_type_swcsa;
- break;
- case 2:
- id = TextCollectionBuilder::index_type_rlcsa;
- break;
- default:
- CAMLRAISEMSG("");
- };
- try {
+/**
+ * Forwards an opening tag to the builder held in vbuilder.
+ * vtag is read as a C string and passed to NewOpenTag as a std::string.
+ * Reports "NewOpenTag" through CAMLRAISEMSG when the call returns NULLT.
+ * Returns unit.
+ */
+extern "C" value caml_xml_tree_builder_new_open_tag(value vbuilder, value vtag)
+{
+ CAMLparam2(vbuilder, vtag);
+ const char * tag = String_val(vtag);
+ if (XMLTREEBUILDER(vbuilder)->NewOpenTag(std::string(tag)) == NULLT)
+ CAMLRAISEMSG("NewOpenTag");
- shredder = new XMLDocShredder (fn,ln,Int_val(sf),Bool_val(iet),Bool_val(dtc), id);
- doc = caml_shredder_parse(shredder);
- delete shredder;
- }
- catch (const std::exception& e){ CAMLRAISEMSG(e.what()); }
- catch (string msg){ CAMLRAISEMSG(msg.c_str()); }
- catch (char const * msg){ CAMLRAISEMSG(msg); };
- CAMLreturn(doc);
+ CAMLreturn (Val_unit);
}
+/**
+ * Forwards a closing tag to the builder held in vbuilder.
+ * vtag is read as a C string and passed to NewClosingTag as a std::string.
+ * Reports "NewClosingTag" through CAMLRAISEMSG when the call returns NULLT.
+ * Returns unit.
+ */
+extern "C" value caml_xml_tree_builder_new_closing_tag(value vbuilder, value vtag)
+{
+  CAMLparam2(vbuilder, vtag);
+  XMLTreeBuilder * const builder = XMLTREEBUILDER(vbuilder);
+  /* The std::string stays a temporary so it is destroyed before any raise. */
+  const bool failed =
+    (builder->NewClosingTag(std::string(String_val(vtag))) == NULLT);
+  if (failed)
+    CAMLRAISEMSG("NewClosingTag");
+
+  CAMLreturn (Val_unit);
+}
+
+/**
+ * Forwards a text node to the builder held in vbuilder.
+ *
+ * The std::string is built from the OCaml string's own length rather than
+ * from a NUL-terminated C string: OCaml strings may contain NUL bytes, and
+ * the text would otherwise be cut silently at the first one.
+ * Reports "NewText" through CAMLRAISEMSG when the call returns NULLT.
+ * Returns unit.
+ */
+extern "C" value caml_xml_tree_builder_new_text(value vbuilder, value vtext)
+{
+  CAMLparam2(vbuilder, vtext);
+  const char * text = String_val(vtext);
+  if (XMLTREEBUILDER(vbuilder)->NewText(std::string(text, caml_string_length(vtext))) == NULLT)
+    CAMLRAISEMSG("NewText");
+
+  CAMLreturn (Val_unit);
+}
+
+
+/*************************************************************************/
+
+/**
+ * XMLTree bindings
+ * All of the functions here call the _unsafe version and implement the logic themselves
+ * (tests for NULLT and so on). This avoids one indirection + one call when a test fails.
+ */
+
+
extern "C" value caml_xml_tree_save(value tree,value fd, value name){
CAMLparam3(tree, fd, name);
XMLTREE(tree)->Save(Int_val(fd), String_val(name));
+/**
+ * Loads an XMLTree through XMLTree::Load(fd, load_tc, sf, name) and returns
+ * it wrapped in a custom block.
+ * std::exception, std::string and char const* exceptions thrown inside the
+ * try block are converted into a CAMLRAISEMSG report carrying their message.
+ */
extern "C" value caml_xml_tree_load(value fd, value name, value load_tc,value sf){
CAMLparam4(fd, name, load_tc, sf);
- CAMLlocal1(doc);
+ CAMLlocal1(result);
XMLTree * tree;
try {
+
tree = XMLTree::Load(Int_val(fd),Bool_val(load_tc),Int_val(sf), String_val(name));
- doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2);
- memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*));
- CAMLreturn(doc);
+ result = sxsi_alloc_custom<XMLTree*>();
+ Obj_val<XMLTree*>(result) = tree;
+ CAMLreturn(result);
}
- catch (const xmlpp::internal_error& e){ CAMLRAISEMSG(e.what()); }
catch (const std::exception& e){ CAMLRAISEMSG(e.what()); }
- catch (string msg){ CAMLRAISEMSG(msg.c_str()); }
+ catch (std::string msg){ CAMLRAISEMSG(msg.c_str()); }
catch (char const * msg){ CAMLRAISEMSG(msg); };
+ //never reached
+ return (Val_unit);
}
-
-/*************************************************************************/
-
-/**
- * XMLTree bindings
- * All of the functions here call the _unsafe version and implement the logics themselves
- * (test for NULLT and so on). This avoids one indirection + one call when the tests fails.
- */
-
-
+/** Returns the constant XMLTREE_ROOT as an OCaml int; `tree` is not read. */
NoAlloc extern "C" value caml_xml_tree_root(value tree){
return (Val_int(XMLTREE_ROOT));
}
return (Val_int(XMLTREE(tree)->NextElement(TREENODEVAL(node))));
}
+/**
+ * Returns XMLTree::NextNodeBefore(node, ctx) of the wrapped tree as an
+ * OCaml int. Written without CAMLparam/CAMLreturn, like the other NoAlloc
+ * bindings.
+ */
+NoAlloc extern "C" value caml_xml_tree_next_node_before(value tree, value node, value ctx){
+  XMLTree * const doc = XMLTREE(tree);
+  const treeNode from = TREENODEVAL(node);
+  const treeNode limit = TREENODEVAL(ctx);
+  return (Val_int(doc->NextNodeBefore(from, limit)));
+}
+
+/** Returns XMLTree::PrevSibling(node) of the wrapped tree as an OCaml int. */
NoAlloc extern "C" value caml_xml_tree_prev_sibling(value tree, value node){
return (Val_int(XMLTREE(tree)->PrevSibling(TREENODEVAL(node))));
}
+/**
+ * Allocates an empty TagIdSet and returns it wrapped in a custom block.
+ * `unit` is ignored.
+ */
extern "C" value caml_unordered_set_alloc(value unit){
CAMLparam1(unit);
CAMLlocal1(hset);
- hset = caml_alloc_custom(&set_ops,sizeof(TagIdSet*),1,2);
- TagIdSet* ht = new TagIdSet();
- memcpy(Data_custom_val(hset),&ht,sizeof(TagIdSet*));
+ hset = sxsi_alloc_custom<TagIdSet*>();
+ Obj_val<TagIdSet*>(hset) = new TagIdSet();
CAMLreturn (hset);
}
CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id))));
}
+/* Strict less-than comparison of two DocID values. */
-bool docId_comp(DocID x, DocID y) { return x < y; };
+bool docId_comp(DocID x, DocID y) { return x < y; }
/**
* Existential queries
BV_QUERY(equals, Equals)
BV_QUERY(contains, Contains)
BV_QUERY(lessthan, LessThan)
+
+
+
+//////////////////////////////////////////// Grammar stuff
+
+/**
+ * Loads a Grammar from an already-open file descriptor and returns it
+ * wrapped in a custom block.
+ *
+ * The descriptor is duplicated first, so the fclose below closes only the
+ * duplicate and leaves the caller's descriptor open.
+ *
+ * @param file    OCaml int holding the file descriptor
+ * @param load_bp OCaml bool forwarded to Grammar::load
+ * @return custom block holding the Grammar*
+ *
+ * Reports "Error opening grammar file" through CAMLRAISEMSG when the
+ * descriptor cannot be duplicated or opened as a stream, and
+ * "Error loading grammar file" when Grammar::load yields NULL.
+ */
+extern "C" value caml_grammar_load(value file, value load_bp)
+{
+  CAMLparam2(file, load_bp);
+  CAMLlocal1(result);
+  Grammar *grammar;
+  int f1 = Int_val(file);
+  int f2 = dup(f1);
+  FILE * fd = fdopen(f2, "r");
+  if (fd == NULL) {
+    /* fdopen does not take ownership on failure: release the duplicate. */
+    if (f2 >= 0)
+      close(f2);
+    CAMLRAISEMSG("Error opening grammar file");
+  }
+  grammar = Grammar::load(fd, Bool_val(load_bp));
+  fclose(fd);
+  /* NOTE(review): assumes Grammar::load signals failure with NULL -- confirm.
+     Either way a NULL must not be wrapped and dereferenced later. */
+  if (grammar == NULL)
+    CAMLRAISEMSG("Error loading grammar file");
+  result = sxsi_alloc_custom<Grammar*>();
+  Obj_val<Grammar*>(result) = grammar;
+  CAMLreturn(result);
+}
+
+/** Returns Grammar::getSymbolAt(symbol, preorder) of the wrapped grammar as an OCaml int. */
+extern "C" value caml_grammar_get_symbol_at(value grammar, value symbol, value preorder)
+{
+  CAMLparam3(grammar, symbol, preorder);
+  Grammar * const g = GRAMMAR(grammar);
+  const int position = Int_val(preorder);
+  CAMLreturn(Val_long(g->getSymbolAt(Long_val(symbol), position)));
+}
+
+/**
+ * Returns Grammar::firstChild(rule, pos) of the wrapped grammar as an OCaml int.
+ * Only `grammar` is registered with CAMLparam; `rule` and `pos` are read as integers.
+ */
+extern "C" value caml_grammar_first_child(value grammar, value rule, value pos)
+{
+  CAMLparam1(grammar);
+  Grammar * const g = GRAMMAR(grammar);
+  const int position = Int_val(pos);
+  CAMLreturn(Val_int(g->firstChild(Long_val(rule), position)));
+}
+
+/**
+ * Returns Grammar::nextSibling(rule, pos) of the wrapped grammar as an OCaml int.
+ * Only `grammar` is registered with CAMLparam; `rule` and `pos` are read as integers.
+ */
+extern "C" value caml_grammar_next_sibling(value grammar, value rule, value pos)
+{
+  CAMLparam1(grammar);
+  Grammar * const g = GRAMMAR(grammar);
+  const int position = Int_val(pos);
+  CAMLreturn(Val_int(g->nextSibling(Long_val(rule), position)));
+}
+
+/** Returns Grammar::startFirstChild(pos) of the wrapped grammar as an OCaml int. */
+extern "C" value caml_grammar_start_first_child(value grammar, value pos)
+{
+  CAMLparam1(grammar);
+  Grammar * const g = GRAMMAR(grammar);
+  const int position = Int_val(pos);
+  CAMLreturn(Val_int(g->startFirstChild(position)));
+}
+
+/** Returns Grammar::startNextSibling(pos) of the wrapped grammar as an OCaml int. */
+extern "C" value caml_grammar_start_next_sibling(value grammar, value pos)
+{
+  CAMLparam1(grammar);
+  Grammar * const g = GRAMMAR(grammar);
+  const int position = Int_val(pos);
+  CAMLreturn(Val_int(g->startNextSibling(position)));
+}
+
+/** Returns Grammar::isNil(rule) of the wrapped grammar as an OCaml bool. */
+extern "C" value caml_grammar_is_nil(value grammar, value rule)
+{
+  CAMLparam1(grammar);
+  Grammar * const g = GRAMMAR(grammar);
+  CAMLreturn(Val_bool(g->isNil(Long_val(rule))));
+}
+
+/**
+ * Returns the name of tag `tag` (Grammar::getTagName) as a fresh OCaml string.
+ *
+ * The result of getTagName is bound to a const reference before c_str() is
+ * taken. If getTagName returns by value, this keeps the string alive until
+ * caml_copy_string has copied it; calling .c_str() directly on the call
+ * expression would leave a pointer into a destroyed temporary. If it returns
+ * a reference, the binding costs nothing.
+ */
+extern "C" value caml_grammar_get_tag(value grammar, value tag)
+{
+  CAMLparam1(grammar);
+  CAMLlocal1(res);
+  const std::string & name = GRAMMAR(grammar)->getTagName(Long_val(tag));
+  res = caml_copy_string(name.c_str());
+  CAMLreturn(res);
+}
+
+/** Returns Grammar::getID1(rule) of the wrapped grammar as an OCaml int. */
+extern "C" value caml_grammar_get_id1(value grammar, value rule)
+{
+  CAMLparam1(grammar);
+  Grammar * const g = GRAMMAR(grammar);
+  CAMLreturn(Val_long(g->getID1(Long_val(rule))));
+}
+
+/** Returns Grammar::getID2(rule) of the wrapped grammar as an OCaml int. */
+extern "C" value caml_grammar_get_id2(value grammar, value rule)
+{
+  CAMLparam1(grammar);
+  Grammar * const g = GRAMMAR(grammar);
+  CAMLreturn(Val_long(g->getID2(Long_val(rule))));
+}
+
+/** Returns Grammar::getParamPos(rule) of the wrapped grammar as an OCaml int. */
+extern "C" value caml_grammar_get_param_pos(value grammar, value rule)
+{
+  CAMLparam1(grammar);
+  Grammar * const g = GRAMMAR(grammar);
+  CAMLreturn(Val_int(g->getParamPos(Long_val(rule))));
+}
+
+/** Returns Grammar::translateTag(tag) of the wrapped grammar as an OCaml int. */
+extern "C" value caml_grammar_translate_tag(value grammar, value tag)
+{
+  CAMLparam1(grammar);
+  Grammar * const g = GRAMMAR(grammar);
+  const int tag_id = Int_val(tag);
+  CAMLreturn(Val_int(g->translateTag(tag_id)));
+}
+
+/**
+ * Passes the OCaml string `str` to Grammar::getTagID of the wrapped grammar
+ * and returns the result as an OCaml int.
+ */
+extern "C" value caml_grammar_register_tag(value grammar, value str)
+{
+  CAMLparam2(grammar, str);
+  Grammar * const g = GRAMMAR(grammar);
+  char * tag_name = String_val(str);
+  CAMLreturn(Val_int(g->getTagID(tag_name)));
+}
+
+/**
+ * Returns getNiltagid() * 4 + 1 for the wrapped grammar as an OCaml int.
+ * NOTE(review): the "* 4 + 1" encoding is not explained in this file --
+ * confirm against the Grammar symbol layout.
+ */
+extern "C" value caml_grammar_nil_id(value grammar)
+{
+  CAMLparam1(grammar);
+  Grammar * const g = GRAMMAR(grammar);
+  CAMLreturn(Val_long((g->getNiltagid()) * 4 + 1));
+}
+
+extern "C" {
+/* Bounds used by Is_young below; declared here and defined elsewhere
+   (presumably by the OCaml runtime -- confirm for the targeted version). */
+extern char *caml_young_end;
+extern char *caml_young_start;
+typedef char * addr;
+/* True when `val`, taken as an address, lies strictly between
+   caml_young_start and caml_young_end. */
+#define Is_young(val) \
+ ((addr)(val) < (addr)caml_young_end && (addr)(val) > (addr)caml_young_start)
+
+}
+/** Returns Is_young(a) as an OCaml bool. */
+extern "C" value caml_custom_is_young(value a){
+  const bool young = Is_young(a);
+  return Val_bool(young);
+}
+
+/**
+ * Copies n fields from a1, starting at offset ofs1, into a2 starting at
+ * offset ofs2. Returns unit.
+ *
+ * No bounds or tag checks are performed here. The fields are copied as raw
+ * `value`s, so this presumably expects arrays of boxed values, not float
+ * arrays -- confirm against callers.
+ * The function does not use CAMLparam; nothing in it allocates.
+ */
+extern "C" value caml_custom_array_blit(value a1, value ofs1, value a2, value ofs2,
+ value n)
+{
+ value * src, * dst;
+ intnat count;
+
+ if (Is_young(a2)) {
+ /* Destination passes the Is_young test: copy the fields directly.
+ A direct copy cannot create old-to-young pointers here, nor
+ interfere with the incremental major GC.
+ memmove handles overlapping source and destination ranges. */
+ memmove(&Field(a2, Long_val(ofs2)),
+ &Field(a1, Long_val(ofs1)),
+ Long_val(n) * sizeof(value));
+ return Val_unit;
+ }
+ /* Destination is not young: every store must go through caml_modify. */
+ count = Long_val(n);
+ if (a1 == a2 && Long_val(ofs1) < Long_val(ofs2)) {
+ /* Same array, moving towards higher indices: copy in descending order
+ so that source fields are read before they are overwritten. */
+ for (dst = &Field(a2, Long_val(ofs2) + count - 1),
+ src = &Field(a1, Long_val(ofs1) + count - 1);
+ count > 0;
+ count--, src--, dst--) {
+ caml_modify(dst, *src);
+ }
+ } else {
+ /* Copy in ascending order */
+ for (dst = &Field(a2, Long_val(ofs2)), src = &Field(a1, Long_val(ofs1));
+ count > 0;
+ count--, src++, dst++) {
+ caml_modify(dst, *src);
+ }
+ }
+ /* Many caml_modify in a row can create a lot of old-to-young refs.
+ The call that would give the minor GC a chance to run is disabled: */
+ //caml_check_urgent_gc(Val_unit);
+ return Val_unit;
+}
+
+
+////////////////////// BP
+
+/**
+ * Allocates a zero-filled buffer of unsigned int words able to hold `size`
+ * bits (size / bits-per-word, plus one word) and returns the raw buffer
+ * pointer cast to `value`.
+ * Reports "BP: cannot allocate memory" through CAMLRAISEMSG when calloc fails.
+ */
+extern "C" value caml_bitmap_create(value size)
+{
+  CAMLparam1(size);
+  const size_t nbits = Long_val(size);
+  const size_t bits_per_word = 8*sizeof(unsigned int);
+  const size_t nwords = nbits / bits_per_word + 1;
+  unsigned int * storage = (unsigned int*) calloc(nwords, sizeof(unsigned int));
+  if (!storage)
+    CAMLRAISEMSG("BP: cannot allocate memory");
+  CAMLreturn( (value) storage);
+}
+
+/**
+ * Resizes the raw bitmap buffer `bitmap` with realloc so that it can hold
+ * `nsize` bits (nsize / bits-per-word, plus one word) and returns the
+ * possibly moved buffer pointer cast to `value`.
+ * Prints the new byte size on stderr.
+ * Reports "BP: cannot reallocate memory" through CAMLRAISEMSG when realloc
+ * fails.
+ */
+extern "C" value caml_bitmap_resize(value bitmap, value nsize)
+{
+  CAMLparam2(bitmap, nsize);
+  size_t bits = Long_val(nsize);
+  size_t bytes = (bits / (8 * sizeof(unsigned int)) + 1 ) * sizeof(unsigned int);
+  /* %lu expects unsigned long; size_t is not that type on every platform,
+     so convert explicitly. Print `bytes` itself instead of recomputing it. */
+  fprintf(stderr, "Growing to: %lu bytes\n", (unsigned long) bytes);
+  unsigned int * buffer = (unsigned int*) realloc((void *) bitmap, bytes);
+  if (buffer == NULL)
+    CAMLRAISEMSG("BP: cannot reallocate memory");
+  CAMLreturn((value) buffer);
+}
+
+/**
+ * Calls bp_setbit on the raw bitmap buffer `bitmap` with position `i` and
+ * the bit value of `b`. Returns unit.
+ */
+extern "C" value caml_bitmap_setbit(value bitmap, value i, value b)
+{
+  CAMLparam3(bitmap, i, b);
+  unsigned int * words = (unsigned int*) bitmap;
+  unsigned int pos = Int_val(i);
+  unsigned int bit = Bool_val(b);
+  bp_setbit (words, pos, bit);
+  CAMLreturn(Val_unit);
+}
+
+/**
+ * Releases the bp structure held by `b` with bp_delete.
+ *
+ * This function is passed to sxsi_alloc_custom<bp*> in caml_bp_construct and
+ * caml_bp_load, i.e. it is used as the custom block's finalizer. The OCaml
+ * manual forbids CAMLparam/CAMLreturn in finalizers, so the parameter is
+ * deliberately not registered.
+ */
+extern "C" void caml_bp_delete(value b)
+{
+  bp * B = Obj_val<bp*>(b);
+  bp_delete(B);
+}
+
+/**
+ * Builds a bp structure over the raw bitmap buffer `bitmap` with
+ * bp_construct(npar, bitmap, OPT_DEGREE) and returns it wrapped in a custom
+ * block created with caml_bp_delete.
+ */
+extern "C" value caml_bp_construct(value bitmap, value npar)
+{
+  CAMLparam2(bitmap, npar);
+  CAMLlocal1(wrapped);
+  unsigned int * const words = (unsigned int *) bitmap;
+  bp * const tree = bp_construct(Int_val(npar), words, OPT_DEGREE);
+  wrapped = sxsi_alloc_custom<bp*>(caml_bp_delete);
+  Obj_val<bp*>(wrapped) = tree;
+  CAMLreturn(wrapped);
+}
+
+/** Returns bp_first_child(b, idx) as an OCaml int. */
+extern "C" value caml_bp_first_child(value b, value idx)
+{
+  CAMLparam2(b, idx);
+  bp * const tree = Obj_val<bp*>(b);
+  const int pos = Int_val(idx);
+  CAMLreturn (Val_int( bp_first_child(tree, pos)));
+}
+
+
+/** Returns bp_next_sibling(b, idx) as an OCaml int. */
+extern "C" value caml_bp_next_sibling(value b, value idx)
+{
+  CAMLparam2(b, idx);
+  bp * const tree = Obj_val<bp*>(b);
+  const int pos = Int_val(idx);
+  CAMLreturn (Val_int(bp_next_sibling(tree, pos)));
+}
+
+/** Returns bp_preorder_rank(b, idx) - 1 as an OCaml int. */
+extern "C" value caml_bp_preorder_rank(value b, value idx)
+{
+  CAMLparam2(b, idx);
+  bp * const tree = Obj_val<bp*>(b);
+  const int pos = Int_val(idx);
+  CAMLreturn (Val_int(bp_preorder_rank(tree, pos) - 1));
+}
+
+
+/**
+ * Loads a bp structure with loadTree from an already-open file descriptor
+ * and returns it wrapped in a custom block created with caml_bp_delete.
+ *
+ * The descriptor is duplicated first, so the fclose below closes only the
+ * duplicate and leaves the caller's descriptor open.
+ * Reports "Error opening bp file" through CAMLRAISEMSG when the descriptor
+ * cannot be duplicated or opened as a stream.
+ *
+ * @param file OCaml int holding the file descriptor
+ */
+extern "C" value caml_bp_load(value file)
+{
+  CAMLparam1(file);
+  CAMLlocal1(result);
+  bp *B;
+  int f1 = Int_val(file);
+  int f2 = dup(f1);
+  FILE * fd = fdopen(f2, "r");
+  if (fd == NULL) {
+    /* fdopen does not take ownership on failure: release the duplicate. */
+    if (f2 >= 0)
+      close(f2);
+    CAMLRAISEMSG("Error opening bp file");
+  }
+  B = loadTree(fd);
+  fclose(fd);
+  result = sxsi_alloc_custom<bp*>(caml_bp_delete);
+  Obj_val<bp*>(result) = B;
+  CAMLreturn(result);
+}
+
+/**
+ * Writes the bp structure held by `b` with saveTree to an already-open file
+ * descriptor, opened as a stream in append mode. Returns unit.
+ *
+ * The descriptor is duplicated first, so the fclose below closes only the
+ * duplicate and leaves the caller's descriptor open. A diagnostic line is
+ * printed on stderr before the stream is checked, as before.
+ * Reports "Error saving bp file" through CAMLRAISEMSG when the descriptor
+ * cannot be duplicated or opened as a stream.
+ *
+ * @param b    custom block holding the bp*
+ * @param file OCaml int holding the file descriptor
+ */
+extern "C" value caml_bp_save(value b, value file)
+{
+  CAMLparam2(b, file);
+  bp *B = Obj_val<bp*>(b);
+  int f1 = Int_val(file);
+  int f2 = dup(f1);
+  FILE * fd = fdopen(f2, "a");
+  /* %p requires a void* argument. */
+  fprintf(stderr, "Writing %i %p bytes\n", ((B->n+D-1)/D)*8, (void *) B );
+  fflush(stderr);
+  if (fd == NULL) {
+    /* fdopen does not take ownership on failure: release the duplicate. */
+    if (f2 >= 0)
+      close(f2);
+    CAMLRAISEMSG("Error saving bp file");
+  }
+  saveTree(B, fd);
+  fclose(fd);
+  CAMLreturn(Val_unit);
+}
+