X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=src%2FOCamlDriver.cpp;h=146a0dfbc878bf9ff5458736ca250c6fad411ddc;hb=1975eef2d0933da3c01faec1cd14bc8fbf6bf146;hp=db111dbc64740c103f920da09dd055be86fbbcd0;hpb=4b52da1a20a4fe031930bb96d2ca46bec06dc529;p=SXSI%2Fxpathcomp.git diff --git a/src/OCamlDriver.cpp b/src/OCamlDriver.cpp index db111db..146a0df 100644 --- a/src/OCamlDriver.cpp +++ b/src/OCamlDriver.cpp @@ -17,165 +17,137 @@ #include #include -#include "XMLDocShredder.h" + #include "XMLTree.h" +#include "XMLTreeBuilder.h" +#include "Grammar.h" #include "Utils.h" +#include "common_stub.hpp" + +#define CAMLRAISEMSG(msg) (sxsi_raise_msg((char*) (msg))) + +#define XMLTREE(x) (Obj_val(x)) + +#define HSET(x) (Obj_val(x)) + +#define XMLTREEBUILDER(x) (Obj_val(x)) + +#define GRAMMAR(x) (Obj_val(x)) -extern "C" { -/* OCaml memory managment */ -#include -#include -#include -#include -#include -#include -#include - //#include "results.h" -#include -#define CAMLRAISEMSG(msg) (caml_raise_with_string(*cpp_exception,(msg) )) -#define NOT_IMPLEMENTED(s) (caml_failwith(s)) -#define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x))) -#define HSET(x) ((TagIdSet*)((* (TagIdSet**) Data_custom_val(x)))) -#define TEXTCOLLECTION(x) #define TREENODEVAL(i) ((treeNode) (Int_val(i))) #define TAGVAL(i) ((TagType) (Int_val(i))) #define XMLTREE_ROOT 0 #define NoAlloc - - static struct custom_operations ops; - static struct custom_operations set_ops; - static value * cpp_exception = NULL; - static bool ops_initialized = false; - +extern "C" { #include #include - - -} - -extern "C" void caml_xml_tree_finalize(value tree){ - delete XMLTREE(tree); - return; -} - -extern "C" void caml_hset_finalize(value hblock){ - delete HSET(hblock); - return; +#include } -extern "C" value caml_init_lib (value unit) { - CAMLparam1(unit); - - struct rlimit rlim; - - if (!ops_initialized){ - - getrlimit(RLIMIT_STACK, &rlim); - - if (rlim.rlim_max == RLIM_INFINITY && rlim.rlim_cur != RLIM_INFINITY) { - rlim.rlim_cur = RLIM_INFINITY; - setrlimit(RLIMIT_STACK, &rlim); - }; - - ops.identifier = (char*) "XMLTree"; - ops.finalize = caml_xml_tree_finalize; - set_ops.identifier = (char*) "unordered_set"; - set_ops.finalize = caml_hset_finalize; - - cpp_exception = caml_named_value("CPlusPlusError"); - if (cpp_exception == NULL){ - string s = "FATAL: Unregistered exception "; - s += "CPlusPlusError"; - caml_failwith(s.c_str()); - }; - ops_initialized = true; - }; - CAMLreturn(Val_unit); - -} +/** XMLTreeBuilder bindings + * + */ -extern "C" value caml_shredder_parse(XMLDocShredder *shredder){ - CAMLparam0(); - CAMLlocal1(doc); - XMLTree * tree; - shredder->processStartDocument(""); - shredder->parse(); - shredder->processEndDocument(); - doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2); - tree = (XMLTree *) shredder->getXMLTree(); - memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*)); - CAMLreturn(doc); - -} - -extern "C" value caml_call_shredder_uri(value uri,value sf, value iet, value dtc, value idtype){ - CAMLparam1(uri); - CAMLlocal1(doc); - char *fn = String_val(uri); - XMLDocShredder * shredder; - TextCollectionBuilder::index_type_t id; - switch (Int_val(idtype)){ +extern "C" value caml_xml_tree_builder_create(value unit) +{ + CAMLparam1(unit); + CAMLlocal1(result); + result = sxsi_alloc_custom(); + Obj_val(result) = new XMLTreeBuilder(); + + CAMLreturn(result); +} + +extern "C" value caml_xml_tree_builder_open_document(value vbuilder, + value vet, + value vsrate, + value vdtc, + value vidxtype) +{ + CAMLparam5(vbuilder, vet, vsrate, vdtc, vidxtype); + bool empty_text = Bool_val(vet); + int sample_rate = Int_val(vsrate); + bool disable_tc = Bool_val(vdtc); + TextCollectionBuilder::index_type_t idx_type; + switch (Int_val(vidxtype)){ case 0: - id = TextCollectionBuilder::index_type_default; + idx_type = TextCollectionBuilder::index_type_default; break; case 1: - id = TextCollectionBuilder::index_type_swcsa; + idx_type = TextCollectionBuilder::index_type_swcsa; break; case 2: - id = TextCollectionBuilder::index_type_rlcsa; + idx_type = TextCollectionBuilder::index_type_rlcsa; break; default: - CAMLRAISEMSG(""); + CAMLRAISEMSG("Invalid Index Type"); }; + int res = XMLTREEBUILDER(vbuilder)->OpenDocument(empty_text, + sample_rate, + disable_tc, + idx_type); + if (res == NULLT) + CAMLRAISEMSG("OpenDocument"); - try { - shredder = new XMLDocShredder(fn,Int_val(sf),Bool_val(iet),Bool_val(dtc), id); - doc = caml_shredder_parse(shredder); - delete shredder; - } - catch (const std::exception& e){ CAMLRAISEMSG(e.what()); } - catch (string msg){ CAMLRAISEMSG(msg.c_str()); } - catch (char const * msg){ CAMLRAISEMSG(msg); }; - CAMLreturn (doc); + CAMLreturn (Val_unit); +} +extern "C" value caml_xml_tree_builder_close_document(value vbuilder) +{ + CAMLparam1(vbuilder); + CAMLlocal1(result); + XMLTree * tree = XMLTREEBUILDER(vbuilder)->CloseDocument(); + if (tree == NULL) + CAMLRAISEMSG("CloseDocument"); + result = sxsi_alloc_custom(); + Obj_val(result) = tree; + CAMLreturn (result); } -extern "C" value caml_call_shredder_string(value data,value sf, value iet, value dtc, value idtype){ - CAMLparam1(data); - CAMLlocal1(doc); - XMLDocShredder * shredder; - unsigned int ln = caml_string_length(data); - unsigned char *fn = (unsigned char*) String_val(data); - TextCollectionBuilder::index_type_t id; - switch (Int_val(idtype)){ - case 0: - id = TextCollectionBuilder::index_type_default; - break; - case 1: - id = TextCollectionBuilder::index_type_swcsa; - break; - case 2: - id = TextCollectionBuilder::index_type_rlcsa; - break; - default: - CAMLRAISEMSG(""); - }; - try { +extern "C" value caml_xml_tree_builder_new_open_tag(value vbuilder, value vtag) +{ + CAMLparam2(vbuilder, vtag); + const char * tag = String_val(vtag); + if (XMLTREEBUILDER(vbuilder)->NewOpenTag(std::string(tag)) == NULLT) + CAMLRAISEMSG("NewOpenTag"); - shredder = new XMLDocShredder (fn,ln,Int_val(sf),Bool_val(iet),Bool_val(dtc), id); - doc = caml_shredder_parse(shredder); - delete shredder; - } - catch (const std::exception& e){ CAMLRAISEMSG(e.what()); } - catch (string msg){ CAMLRAISEMSG(msg.c_str()); } - catch (char const * msg){ CAMLRAISEMSG(msg); }; - CAMLreturn(doc); + CAMLreturn (Val_unit); } +extern "C" value caml_xml_tree_builder_new_closing_tag(value vbuilder, value vtag) +{ + CAMLparam2(vbuilder, vtag); + const char * tag = String_val(vtag); + if (XMLTREEBUILDER(vbuilder)->NewClosingTag(std::string(tag)) == NULLT) + CAMLRAISEMSG("NewClosingTag"); + + CAMLreturn (Val_unit); +} + +extern "C" value caml_xml_tree_builder_new_text(value vbuilder, value vtext) +{ + CAMLparam2(vbuilder, vtext); + const char * text = String_val(vtext); + if (XMLTREEBUILDER(vbuilder)->NewText(std::string(text)) == NULLT) + CAMLRAISEMSG("NewText"); + + CAMLreturn (Val_unit); +} + + +/*************************************************************************/ + +/** + * XMLTree bindings + * All of the functions here call the _unsafe version and implement the logics themselves + * (test for NULLT and so on). This avoids one indirection + one call when the tests fails. + */ + + extern "C" value caml_xml_tree_save(value tree,value fd, value name){ CAMLparam3(tree, fd, name); XMLTREE(tree)->Save(Int_val(fd), String_val(name)); @@ -184,31 +156,23 @@ extern "C" value caml_xml_tree_save(value tree,value fd, value name){ extern "C" value caml_xml_tree_load(value fd, value name, value load_tc,value sf){ CAMLparam4(fd, name, load_tc, sf); - CAMLlocal1(doc); + CAMLlocal1(result); XMLTree * tree; try { + tree = XMLTree::Load(Int_val(fd),Bool_val(load_tc),Int_val(sf), String_val(name)); - doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2); - memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*)); - CAMLreturn(doc); + result = sxsi_alloc_custom(); + Obj_val(result) = tree; + CAMLreturn(result); } - catch (const xmlpp::internal_error& e){ CAMLRAISEMSG(e.what()); } catch (const std::exception& e){ CAMLRAISEMSG(e.what()); } - catch (string msg){ CAMLRAISEMSG(msg.c_str()); } + catch (std::string msg){ CAMLRAISEMSG(msg.c_str()); } catch (char const * msg){ CAMLRAISEMSG(msg); }; + //never reached + return (Val_unit); } - -/*************************************************************************/ - -/** - * XMLTree bindings - * All of the functions here call the _unsafe version and implement the logics themselves - * (test for NULLT and so on). This avoids one indirection + one call when the tests fails. - */ - - NoAlloc extern "C" value caml_xml_tree_root(value tree){ return (Val_int(XMLTREE_ROOT)); } @@ -318,6 +282,10 @@ NoAlloc extern "C" value caml_xml_tree_next_element(value tree, value node){ return (Val_int(XMLTREE(tree)->NextElement(TREENODEVAL(node)))); } +NoAlloc extern "C" value caml_xml_tree_next_node_before(value tree, value node, value ctx){ + return (Val_int(XMLTREE(tree)->NextNodeBefore(TREENODEVAL(node), TREENODEVAL(ctx)))); +} + NoAlloc extern "C" value caml_xml_tree_prev_sibling(value tree, value node){ return (Val_int(XMLTREE(tree)->PrevSibling(TREENODEVAL(node)))); } @@ -342,6 +310,10 @@ NoAlloc extern "C" value caml_xml_tree_tagged_descendant(value tree, value node return (Val_int(XMLTREE(tree)->TaggedDescendant(TREENODEVAL(node), TAGVAL(tag)))); } +NoAlloc extern "C" value caml_xml_tree_tagged_next(value tree, value node, value tag){ + return (Val_int(XMLTREE(tree)->TaggedNext(TREENODEVAL(node), TAGVAL(tag)))); +} + NoAlloc extern "C" value caml_xml_tree_select_descendant(value tree, value node, value tags){ return (Val_int(XMLTREE(tree)->SelectDescendant(TREENODEVAL(node), HSET(tags)))); } @@ -450,9 +422,8 @@ NoAlloc extern "C" value caml_unordered_set_length(value hset){ extern "C" value caml_unordered_set_alloc(value unit){ CAMLparam1(unit); CAMLlocal1(hset); - hset = caml_alloc_custom(&set_ops,sizeof(TagIdSet*),1,2); - TagIdSet* ht = new TagIdSet(); - memcpy(Data_custom_val(hset),&ht,sizeof(TagIdSet*)); + hset = sxsi_alloc_custom(); + Obj_val(hset) = new TagIdSet(); CAMLreturn (hset); } @@ -757,7 +728,7 @@ extern "C" value caml_text_collection_empty_text(value tree,value id){ CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id)))); } -bool docId_comp(DocID x, DocID y) { return x < y; }; +bool docId_comp(DocID x, DocID y) { return x < y; } /** * Existential queries @@ -923,3 +894,266 @@ BV_QUERY(suffix, Suffix) BV_QUERY(equals, Equals) BV_QUERY(contains, Contains) BV_QUERY(lessthan, LessThan) + + + +//////////////////////////////////////////// Grammar stuff + +extern "C" value caml_grammar_load(value file, value load_bp) +{ + CAMLparam2(file, load_bp); + CAMLlocal1(result); + Grammar *grammar; + int f1 = Int_val(file); + int f2 = dup(f1); + FILE * fd = fdopen(f2, "r"); + if (fd == NULL) + CAMLRAISEMSG("Error opening grammar file"); + grammar = Grammar::load(fd, Bool_val(load_bp)); + fclose(fd); + result = sxsi_alloc_custom(); + Obj_val(result) = grammar; + CAMLreturn(result); +} + +extern "C" value caml_grammar_get_symbol_at(value grammar, value symbol, value preorder) +{ + CAMLparam3(grammar, symbol, preorder); + CAMLreturn(Val_long(GRAMMAR(grammar)->getSymbolAt(Long_val(symbol), Int_val(preorder)))); +} + +extern "C" value caml_grammar_first_child(value grammar, value rule, value pos) +{ + CAMLparam1(grammar); + CAMLreturn(Val_int(GRAMMAR(grammar)->firstChild(Long_val(rule), Int_val(pos)))); +} + +extern "C" value caml_grammar_next_sibling(value grammar, value rule, value pos) +{ + CAMLparam1(grammar); + CAMLreturn(Val_int(GRAMMAR(grammar)->nextSibling(Long_val(rule), Int_val(pos)))); +} + +extern "C" value caml_grammar_start_first_child(value grammar, value pos) +{ + CAMLparam1(grammar); + CAMLreturn(Val_int(GRAMMAR(grammar)->startFirstChild(Int_val(pos)))); +} + +extern "C" value caml_grammar_start_next_sibling(value grammar, value pos) +{ + CAMLparam1(grammar); + CAMLreturn(Val_int(GRAMMAR(grammar)->startNextSibling(Int_val(pos)))); +} + +extern "C" value caml_grammar_is_nil(value grammar, value rule) +{ + CAMLparam1(grammar); + CAMLreturn(Val_bool(GRAMMAR(grammar)->isNil(Long_val(rule)))); +} + +extern "C" value caml_grammar_get_tag(value grammar, value tag) +{ + CAMLparam1(grammar); + CAMLlocal1(res); + const char * s = (GRAMMAR(grammar)->getTagName(Long_val(tag))).c_str(); + res = caml_copy_string(s); + CAMLreturn(res); +} + +extern "C" value caml_grammar_get_id1(value grammar, value rule) +{ + CAMLparam1(grammar); + CAMLreturn(Val_long(GRAMMAR(grammar)->getID1(Long_val(rule)))); +} + +extern "C" value caml_grammar_get_id2(value grammar, value rule) +{ + CAMLparam1(grammar); + CAMLreturn(Val_long(GRAMMAR(grammar)->getID2(Long_val(rule)))); +} + +extern "C" value caml_grammar_get_param_pos(value grammar, value rule) +{ + CAMLparam1(grammar); + CAMLreturn(Val_int(GRAMMAR(grammar)->getParamPos(Long_val(rule)))); +} + +extern "C" value caml_grammar_translate_tag(value grammar, value tag) +{ + CAMLparam1(grammar); + CAMLreturn(Val_int(GRAMMAR(grammar)->translateTag(Int_val(tag)))); +} + +extern "C" value caml_grammar_register_tag(value grammar, value str) +{ + CAMLparam2(grammar, str); + char * s = String_val(str); + CAMLreturn(Val_int(GRAMMAR(grammar)->getTagID(s))); +} + +extern "C" value caml_grammar_nil_id(value grammar) +{ + CAMLparam1(grammar); + CAMLreturn(Val_long((GRAMMAR(grammar)->getNiltagid()) * 4 + 1)); +} + +extern "C" { +extern char *caml_young_end; +extern char *caml_young_start; +typedef char * addr; +#define Is_young(val) \ + ((addr)(val) < (addr)caml_young_end && (addr)(val) > (addr)caml_young_start) + +} +extern "C" value caml_custom_is_young(value a){ + return Val_bool(Is_young(a)); +} + +extern "C" value caml_custom_array_blit(value a1, value ofs1, value a2, value ofs2, + value n) +{ + value * src, * dst; + intnat count; + + if (Is_young(a2)) { + /* Arrays of values, destination is in young generation. + Here too we can do a direct copy since this cannot create + old-to-young pointers, nor mess up with the incremental major GC. + Again, memmove takes care of overlap. */ + memmove(&Field(a2, Long_val(ofs2)), + &Field(a1, Long_val(ofs1)), + Long_val(n) * sizeof(value)); + return Val_unit; + } + /* Array of values, destination is in old generation. + We must use caml_modify. */ + count = Long_val(n); + if (a1 == a2 && Long_val(ofs1) < Long_val(ofs2)) { + /* Copy in descending order */ + for (dst = &Field(a2, Long_val(ofs2) + count - 1), + src = &Field(a1, Long_val(ofs1) + count - 1); + count > 0; + count--, src--, dst--) { + caml_modify(dst, *src); + } + } else { + /* Copy in ascending order */ + for (dst = &Field(a2, Long_val(ofs2)), src = &Field(a1, Long_val(ofs1)); + count > 0; + count--, src++, dst++) { + caml_modify(dst, *src); + } + } + /* Many caml_modify in a row can create a lot of old-to-young refs. + Give the minor GC a chance to run if it needs to. */ + //caml_check_urgent_gc(Val_unit); + return Val_unit; +} + + +////////////////////// BP + +extern "C" value caml_bitmap_create(value size) +{ + CAMLparam1(size); + size_t bits = Long_val(size); + size_t words = bits / (8*sizeof(unsigned int)); + unsigned int *buffer = (unsigned int*) calloc(words+1, sizeof(unsigned int)); + if (buffer == NULL) + CAMLRAISEMSG("BP: cannot allocate memory"); + CAMLreturn( (value) buffer); +} + +extern "C" value caml_bitmap_resize(value bitmap, value nsize) +{ + CAMLparam2(bitmap, nsize); + size_t bits = Long_val(nsize); + size_t bytes = (bits / (8 * sizeof(unsigned int)) + 1 ) * sizeof(unsigned int); + fprintf(stderr, "Growing to: %lu bytes\n", (bits / (8 * sizeof(unsigned int)) + 1 ) * sizeof(unsigned int)); + unsigned int * buffer = (unsigned int*) realloc((void *) bitmap, bytes); + if (buffer == NULL) + CAMLRAISEMSG("BP: cannot reallocate memory"); + CAMLreturn((value) buffer); +} + +extern "C" value caml_bitmap_setbit(value bitmap, value i, value b) +{ + CAMLparam3(bitmap, i, b); + unsigned int j = Int_val(i); + unsigned int x = Bool_val(b); + bp_setbit ((unsigned int*) bitmap, j, x); + CAMLreturn(Val_unit); +} + +extern "C" void caml_bp_delete(value b) +{ + CAMLparam1(b); + bp * B = Obj_val(b); + bp_delete(B); + CAMLreturn0; +} + +extern "C" value caml_bp_construct(value bitmap, value npar) +{ + CAMLparam2(bitmap, npar); + CAMLlocal1(res); + bp * b = bp_construct(Int_val(npar), (unsigned int *) bitmap, OPT_DEGREE); + res = sxsi_alloc_custom(caml_bp_delete); + Obj_val(res) = b; + CAMLreturn(res); +} + +extern "C" value caml_bp_first_child(value b, value idx) +{ + CAMLparam2(b, idx); + CAMLreturn (Val_int( bp_first_child(Obj_val(b), Int_val(idx)))); +} + + +extern "C" value caml_bp_next_sibling(value b, value idx) +{ + CAMLparam2(b, idx); + CAMLreturn (Val_int(bp_next_sibling(Obj_val(b), Int_val(idx)))); +} + +extern "C" value caml_bp_preorder_rank(value b, value idx) +{ + CAMLparam2(b, idx); + CAMLreturn (Val_int(bp_preorder_rank(Obj_val(b), Int_val(idx)) - 1)); +} + + +extern "C" value caml_bp_load(value file) +{ + CAMLparam1(file); + CAMLlocal1(result); + bp *B; + int f1 = Int_val(file); + int f2 = dup(f1); + FILE * fd = fdopen(f2, "r"); + if (fd == NULL) + CAMLRAISEMSG("Error opening bp file"); + B = loadTree(fd); + fclose(fd); + result = sxsi_alloc_custom(caml_bp_delete); + Obj_val(result) = B; + CAMLreturn(result); +} + +extern "C" value caml_bp_save(value b, value file) +{ + CAMLparam2(b, file); + bp *B = Obj_val(b); + int f1 = Int_val(file); + int f2 = dup(f1); + FILE * fd = fdopen(f2, "a"); + fprintf(stderr, "Writing %i %p bytes\n", ((B->n+D-1)/D)*8, B ); + fflush(stderr); + if (fd == NULL) + CAMLRAISEMSG("Error saving bp file"); + saveTree(B, fd); + fclose(fd); + CAMLreturn(Val_unit); +} +