X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=OCamlDriver.cpp;h=e2c6f00b459eb899135e374b19ee8ebec14be0f0;hb=d550133ad7afdf65c5e284c2bcf67a5bdde6faa7;hp=d85be93c34c4be3b8c39c5656978a5117fcfee4a;hpb=25a3fa55f6de1835d2407283eeb43b01819543f6;p=SXSI%2Fxpathcomp.git diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index d85be93..e2c6f00 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -1,130 +1,150 @@ /************************************** * OCamlDriver.cpp * ------------------- - * A Test Ocaml Driver which calls the C++ methods and + * An Ocaml Driver which calls the C++ methods and * adds a C wrapper interface with OCaml code. * * Author: Kim Nguyen * Date: 04/11/08 */ -/* OCaml memory managment */ + + +#include +#include +#include "XMLDocShredder.h" +#include "XMLTree.h" +#include "Utils.h" + extern "C" { +/* OCaml memory managment */ #include #include #include #include #include #include - - -} //extern C -//#include "TextCollection/TextCollection.h" -#include "XMLDocShredder.h" -#include "XMLTree.h" -#include "Utils.h" -#define CAMLRAISECPP(e) (caml_failwith( ((e).what()))) +#define CAMLRAISEMSG(msg) (caml_raise_with_string(*cpp_exception,(msg) )) #define NOT_IMPLEMENTED(s) (caml_failwith(s)) #define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x))) +#define HSET(x) ((std::unordered_set*)((* (XMLTree**) Data_custom_val(x)))) #define TEXTCOLLECTION(x) #define TREENODEVAL(i) ((treeNode) (Int_val(i))) - -extern "C" { +#define XMLTREE_ROOT 0 + static struct custom_operations ops; - static bool initialized = false; + static struct custom_operations set_ops; + static value * cpp_exception = NULL; + static bool ops_initialized = false; + } + extern "C" void caml_xml_tree_finalize(value tree){ delete XMLTREE(tree); return; } -extern "C" void caml_init_ops () { +extern "C" void caml_hset_finalize(value hblock){ + delete HSET(hblock); + return; +} - if (initialized) - return; +extern "C" CAMLprim value caml_init_lib (value unit) { + CAMLparam1(unit); + if (!ops_initialized){ + + ops.identifier = (char*) "XMLTree"; ops.finalize = caml_xml_tree_finalize; - return; + set_ops.identifier = (char*) "unordered_set"; + set_ops.finalize = caml_hset_finalize; + + cpp_exception = caml_named_value("CPlusPlusError"); + if (cpp_exception == NULL){ + string s = "FATAL: Unregistered exception "; + s += "CPlusPlusError"; + caml_failwith(s.c_str()); + }; + + ops_initialized = true; + + }; + CAMLreturn(Val_unit); + +} +extern "C" CAMLprim value caml_shredder_parse(XMLDocShredder *shredder){ + CAMLparam0(); + CAMLlocal1(doc); + XMLTree * tree; + shredder->processStartDocument(""); + shredder->parse(); + shredder->processEndDocument(); + doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2); + tree = (XMLTree *) shredder->getXMLTree(); + memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*)); + CAMLreturn(doc); + } - extern "C" CAMLprim value caml_call_shredder_uri(value uri,value sf, value iet, value dtc){ CAMLparam1(uri); CAMLlocal1(doc); char *fn = String_val(uri); + XMLDocShredder * shredder; try { - XMLDocShredder shredder(fn,Int_val(sf),Bool_val(iet),Bool_val(dtc)); - XMLTree * tree; - shredder.processStartDocument(fn); - shredder.parse(); - shredder.processEndDocument(); - caml_init_ops(); - doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2); - tree = (XMLTree *) shredder.storageIfc_->returnDocument(); - memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*)); - CAMLreturn(doc); + shredder = new XMLDocShredder(fn,Int_val(sf),Bool_val(iet),Bool_val(dtc)); + doc = caml_shredder_parse(shredder); + delete shredder; } - catch (const std::exception& e){ - CAMLRAISECPP(e); - }; + catch (const std::exception& e){ CAMLRAISEMSG(e.what()); } + catch (string msg){ CAMLRAISEMSG(msg.c_str()); } + catch (char const * msg){ CAMLRAISEMSG(msg); }; + CAMLreturn (doc); } - extern "C" CAMLprim value caml_call_shredder_string(value data,value sf, value iet, value dtc){ CAMLparam1(data); CAMLlocal1(doc); + XMLDocShredder * shredder; unsigned int ln = string_length(data); unsigned char *fn = (unsigned char*) String_val(data); - try { - XMLDocShredder shredder(fn,ln,Int_val(sf),Bool_val(iet),Bool_val(dtc)); - XMLTree* tree; - shredder.processStartDocument(""); - shredder.parse(); - shredder.processEndDocument(); - caml_init_ops(); - doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2); - tree = (XMLTree *) shredder.storageIfc_->returnDocument(); - memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*)); - CAMLreturn(doc); + shredder = new XMLDocShredder (fn,ln,Int_val(sf),Bool_val(iet),Bool_val(dtc)); + doc = caml_shredder_parse(shredder); + delete shredder; } - catch (const std::exception& e) { - CAMLRAISECPP(e); - }; + catch (const std::exception& e){ CAMLRAISEMSG(e.what()); } + catch (string msg){ CAMLRAISEMSG(msg.c_str()); } + catch (char const * msg){ CAMLRAISEMSG(msg); }; + CAMLreturn(doc); } -void traversal_rec(XMLTree* tree, treeNode id){ - DocID tid; - if (id == NULLT) - return; - //int tag = tree->Tag(id); - if (id) { - tid = tree->PrevText(id); - char * data = (char *) (tree->getTextCollection())->GetText(tid); - if (tree->IsLeaf(id)){ - tid = tree->MyText(id); - - data = (char*) (tree->getTextCollection())->GetText(tid); - }; - - if (tree->NextSibling(id) == NULLT){ - tid = tree->NextText(id); - data = (char*) (tree->getTextCollection())->GetText(tid); - }; - }; - traversal_rec(tree,tree->FirstChild(id)); - traversal_rec(tree,tree->NextSibling(id)); - return; +extern "C" CAMLprim value caml_xml_tree_save(value tree,value fd){ + CAMLparam2(tree,fd); + XMLTREE(tree)->Save(Int_val(fd)); + CAMLreturn (Val_unit); } -extern "C" CAMLprim value caml_cpp_traversal(value tree){ - CAMLparam1(tree); - traversal_rec(XMLTREE(tree),XMLTREE(tree)->Root()); - CAMLreturn(Val_unit); +extern "C" CAMLprim value caml_xml_tree_load(value fd){ + CAMLparam1(fd); + CAMLlocal1(doc); + XMLTree * tree; + try { + tree = XMLTree::Load(Int_val(fd)); + doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2); + memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*)); + CAMLreturn(doc); + } + catch (const xmlpp::internal_error& e){ CAMLRAISEMSG(e.what()); } + catch (const std::exception& e){ CAMLRAISEMSG(e.what()); } + catch (string msg){ CAMLRAISEMSG(msg.c_str()); } + catch (char const * msg){ CAMLRAISEMSG(msg); }; } + + extern "C" CAMLprim value caml_text_collection_get_text(value tree, value id){ CAMLparam2(tree,id); CAMLlocal1(str); @@ -143,14 +163,6 @@ extern "C" CAMLprim value caml_text_collection_get_cached_text(value tree, value CAMLreturn (str); } -extern "C" CAMLprim value caml_text_collection_size(value tree){ - CAMLparam1(tree); - // CAMLreturn (Val_int( XMLTREE(tree)->CachedText.size())); - NOT_IMPLEMENTED("text_collection_size"); - CAMLreturn (Val_unit); -} - - extern "C" CAMLprim value caml_text_collection_empty_text(value tree,value id){ CAMLparam2(tree,id); @@ -176,6 +188,7 @@ extern "C" CAMLprim value caml_text_collection_count(value tree,value str){ CAMLreturn (Val_unit); } +bool docId_comp(DocID x, DocID y) { return x < y; }; extern "C" CAMLprim value caml_text_collection_contains(value tree,value str){ CAMLparam2(tree,str); @@ -184,40 +197,42 @@ extern "C" CAMLprim value caml_text_collection_contains(value tree,value str){ std::vector results; results = XMLTREE(tree)->Contains(cstr); //free(cstr); - resarray = caml_alloc_tuple(results.size()); + std::sort(results.begin(), results.end(), docId_comp); + size_t s = results.size(); + resarray = caml_alloc_tuple(s); - for (unsigned int i=0; i results; + results = XMLTREE(tree)->Contains(cstr); + CAMLreturn (Val_unit); +} + extern "C" CAMLprim value caml_xml_tree_root(value tree){ CAMLparam1(tree); - CAMLreturn (Val_int(TREENODEVAL(XMLTREE(tree)->Root()))); + CAMLreturn (Val_int(TREENODEVAL(XMLTREE_ROOT))); } extern "C" CAMLprim value caml_xml_tree_text_collection(value tree){ CAMLparam1(tree); CAMLreturn((value) XMLTREE(tree)->getTextCollection()); } extern "C" CAMLprim value caml_xml_tree_parent(value tree, value id){ - CAMLparam2(tree,id); - CAMLreturn(Val_int (XMLTREE(tree)->Parent(TREENODEVAL(id)))); + return(Val_int (XMLTREE(tree)->Parent(TREENODEVAL(id)))); } extern "C" CAMLprim value caml_xml_tree_prev_sibling(value tree, value id){ - CAMLparam2(tree,id); - CAMLreturn(Val_int (XMLTREE(tree)->PrevSibling(TREENODEVAL(id)))); + return(Val_int (XMLTREE(tree)->PrevSibling(TREENODEVAL(id)))); } extern "C" CAMLprim value caml_xml_tree_parent_doc(value tree, value id){ - CAMLparam2(tree,id); - CAMLreturn(Val_int (XMLTREE(tree)->ParentNode((DocID) Int_val(id)))); -} - -extern "C" CAMLprim value caml_xml_tree_prev_doc(value tree, value id){ - CAMLparam2(tree,id); - CAMLreturn(Val_int (XMLTREE(tree)->PrevNode((DocID) Int_val(id)))); + return (Val_int (XMLTREE(tree)->ParentNode((DocID) Int_val(id)))); } extern "C" CAMLprim value caml_xml_tree_is_ancestor(value tree,value id1, value id2) { @@ -225,68 +240,64 @@ extern "C" CAMLprim value caml_xml_tree_is_ancestor(value tree,value id1, value CAMLreturn(Val_bool (XMLTREE(tree)->IsAncestor(TREENODEVAL(id1),TREENODEVAL(id2)))); } -extern "C" CAMLprim value caml_xml_tree_serialize(value tree, value filename){ - CAMLparam2(tree,filename); - NOT_IMPLEMENTED("caml_xml_tree_serialize"); - CAMLreturn(Val_unit); +extern "C" CAMLprim value caml_xml_tree_last_child(value tree, value id){ + return(Val_int (XMLTREE(tree)->LastChild(TREENODEVAL(id)))); } -extern "C" CAMLprim value caml_xml_tree_unserialize(value filename){ - CAMLparam1(filename); - NOT_IMPLEMENTED("caml_xml_tree_unserialize"); - CAMLreturn(Val_unit); +extern "C" CAMLprim value caml_xml_tree_is_first_child(value tree, value id){ + return Val_bool (XMLTREE(tree)->IsFirstChild(TREENODEVAL(id))); +} +extern "C" CAMLprim value caml_xml_tree_first_child(value tree, value id){ + return(Val_int (XMLTREE(tree)->FirstChild(TREENODEVAL(id)))); +} +extern "C" CAMLprim value caml_xml_tree_first_element(value tree, value id){ + return(Val_int (XMLTREE(tree)->FirstElement(TREENODEVAL(id)))); } +extern "C" CAMLprim value caml_xml_tree_tagged_child(value tree, value id, value tag){ + return(Val_int (XMLTREE(tree)->TaggedChild(TREENODEVAL(id),Int_val(tag)))); +} -extern "C" CAMLprim value caml_xml_tree_first_child(value tree, value id){ - CAMLparam2(tree,id); - CAMLreturn(Val_int (XMLTREE(tree)->FirstChild(TREENODEVAL(id)))); +extern "C" CAMLprim value caml_xml_tree_next_sibling(value tree, value id){ + return(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id)))); +} + +extern "C" CAMLprim value caml_xml_tree_next_element(value tree, value id){ + return(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id)))); +} + +extern "C" CAMLprim value caml_xml_tree_tagged_sibling(value tree, value id, value tag){ + return(Val_int (XMLTREE(tree)->TaggedFollSibling(TREENODEVAL(id),Int_val(tag)))); } + extern "C" CAMLprim value caml_xml_tree_is_leaf(value tree, value id){ - CAMLparam2(tree,id); - CAMLreturn(Val_bool (XMLTREE(tree)->IsLeaf(TREENODEVAL(id)))); + return(Val_bool (XMLTREE(tree)->IsLeaf(TREENODEVAL(id)))); } extern "C" CAMLprim value caml_xml_tree_tagged_desc(value tree, value id, value tag){ - CAMLparam3(tree,id,tag); - CAMLreturn(Val_int (XMLTREE(tree)->TaggedDesc(TREENODEVAL(id),(TagType) Int_val(tag)))); + return(Val_int (XMLTREE(tree)->TaggedDesc(TREENODEVAL(id),(TagType) Int_val(tag)))); } extern "C" CAMLprim value caml_xml_tree_tagged_foll(value tree, value id, value tag){ - CAMLparam3(tree,id,tag); - CAMLreturn(Val_int (XMLTREE(tree)->TaggedFoll(TREENODEVAL(id),(TagType) Int_val(tag)))); + return(Val_int (XMLTREE(tree)->TaggedFoll(TREENODEVAL(id),(TagType) Int_val(tag)))); +} +extern "C" CAMLprim value caml_xml_tree_tagged_foll_below(value tree, value id, value tag,value root){ + return(Val_int (XMLTREE(tree)->TaggedFollBelow(TREENODEVAL(id),(TagType) Int_val(tag),TREENODEVAL(root)))); } -extern "C" CAMLprim value caml_xml_tree_next_sibling(value tree, value id){ - CAMLparam2(tree,id); - CAMLreturn(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id)))); -} -extern "C" CAMLprim value caml_xml_tree_prev_text(value tree, value id){ - CAMLparam2(tree,id); - CAMLlocal1(res); - CAMLreturn(Val_int((XMLTREE(tree)->PrevText(TREENODEVAL(id))))); - CAMLreturn(res); -} -extern "C" CAMLprim value caml_xml_tree_next_text(value tree, value id){ - CAMLparam2(tree,id); - CAMLreturn(Val_int((XMLTREE(tree)->NextText(TREENODEVAL(id))))); -} extern "C" CAMLprim value caml_xml_tree_my_text(value tree, value id){ - CAMLparam2(tree,id); - CAMLreturn(Val_int((XMLTREE(tree)->MyText(TREENODEVAL(id))))); + return(Val_int((XMLTREE(tree)->MyText(TREENODEVAL(id))))); } extern "C" CAMLprim value caml_xml_tree_text_xml_id(value tree, value id){ - CAMLparam2(tree,id); - CAMLreturn(Val_int((XMLTREE(tree)->TextXMLId(TREENODEVAL(id))))); + return(Val_int((XMLTREE(tree)->TextXMLId(TREENODEVAL(id))))); } extern "C" CAMLprim value caml_xml_tree_node_xml_id(value tree, value id){ - CAMLparam2(tree,id); - CAMLreturn(Val_int((XMLTREE(tree)->NodeXMLId(TREENODEVAL(id))))); + return(Val_int((XMLTREE(tree)->NodeXMLId(TREENODEVAL(id))))); } extern "C" CAMLprim value caml_xml_tree_tag_name(value tree, value tagid){ @@ -300,13 +311,11 @@ extern "C" CAMLprim value caml_xml_tree_tag_name(value tree, value tagid){ extern "C" CAMLprim value caml_xml_tree_tag_id(value tree,value id){ - CAMLparam2(tree,id); - CAMLreturn (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(id)))); + return (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(id)))); } extern "C" CAMLprim value caml_xml_tree_subtree_tags(value tree,value id,value tag){ - CAMLparam3(tree,id,tag); - CAMLreturn (Val_int(XMLTREE(tree)->SubtreeTags(TREENODEVAL(id),Int_val(tag)))); + return (Val_int(XMLTREE(tree)->SubtreeTags(TREENODEVAL(id),Int_val(tag)))); } @@ -320,105 +329,47 @@ extern "C" CAMLprim value caml_xml_tree_register_tag(value tree,value str){ } extern "C" CAMLprim value caml_xml_tree_nullt(value unit){ - CAMLparam1(unit); - CAMLreturn (NULLT); + return (NULLT); } -extern "C" CAMLprim value caml_xml_tree_save(value tree,value filename){ - CAMLparam2(tree,filename); - XMLTREE(tree)->Save((unsigned char *) String_val(filename)); - CAMLreturn (Val_unit); +extern "C" CAMLprim value caml_unordered_set_length(value hset){ + CAMLparam1(hset); + CAMLreturn (Val_int((HSET(hset))->size())); } -extern "C" CAMLprim value caml_xml_tree_load(value filename,value samplerate){ - CAMLparam2(filename,samplerate); - CAMLlocal1(doc); - XMLTree * tree; - tree = XMLTree::Load((unsigned char *) String_val(filename),Int_val(samplerate)); - caml_init_ops(); - doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2); - memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*)); - CAMLreturn(doc); +extern "C" CAMLprim value caml_unordered_set_alloc(value len){ + CAMLparam1(len); + CAMLlocal1(hset); + hset = caml_alloc_custom(&set_ops,sizeof(std::unordered_set*),1,2); + std::unordered_set* ht = new std::unordered_set(); + memcpy(Data_custom_val(hset),&ht,sizeof(std::unordered_set*)); + CAMLreturn (hset); } -extern "C" { - static int caml_empty_vector[] = { 0 }; +extern "C" CAMLprim value caml_unordered_set_set(value vec, value v){ + HSET(vec)->insert((int) Int_val(v)); + return (Val_unit); } -extern "C" CAMLprim value caml_int_vector_empty(value unit){ - CAMLparam1(unit); - CAMLreturn ((value) caml_empty_vector); +extern "C" CAMLprim value caml_xml_tree_select_desc(value tree, value node, value tags){ + return (Val_int (XMLTREE(tree)->SelectDesc(TREENODEVAL(node), + HSET(tags)))); } - -extern "C" CAMLprim value caml_int_vector_length(value vec){ - CAMLparam1(vec); - CAMLreturn (Val_int( ((int*) caml_empty_vector)[0] )); +extern "C" CAMLprim value caml_xml_tree_select_child(value tree, value node, value tags){ + return (Val_int (XMLTREE(tree)->SelectChild(TREENODEVAL(node), + HSET(tags)))); } -extern "C" CAMLprim value caml_int_vector_alloc(value len){ - CAMLparam1(len); - int * vec = (int *) malloc(sizeof(int)*(Int_val(len)+1)); - vec[0] = Int_val(len); - CAMLreturn ((value) vec); +extern "C" CAMLprim value caml_xml_tree_select_foll_sibling(value tree, value node, value tags){ + return (Val_int (XMLTREE(tree)->SelectFollSibling(TREENODEVAL(node), + HSET(tags)))); } - -extern "C" CAMLprim value caml_int_vector_set(value vec, value i, value v){ - CAMLparam3(vec,i,v); - - ((int*) vec)[Int_val(i)+1] = Int_val(v); - CAMLreturn (Val_unit); +extern "C" CAMLprim value caml_xml_tree_select_foll_below(value tree, value node, value tags,value ctx){ + return (Val_int (XMLTREE(tree)->SelectFollBelow(TREENODEVAL(node), + HSET(tags), + TREENODEVAL(ctx)))); } -#define VECT(x) ((int*) (x)) -extern "C" CAMLprim value caml_xml_tree_tagged_below(value tree, value node, value ctags, value dtags){ - CAMLparam4(tree,node,ctags,dtags); - - CAMLreturn (Val_int ( - (XMLTREE(tree)->TaggedBelow(TREENODEVAL(node), - &(VECT(ctags)[1]), - VECT(ctags)[0], - &(VECT(dtags)[1]), - VECT(dtags)[0])))); -} - -extern "C" CAMLprim value caml_xml_tree_tagged_next(value tree, value node, value ctags, value ftags,value root){ - CAMLparam5(tree,node,ctags,ftags,root); - CAMLreturn (Val_int ( - (XMLTREE(tree)->TaggedNext(TREENODEVAL(node), - &(VECT(ctags)[1]), - VECT(ctags)[0], - &(VECT(ftags)[1]), - VECT(ftags)[0], - TREENODEVAL(root))))); -} - -extern "C" CAMLprim value caml_xml_tree_tagged_desc_only(value tree, value node,value dtags){ - CAMLparam3(tree,node,dtags); - - CAMLreturn (Val_int ( - (XMLTREE(tree)->TaggedDescOnly(TREENODEVAL(node), - &(VECT(dtags)[1]), - VECT(dtags)[0])))); -} - -extern "C" CAMLprim value caml_xml_tree_tagged_foll_only(value tree, value node, value ftags,value root){ - CAMLparam4(tree,node,ftags,root); - CAMLreturn (Val_int ( - (XMLTREE(tree)->TaggedFollOnly(TREENODEVAL(node), - &(VECT(ftags)[1]), - VECT(ftags)[0], - TREENODEVAL(root))))); -} - -extern "C" CAMLprim value caml_xml_tree_tagged_desc_or_foll_only(value tree, value node, value ftags,value root){ - CAMLparam4(tree,node,ftags,root); - CAMLreturn (Val_int ( - (XMLTREE(tree)->TaggedDescOrFollOnly(TREENODEVAL(node), - &(VECT(ftags)[1]), - VECT(ftags)[0], - TREENODEVAL(root))))); -} - extern "C" CAMLprim value caml_xml_tree_doc_ids(value tree, value node){ CAMLparam2(tree,node); CAMLlocal1(tuple);