X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=OCamlDriver.cpp;h=35d03b87a9045a67a8de56330d716ed1ff8bab8a;hb=cf6d366b25132eea7b0f1966c11d034d748af0fa;hp=b0e57d94b1b9a4c4baf7b876078f6e4b3894abeb;hpb=c8f4fdfb1408aee5e3a4b402ff1ead9c3cdd9a4c;p=SXSI%2Fxpathcomp.git diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index b0e57d9..35d03b8 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -1,133 +1,516 @@ /************************************** * OCamlDriver.cpp * ------------------- - * A Test Ocaml Driver which calls the C++ methods and + * An Ocaml Driver which calls the C++ methods and * adds a C wrapper interface with OCaml code. * * Author: Kim Nguyen * Date: 04/11/08 */ -/* OCaml memory managment */ + + +#include +#include +#include "XMLDocShredder.h" +#include "XMLTree.h" +#include "Utils.h" + extern "C" { +/* OCaml memory managment */ #include #include #include #include #include -} //extern C - -#include "XMLDocShredder.h" -#include "XMLTree.h" -#include "TextCollection/TextCollection.h" -#include "Utils.h" +#include +#include "results.h" +#include -#define CAMLRAISECPP(e) (caml_failwith( ((e).what()))) +#define CAMLRAISEMSG(msg) (caml_raise_with_string(*cpp_exception,(msg) )) #define NOT_IMPLEMENTED(s) (caml_failwith(s)) -#define XMLTREE(x) ((XMLTree *)(x)) +#define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x))) +#define HSET(x) ((std::unordered_set*)((* (std::unordered_set**) Data_custom_val(x)))) #define TEXTCOLLECTION(x) #define TREENODEVAL(i) ((treeNode) (Int_val(i))) +#define XMLTREE_ROOT 0 + + static struct custom_operations ops; + static struct custom_operations set_ops; + static value * cpp_exception = NULL; + static bool ops_initialized = false; + +} + +extern "C" void caml_xml_tree_finalize(value tree){ + delete XMLTREE(tree); + return; +} -extern "C" CAMLprim value caml_call_shredder_uri(value uri){ +extern "C" void caml_hset_finalize(value hblock){ + delete HSET(hblock); + return; +} + +extern "C" CAMLprim value caml_init_lib (value unit) { + CAMLparam1(unit); + if (!ops_initialized){ + + + ops.identifier = (char*) "XMLTree"; + ops.finalize = caml_xml_tree_finalize; + set_ops.identifier = (char*) "unordered_set"; + set_ops.finalize = caml_hset_finalize; + + cpp_exception = caml_named_value("CPlusPlusError"); + if (cpp_exception == NULL){ + string s = "FATAL: Unregistered exception "; + s += "CPlusPlusError"; + caml_failwith(s.c_str()); + }; + + ops_initialized = true; + + }; + CAMLreturn(Val_unit); + +} +extern "C" CAMLprim value caml_shredder_parse(XMLDocShredder *shredder){ + CAMLparam0(); + CAMLlocal1(doc); + XMLTree * tree; + shredder->processStartDocument(""); + shredder->parse(); + shredder->processEndDocument(); + doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2); + tree = (XMLTree *) shredder->getXMLTree(); + memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*)); + CAMLreturn(doc); + +} + +extern "C" CAMLprim value caml_call_shredder_uri(value uri,value sf, value iet, value dtc){ CAMLparam1(uri); CAMLlocal1(doc); char *fn = String_val(uri); + XMLDocShredder * shredder; try { - XMLDocShredder shredder(fn); - shredder.processStartDocument(fn); - shredder.parse(); - shredder.processEndDocument(); - doc = (value) shredder.storageIfc_->returnDocument(); - - CAMLreturn(doc); + shredder = new XMLDocShredder(fn,Int_val(sf),Bool_val(iet),Bool_val(dtc)); + doc = caml_shredder_parse(shredder); + delete shredder; } - catch (const std::exception& e){ - CAMLRAISECPP(e); - }; + catch (const std::exception& e){ CAMLRAISEMSG(e.what()); } + catch (string msg){ CAMLRAISEMSG(msg.c_str()); } + catch (char const * msg){ CAMLRAISEMSG(msg); }; + CAMLreturn (doc); } - -extern "C" CAMLprim value caml_call_shredder_string(value data){ +extern "C" CAMLprim value caml_call_shredder_string(value data,value sf, value iet, value dtc){ CAMLparam1(data); CAMLlocal1(doc); + XMLDocShredder * shredder; unsigned int ln = string_length(data); unsigned char *fn = (unsigned char*) String_val(data); - try { - XMLDocShredder shredder(fn,ln); - shredder.processStartDocument(""); - shredder.parse(); - shredder.processEndDocument(); - doc = (value) shredder.storageIfc_->returnDocument(); - + shredder = new XMLDocShredder (fn,ln,Int_val(sf),Bool_val(iet),Bool_val(dtc)); + doc = caml_shredder_parse(shredder); + delete shredder; + } + catch (const std::exception& e){ CAMLRAISEMSG(e.what()); } + catch (string msg){ CAMLRAISEMSG(msg.c_str()); } + catch (char const * msg){ CAMLRAISEMSG(msg); }; + CAMLreturn(doc); +} + +extern "C" CAMLprim value caml_xml_tree_save(value tree,value fd){ + CAMLparam2(tree,fd); + XMLTREE(tree)->Save(Int_val(fd)); + CAMLreturn (Val_unit); +} + +extern "C" CAMLprim value caml_xml_tree_load(value fd, value load_tc,value sf){ + CAMLparam3(fd,load_tc,sf); + CAMLlocal1(doc); + XMLTree * tree; + try { + tree = XMLTree::Load(Int_val(fd),Bool_val(load_tc),Int_val(sf)); + doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2); + memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*)); CAMLreturn(doc); } - catch (const std::exception& e) { - CAMLRAISECPP(e); - }; + catch (const xmlpp::internal_error& e){ CAMLRAISEMSG(e.what()); } + catch (const std::exception& e){ CAMLRAISEMSG(e.what()); } + catch (string msg){ CAMLRAISEMSG(msg.c_str()); } + catch (char const * msg){ CAMLRAISEMSG(msg); }; +} + +extern "C" CAMLprim value caml_text_collection_get_text(value tree, value id){ + CAMLparam2(tree,id); + CAMLlocal1(str); + uchar* txt = XMLTREE(tree)->GetText((DocID) Int_val(id)); + str = caml_copy_string((const char*)txt); + CAMLreturn (str); +} + +extern "C" CAMLprim value caml_text_collection_get_cached_text(value tree, value id){ + CAMLparam2(tree,id); + CAMLlocal1(str); + char* txt = (char*) XMLTREE(tree)->GetText((DocID) Int_val(id)); + str = caml_copy_string(txt); + CAMLreturn (str); +} + + +extern "C" CAMLprim value caml_text_collection_empty_text(value tree,value id){ + CAMLparam2(tree,id); + CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id)))); } -extern "C" CAMLprim value caml_text_collection_get_text(value tc, value id){ - CAMLparam2(tc,id); +extern "C" CAMLprim value caml_text_collection_is_contains(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) String_val(str); + CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsContains(cstr))); +} + +extern "C" CAMLprim value caml_text_collection_count_contains(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) String_val(str); + CAMLreturn (Val_int((XMLTREE(tree)->CountContains(cstr)))); + +} +extern "C" CAMLprim value caml_text_collection_count(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) String_val(str); + CAMLreturn (Val_int((XMLTREE(tree)->Count(cstr)))); + CAMLreturn (Val_unit); - const char* txt = (const char*) ((TextCollection*) tc)->GetText((DocID) Int_val(id)); - CAMLreturn (caml_copy_string(txt)); } +bool docId_comp(DocID x, DocID y) { return x < y; }; + + +extern "C" CAMLprim value caml_text_collection_contains(value tree,value str){ + CAMLparam2(tree,str); + CAMLlocal1(resarray); + uchar * cstr = (uchar *) String_val(str); + std::vector results; + results = XMLTREE(tree)->Contains(cstr); + std::sort(results.begin(), results.end(), docId_comp); + size_t s = results.size(); + resarray = caml_alloc_tuple(s); + + for (size_t i = 0; i < s ;i++){ + caml_initialize(&Field(resarray,i),Val_int(results[i])); + }; + CAMLreturn (resarray); +} + +extern "C" CAMLprim value caml_text_collection_equals(value tree,value str){ + CAMLparam2(tree,str); + CAMLlocal1(resarray); + uchar * cstr = (uchar *) String_val(str); + std::vector results; + results = XMLTREE(tree)->Equal(cstr); + std::sort(results.begin(), results.end(), docId_comp); + size_t s = results.size(); + resarray = caml_alloc_tuple(s); + + for (size_t i = 0; i < s ;i++){ + caml_initialize(&Field(resarray,i),Val_int(results[i])); + }; + CAMLreturn (resarray); +} +extern "C" CAMLprim value caml_text_collection_startswith(value tree,value str){ + CAMLparam2(tree,str); + CAMLlocal1(resarray); + uchar * cstr = (uchar *) String_val(str); + std::vector results; + results = XMLTREE(tree)->Prefix(cstr); + std::sort(results.begin(), results.end(), docId_comp); + size_t s = results.size(); + resarray = caml_alloc_tuple(s); + + for (size_t i = 0; i < s ;i++){ + caml_initialize(&Field(resarray,i),Val_int(results[i])); + }; + CAMLreturn (resarray); +} +extern "C" CAMLprim value caml_text_collection_endswith(value tree,value str){ + CAMLparam2(tree,str); + CAMLlocal1(resarray); + uchar * cstr = (uchar *) String_val(str); + std::vector results; + results = XMLTREE(tree)->Suffix(cstr); + std::sort(results.begin(), results.end(), docId_comp); + size_t s = results.size(); + resarray = caml_alloc_tuple(s); + + for (size_t i = 0; i < s ;i++){ + caml_initialize(&Field(resarray,i),Val_int(results[i])); + }; + CAMLreturn (resarray); +} + + + +extern "C" CAMLprim value caml_text_collection_unsorted_contains(value tree,value str){ + CAMLparam2(tree,str); + CAMLlocal1(resarray); + uchar * cstr = (uchar *) String_val(str); + std::vector results; + results = XMLTREE(tree)->Contains(cstr); + resarray = caml_alloc_tuple(results.size()); + for (size_t i = 0; i < results.size() ;i++){ + caml_initialize(&Field(resarray,i),Val_int(results[i])); + }; + CAMLreturn (resarray); +} + extern "C" CAMLprim value caml_xml_tree_root(value tree){ CAMLparam1(tree); - CAMLreturn (TREENODEVAL(XMLTREE(tree)->Root())); + CAMLreturn (Val_int(TREENODEVAL(XMLTREE_ROOT))); } extern "C" CAMLprim value caml_xml_tree_text_collection(value tree){ CAMLparam1(tree); - CAMLreturn((value) XMLTREE(tree)->GetTextCollection()); + CAMLreturn((value) XMLTREE(tree)->getTextCollection()); +} +extern "C" CAMLprim value caml_xml_tree_parent(value tree, value id){ + return(Val_int (XMLTREE(tree)->Parent(TREENODEVAL(id)))); +} +extern "C" CAMLprim value caml_xml_tree_prev_sibling(value tree, value id){ + return(Val_int (XMLTREE(tree)->PrevSibling(TREENODEVAL(id)))); +} + +extern "C" CAMLprim value caml_xml_tree_parent_doc(value tree, value id){ + return (Val_int (XMLTREE(tree)->ParentNode((DocID) Int_val(id)))); +} + +extern "C" CAMLprim value caml_xml_tree_is_ancestor(value tree,value id1, value id2) { + CAMLparam3(tree,id1,id2); + CAMLreturn(Val_bool (XMLTREE(tree)->IsAncestor(TREENODEVAL(id1),TREENODEVAL(id2)))); } +extern "C" CAMLprim value caml_xml_tree_last_child(value tree, value id){ + return(Val_int (XMLTREE(tree)->LastChild(TREENODEVAL(id)))); +} + +extern "C" CAMLprim value caml_xml_tree_is_first_child(value tree, value id){ + return Val_bool (XMLTREE(tree)->IsFirstChild(TREENODEVAL(id))); +} extern "C" CAMLprim value caml_xml_tree_first_child(value tree, value id){ - CAMLparam2(tree,id); - CAMLlocal1(res); - CAMLreturn(Val_int (XMLTREE(tree)->FirstChild(TREENODEVAL(id)))); + return(Val_int (XMLTREE(tree)->FirstChild(TREENODEVAL(id)))); +} +extern "C" CAMLprim value caml_xml_tree_closing(value tree, value id){ + return(Val_int (XMLTREE(tree)->Closing(TREENODEVAL(id)))); } +extern "C" CAMLprim value caml_xml_tree_is_open(value tree, value id){ + return(Val_bool (XMLTREE(tree)->IsOpen(TREENODEVAL(id)))); +} + +extern "C" CAMLprim value caml_xml_tree_first_element(value tree, value id){ + return(Val_int (XMLTREE(Field(tree,0))->FirstElement(TREENODEVAL(id)))); +} + +extern "C" CAMLprim value caml_xml_tree_tagged_child(value tree, value id, value tag){ + return(Val_int (XMLTREE(tree)->TaggedChild(TREENODEVAL(id),Int_val(tag)))); +} + extern "C" CAMLprim value caml_xml_tree_next_sibling(value tree, value id){ - CAMLparam2(tree,id); - CAMLreturn(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id)))); + return(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id)))); } -extern "C" CAMLprim value caml_xml_tree_prev_text(value tree, value id){ - CAMLparam2(tree,id); - CAMLlocal1(res); - CAMLreturn(Val_int((XMLTREE(tree)->PrevText(TREENODEVAL(id))))); - CAMLreturn(res); +extern "C" CAMLprim value caml_xml_tree_next_element(value tree, value id){ + return(Val_int (XMLTREE(Field(tree,0))->NextElement(TREENODEVAL(id)))); } -extern "C" CAMLprim value caml_xml_tree_next_text(value tree, value id){ - CAMLparam2(tree,id); - CAMLreturn(Val_int((XMLTREE(tree)->NextText(TREENODEVAL(id))))); + +extern "C" CAMLprim value caml_xml_tree_tagged_sibling(value tree, value id, value tag){ + return(Val_int (XMLTREE(tree)->TaggedFollSibling(TREENODEVAL(id),Int_val(tag)))); } + + +extern "C" CAMLprim value caml_xml_tree_is_leaf(value tree, value id){ + return(Val_bool (XMLTREE(tree)->IsLeaf(TREENODEVAL(id)))); +} + +extern "C" CAMLprim value caml_xml_tree_tagged_desc(value tree, value id, value tag){ + return(Val_int (XMLTREE(tree)->TaggedDesc(TREENODEVAL(id),(TagType) Int_val(tag)))); +} + + +extern "C" CAMLprim value caml_xml_tree_tagged_foll(value tree, value id, value tag){ + return(Val_int (XMLTREE(tree)->TaggedFoll(TREENODEVAL(id),(TagType) Int_val(tag)))); +} +extern "C" CAMLprim value caml_xml_tree_tagged_foll_below(value tree, value id, value tag,value root){ + return(Val_int (XMLTREE(tree)->TaggedFollBelow(TREENODEVAL(id),(TagType) Int_val(tag),TREENODEVAL(root)))); +} +extern "C" CAMLprim value caml_xml_tree_tagged_foll_before(value tree, value id, value tag,value root){ + return(Val_int (XMLTREE(tree)->TaggedFollBefore(TREENODEVAL(id),(TagType) Int_val(tag),TREENODEVAL(root)))); +} + extern "C" CAMLprim value caml_xml_tree_my_text(value tree, value id){ - CAMLparam2(tree,id); - CAMLreturn(Val_int((XMLTREE(tree)->MyText(TREENODEVAL(id))))); + return(Val_int((XMLTREE(tree)->MyText(TREENODEVAL(id))))); +} + +extern "C" CAMLprim value caml_xml_tree_my_text_unsafe(value tree, value id){ + return(Val_int((XMLTREE(tree)->MyTextUnsafe(TREENODEVAL(id))))); } extern "C" CAMLprim value caml_xml_tree_text_xml_id(value tree, value id){ - CAMLparam2(tree,id); + return(Val_int((XMLTREE(tree)->TextXMLId(TREENODEVAL(id))))); +} +extern "C" CAMLprim value caml_xml_tree_node_xml_id(value tree, value id){ + return(Val_int((XMLTREE(tree)->NodeXMLId(TREENODEVAL(id))))); +} + +extern "C" CAMLprim value caml_xml_tree_tag_name(value tree, value tagid){ + CAMLparam2(tree,tagid); + CAMLlocal1(str); + char* tag; + tag = (char*) XMLTREE(tree)->GetTagNameByRef((TagType) (Int_val(tagid))); + str = caml_copy_string((const char*) tag); + CAMLreturn (str); +} - NOT_IMPLEMENTED("caml_xml_tree_text_xml_id"); + +extern "C" CAMLprim value caml_xml_tree_tag_id(value tree,value id){ + return (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(id)))); +} + +extern "C" CAMLprim value caml_xml_tree_subtree_tags(value tree,value id,value tag){ + return (Val_int(XMLTREE(tree)->SubtreeTags(TREENODEVAL(id),Int_val(tag)))); +} + +extern "C" CAMLprim value caml_xml_tree_subtree_size(value tree,value id){ + return (Val_int(XMLTREE(tree)->SubtreeSize(TREENODEVAL(id)))); +} + +extern "C" CAMLprim value caml_xml_tree_subtree_elements(value tree,value id){ + return (Val_int(XMLTREE(tree)->SubtreeElements(TREENODEVAL(id)))); +} + + +extern "C" CAMLprim value caml_xml_tree_register_tag(value tree,value str){ + CAMLparam2(tree,str); + CAMLlocal1(id); + unsigned char* tag; + tag = (unsigned char*) (String_val(str)); + id = Val_int(XMLTREE(tree)->RegisterTag(tag)); + CAMLreturn (id); +} + +extern "C" CAMLprim value caml_xml_tree_nullt(value unit){ + return (NULLT); +} + +extern "C" CAMLprim value caml_unordered_set_length(value hset){ + CAMLparam1(hset); + CAMLreturn (Val_int((HSET(hset))->size())); +} + +extern "C" CAMLprim value caml_unordered_set_alloc(value len){ + CAMLparam1(len); + CAMLlocal1(hset); + hset = caml_alloc_custom(&set_ops,sizeof(std::unordered_set*),1,2); + std::unordered_set* ht = new std::unordered_set(); + memcpy(Data_custom_val(hset),&ht,sizeof(std::unordered_set*)); + CAMLreturn (hset); +} + +extern "C" CAMLprim value caml_unordered_set_set(value vec, value v){ + HSET(vec)->insert((int) Int_val(v)); + return (Val_unit); +} + +extern "C" CAMLprim value caml_xml_tree_select_desc(value tree, value node, value tags){ + return (Val_int (XMLTREE(tree)->SelectDesc(TREENODEVAL(node), + HSET(tags)))); +} +extern "C" CAMLprim value caml_xml_tree_select_child(value tree, value node, value tags){ + return (Val_int (XMLTREE(tree)->SelectChild(TREENODEVAL(node), + HSET(tags)))); +} +extern "C" CAMLprim value caml_xml_tree_select_foll_sibling(value tree, value node, value tags){ + return (Val_int (XMLTREE(tree)->SelectFollSibling(TREENODEVAL(node), + HSET(tags)))); +} +extern "C" CAMLprim value caml_xml_tree_select_foll_below(value tree, value node, value tags,value ctx){ + return (Val_int (XMLTREE(tree)->SelectFollBelow(TREENODEVAL(node), + HSET(tags), + TREENODEVAL(ctx)))); +} +extern "C" CAMLprim value caml_xml_tree_select_foll_before(value tree, value node, value tags,value ctx){ + return (Val_int (XMLTREE(tree)->SelectFollBelow(TREENODEVAL(node), + HSET(tags), + TREENODEVAL(ctx)))); +} + + +extern "C" CAMLprim value caml_xml_tree_doc_ids(value tree, value node){ + CAMLparam2(tree,node); + CAMLlocal1(tuple); + tuple = caml_alloc_tuple(2); + range r = (XMLTREE(tree)->DocIds(TREENODEVAL(node))); + caml_initialize(&Field(tuple,0),Val_int(r.min)); + caml_initialize(&Field(tuple,1),Val_int(r.max)); + CAMLreturn (tuple); +} + +extern "C" value caml_result_set_create(value size){ + results* res = (results*) malloc(sizeof(results)); + results r = createResults (Int_val(size)); + res->n = r.n; + res->lgn = r.lgn; + res->tree = r.tree; + return ((value) (res)); +} + +extern "C" CAMLprim value caml_result_set_set(value result,value p){ + CAMLparam1(p); + setResult ( *((results*) result), Int_val(p)); CAMLreturn (Val_unit); } -extern "C" CAMLprim value caml_xml_tree_node_xml_id(value tree, value id){ - CAMLparam2(tree,id); - NOT_IMPLEMENTED("caml_xml_tree_node_xml_id"); +extern "C" CAMLprim value caml_result_set_clear(value result,value p1,value p2){ + CAMLparam2(p1,p2); + clearRange ( *((results*) result), Int_val(p1), Int_val(p2)); CAMLreturn (Val_unit); } -extern "C" CAMLprim value caml_xml_tree_tag(value tree, value id){ - CAMLparam2(tree,id); - const char* tag; - tag =(const char*) XMLTREE(tree)->GetTagName(XMLTREE(tree)->Tag(TREENODEVAL(id))); - CAMLreturn (caml_copy_string(tag)); +extern "C" CAMLprim value caml_result_set_next(value result,value p){ + CAMLparam1(p); + results r; + r = *( (results *) result); + CAMLreturn (Val_int(nextResult(r, Int_val(p)))); } -extern "C" CAMLprim value caml_xml_tree_nullt(value unit){ - CAMLparam1(unit); - CAMLreturn (NULLT); + +extern "C" CAMLprim value caml_result_set_count(value result){ + CAMLparam0(); + results r; + r = *( (results *) result); + CAMLreturn (Val_int(countResult(r))); +} + +extern "C" CAMLprim value caml_xml_tree_print(value tree,value node,value fd){ + CAMLparam3(tree,node,fd); + XMLTREE(tree)->Print(Int_val(fd),TREENODEVAL(node)); + CAMLreturn(Val_unit); } + +extern "C" CAMLprim value caml_set_tag_bits(value result, value tag, value tree, value node) +{ + CAMLparam3(tag,tree,node); + results r; + XMLTree *t = XMLTREE(Field(tree,0)); + treeNode opening = TREENODEVAL(node); + treeNode closing = t->Closing(opening); + TagType target_tag = Int_val(tag); + treeNode first = t->TaggedDesc(opening,target_tag); + r = *( (results *) result); + opening = first; + while (opening != NULLT){ + setResult(r,opening); + opening = t->TaggedFollBefore(opening,target_tag,closing); + }; + CAMLreturn(Val_int(first)); +} +