*/
-/* OCaml memory managment */
+/* OCaml memory management */
+#include <unordered_set>
extern "C" {
#include <caml/mlvalues.h>
#include <caml/alloc.h>
#include <caml/callback.h>
#include <caml/fail.h>
#include <caml/custom.h>
-
-} //extern C
+
+} //extern C
+
//#include "TextCollection/TextCollection.h"
#include "XMLDocShredder.h"
#include "XMLTree.h"
#include "Utils.h"
-#define CAMLRAISECPP(e) (caml_failwith( ((e).what())))
+/* Raise the OCaml exception stored in cpp_exception with `msg` as its string argument. */
+#define CAMLRAISEMSG(msg) (caml_raise_with_string(*cpp_exception,(msg) ))
#define NOT_IMPLEMENTED(s) (caml_failwith(s))
#define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x)))
+/* Read the pointer stored in custom block `x` and view it as a std::unordered_set<int>*.
+   The payload is loaded through an XMLTree** before being cast. */
+#define HSET(x) ((std::unordered_set<int>*)((* (XMLTree**) Data_custom_val(x))))
#define TEXTCOLLECTION(x)
#define TREENODEVAL(i) ((treeNode) (Int_val(i)))
+/* Constant node id that caml_xml_tree_root reports as the root. */
+#define XMLTREE_ROOT 0
+
+
extern "C" {
static struct custom_operations ops;
- static bool initialized = false;
+ /* Custom-block operations for std::unordered_set<int> handles; filled in by caml_init_lib. */
+ static struct custom_operations set_ops;
+ /* OCaml exception looked up under the name "CPlusPlusError" by caml_init_lib; NULL until then. */
+ static value * cpp_exception = NULL;
+ /* Set to true once caml_init_lib has populated ops, set_ops and cpp_exception. */
+ static bool ops_initialized = false;
+
}
+/* Finalizer installed in `ops`: deletes the XMLTree whose pointer is stored in `tree`. */
extern "C" void caml_xml_tree_finalize(value tree){
delete XMLTREE(tree);
return;
}
+/* Finalizer installed in `set_ops`: destroys the std::unordered_set<int> whose
+   pointer is stored in `hblock`. */
+extern "C" void caml_hset_finalize(value hblock){
+  std::unordered_set<int> * const owned_set = HSET(hblock);
+  delete owned_set;
+}
-extern "C" void caml_init_ops () {
-
- if (initialized)
- return;
+/* One-time library setup callable from OCaml: fills in the identifier and finalizer of
+   `ops` and `set_ops`, looks up the OCaml exception named "CPlusPlusError", and records
+   that initialisation happened. Later calls do nothing. Always returns unit. */
+extern "C" CAMLprim value caml_init_lib (value unit) {
+ CAMLparam1(unit);
+ if (!ops_initialized){
+
+
ops.identifier = (char*) "XMLTree";
ops.finalize = caml_xml_tree_finalize;
- return;
+ set_ops.identifier = (char*) "unordered_set";
+ set_ops.finalize = caml_hset_finalize;
+
+ /* NOTE(review): the lookup result is not checked; CAMLRAISEMSG dereferences
+    cpp_exception, so the name presumably must be registered on the OCaml side first. */
+ cpp_exception = caml_named_value("CPlusPlusError");
+
+ ops_initialized = true;
+
+ };
+ CAMLreturn(Val_unit);
+
+}
+/* Drives `shredder` through processStartDocument(""), parse() and processEndDocument(),
+   then returns the resulting XMLTree wrapped in a custom block that uses `ops`
+   (so caml_xml_tree_finalize deletes the tree when the block is collected).
+
+   The tree pointer is fetched BEFORE the custom block is allocated. With the opposite
+   order, an exception thrown by getXMLTree() would leave behind a custom block whose
+   payload was never written, and its finalizer would later delete that garbage pointer.
+   Exceptions thrown by the shredder propagate to the caller. */
+extern "C" CAMLprim value caml_shredder_parse(XMLDocShredder *shredder){
+  CAMLparam0();
+  CAMLlocal1(doc);
+  shredder->processStartDocument("");
+  shredder->parse();
+  shredder->processEndDocument();
+  XMLTree * tree = (XMLTree *) shredder->getXMLTree();
+  doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2);
+  /* The block's payload is a single XMLTree* */
+  memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*));
+  CAMLreturn(doc);
}
+/* Builds an XMLDocShredder from the C string of `uri` and the options `sf`, `iet`, `dtc`,
+   hands it to caml_shredder_parse and returns the resulting document value.
+   std::exception, string and char const* exceptions are re-raised through CAMLRAISEMSG.
+   NOTE(review): only `uri` is registered with CAMLparam1; `sf`, `iet` and `dtc` are read
+   before any allocation visible here. */
extern "C" CAMLprim value caml_call_shredder_uri(value uri,value sf, value iet, value dtc){
CAMLparam1(uri);
CAMLlocal1(doc);
char *fn = String_val(uri);
+ XMLDocShredder * shredder;
try {
- XMLDocShredder shredder(fn,Int_val(sf),Bool_val(iet),Bool_val(dtc));
- XMLTree * tree;
- shredder.processStartDocument(fn);
- shredder.parse();
- shredder.processEndDocument();
- caml_init_ops();
- doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2);
- tree = (XMLTree *) shredder.storageIfc_->returnDocument();
- memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*));
- CAMLreturn(doc);
+ shredder = new XMLDocShredder(fn,Int_val(sf),Bool_val(iet),Bool_val(dtc));
+ doc = caml_shredder_parse(shredder);
+ /* NOTE(review): this delete is skipped when caml_shredder_parse throws, so the
+    heap-allocated shredder is not released on the exception path. */
+ delete shredder;
}
- catch (const std::exception& e){
- CAMLRAISECPP(e);
- };
+ catch (const std::exception& e){ CAMLRAISEMSG(e.what()); }
+ catch (string msg){ CAMLRAISEMSG(msg.c_str()); }
+ catch (char const * msg){ CAMLRAISEMSG(msg); };
+ CAMLreturn (doc);
}
-
+/* Builds an XMLDocShredder over the bytes of the OCaml string `data` (pointer plus
+   length) with options `sf`, `iet`, `dtc`, hands it to caml_shredder_parse and returns
+   the resulting document value. std::exception, string and char const* exceptions are
+   re-raised through CAMLRAISEMSG.
+   NOTE(review): only `data` is registered with CAMLparam1. */
extern "C" CAMLprim value caml_call_shredder_string(value data,value sf, value iet, value dtc){
CAMLparam1(data);
CAMLlocal1(doc);
+ XMLDocShredder * shredder;
unsigned int ln = string_length(data);
unsigned char *fn = (unsigned char*) String_val(data);
-
try {
- XMLDocShredder shredder(fn,ln,Int_val(sf),Bool_val(iet),Bool_val(dtc));
- XMLTree* tree;
- shredder.processStartDocument("");
- shredder.parse();
- shredder.processEndDocument();
- caml_init_ops();
- doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2);
- tree = (XMLTree *) shredder.storageIfc_->returnDocument();
- memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*));
- CAMLreturn(doc);
+ shredder = new XMLDocShredder (fn,ln,Int_val(sf),Bool_val(iet),Bool_val(dtc));
+ doc = caml_shredder_parse(shredder);
+ /* NOTE(review): this delete is skipped when caml_shredder_parse throws, so the
+    heap-allocated shredder is not released on the exception path. */
+ delete shredder;
}
- catch (const std::exception& e) {
- CAMLRAISECPP(e);
- };
+ catch (const std::exception& e){ CAMLRAISEMSG(e.what()); }
+ catch (string msg){ CAMLRAISEMSG(msg.c_str()); }
+ catch (char const * msg){ CAMLRAISEMSG(msg); };
+ CAMLreturn(doc);
}
-void traversal_rec(XMLTree* tree, treeNode id){
- DocID tid;
- if (id == NULLT)
- return;
- // int tag = tree->Tag(id);
- if (id) {
- tid = tree->PrevText(id);
- char * data = (char *) (tree->getTextCollection())->GetText(tid);
- if (tree->IsLeaf(id)){
- tid = tree->MyText(id);
-
- data = (char*) (tree->getTextCollection())->GetText(tid);
- };
-
- if (tree->NextSibling(id) == NULLT){
- tid = tree->NextText(id);
- data = (char*) (tree->getTextCollection())->GetText(tid);
- };
- };
- traversal_rec(tree,tree->FirstChild(id));
- traversal_rec(tree,tree->NextSibling(id));
- return;
+/* Calls XMLTree::Save on the tree held by `tree`, passing `fd` converted to a C int.
+   Returns unit. */
+extern "C" CAMLprim value caml_xml_tree_save(value tree,value fd){
+  CAMLparam2(tree,fd);
+  XMLTree * const doc_tree = XMLTREE(tree);
+  doc_tree->Save(Int_val(fd));
+  CAMLreturn (Val_unit);
}
-extern "C" CAMLprim value caml_cpp_traversal(value tree){
- CAMLparam1(tree);
- traversal_rec(XMLTREE(tree),XMLTREE(tree)->Root());
- CAMLreturn(Val_unit);
+/* Obtains an XMLTree from XMLTree::Load(Int_val(fd)) and returns it wrapped in a custom
+   block that uses `ops`. xmlpp::internal_error, std::exception, string and char const*
+   exceptions are re-raised through CAMLRAISEMSG. */
+extern "C" CAMLprim value caml_xml_tree_load(value fd){
+  CAMLparam1(fd);
+  CAMLlocal1(doc);
+  try {
+    /* Load first, allocate second: the block is only created once a tree exists */
+    XMLTree * tree = XMLTree::Load(Int_val(fd));
+    doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2);
+    memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*));
+  }
+  catch (const xmlpp::internal_error& e){ CAMLRAISEMSG(e.what()); }
+  catch (const std::exception& e){ CAMLRAISEMSG(e.what()); }
+  catch (string msg){ CAMLRAISEMSG(msg.c_str()); }
+  catch (char const * msg){ CAMLRAISEMSG(msg); };
+  /* Single exit after the handlers, as in the caml_call_shredder_* wrappers, so no path
+     through this non-void function ends without a return statement. */
+  CAMLreturn(doc);
}
+
+
+/* Returns a fresh OCaml string copied from XMLTree::GetText for document id `id`.
+   The new body copies into the rooted local `str` and then releases the C buffer. */
extern "C" CAMLprim value caml_text_collection_get_text(value tree, value id){
- CAMLparam2(tree,id);
- const char* txt = (const char*) (XMLTREE(tree)->GetText((DocID) Int_val(id)));
- CAMLreturn (caml_copy_string(txt));
+ CAMLparam2(tree,id);
+ CAMLlocal1(str);
+ uchar* txt = XMLTREE(tree)->GetText((DocID) Int_val(id));
+ str = caml_copy_string((const char*)txt);
+ /* NOTE(review): scalar delete here, while caml_text_collection_get_cached_text uses
+    free(); confirm how GetText allocates its buffer (new, new[] or malloc). */
+ delete (txt);
+ CAMLreturn (str);
+}
+
+/* Returns a fresh OCaml string copied from XMLTree::GetCachedText for document id `id`;
+   the C buffer is released with free() once copied. */
+extern "C" CAMLprim value caml_text_collection_get_cached_text(value tree, value id){
+  CAMLparam2(tree,id);
+  CAMLlocal1(str);
+  const DocID doc_id = (DocID) Int_val(id);
+  char* cached = (char*) XMLTREE(tree)->GetCachedText(doc_id);
+  str = caml_copy_string(cached);
+  free(cached);
+  CAMLreturn (str);
}
+
+
extern "C" CAMLprim value caml_text_collection_empty_text(value tree,value id){
CAMLparam2(tree,id);
CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id))));
uchar * cstr = (uchar *) String_val(str);
CAMLreturn (Val_int((XMLTREE(tree)->CountContains(cstr))));
+}
+/* Returns XMLTree::Count for the bytes of the OCaml string `str`, converted with Val_int. */
+extern "C" CAMLprim value caml_text_collection_count(value tree,value str){
+  CAMLparam2(tree,str);
+  uchar * cstr = (uchar *) String_val(str);
+  CAMLreturn (Val_int((XMLTREE(tree)->Count(cstr))));
}
extern "C" CAMLprim value caml_text_collection_contains(value tree,value str){
uchar * cstr = (uchar *) String_val(str);
std::vector<DocID> results;
results = XMLTREE(tree)->Contains(cstr);
-
+ //free(cstr);
resarray = caml_alloc_tuple(results.size());
for (unsigned int i=0; i<results.size();i++){
};
CAMLreturn (resarray);
}
+/* Runs XMLTree::Contains on the bytes of `str`. The resulting vector is not handed back
+   to OCaml: the function returns unit. */
+extern "C" CAMLprim value caml_text_collection_unsorted_contains(value tree,value str){
+  CAMLparam2(tree,str);
+  uchar * pattern = (uchar *) String_val(str);
+  std::vector<DocID> matches;
+  matches = XMLTREE(tree)->Contains(pattern);
+  CAMLreturn (Val_unit);
+}
+/* Returns the root node id. The new body no longer asks the tree for Root(); it returns
+   the constant XMLTREE_ROOT passed through TREENODEVAL and Val_int.
+   NOTE(review): TREENODEVAL applies Int_val to the untagged constant 0. */
extern "C" CAMLprim value caml_xml_tree_root(value tree){
CAMLparam1(tree);
- CAMLreturn (TREENODEVAL(XMLTREE(tree)->Root()));
+ CAMLreturn (Val_int(TREENODEVAL(XMLTREE_ROOT)));
}
+/* Returns the result of XMLTree::getTextCollection() cast directly to an OCaml value;
+   no custom block is allocated here. */
extern "C" CAMLprim value caml_xml_tree_text_collection(value tree){
CAMLparam1(tree);
CAMLreturn((value) XMLTREE(tree)->getTextCollection());
}
+/* Returns XMLTree::Parent of node `id`, converted with Val_int. The new body uses a
+   plain return instead of the CAMLparam/CAMLreturn pair. */
extern "C" CAMLprim value caml_xml_tree_parent(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int (XMLTREE(tree)->Parent(TREENODEVAL(id))));
+ return(Val_int (XMLTREE(tree)->Parent(TREENODEVAL(id))));
}
+/* Returns XMLTree::PrevSibling of node `id`, converted with Val_int. */
+extern "C" CAMLprim value caml_xml_tree_prev_sibling(value tree, value id){
+  XMLTree * const xt = XMLTREE(tree);
+  return Val_int(xt->PrevSibling(TREENODEVAL(id)));
+}
+
+/* Returns XMLTree::ParentNode for `id`, converted with Val_int. The new body converts
+   `id` with (DocID) Int_val rather than TREENODEVAL and uses a plain return. */
extern "C" CAMLprim value caml_xml_tree_parent_doc(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int (XMLTREE(tree)->ParentNode(TREENODEVAL(id))));
+ return (Val_int (XMLTREE(tree)->ParentNode((DocID) Int_val(id))));
}
+
+/* Returns XMLTree::IsAncestor(id1, id2) for the two node ids, converted with Val_bool. */
extern "C" CAMLprim value caml_xml_tree_is_ancestor(value tree,value id1, value id2) {
CAMLparam3(tree,id1,id2);
CAMLreturn(Val_bool (XMLTREE(tree)->IsAncestor(TREENODEVAL(id1),TREENODEVAL(id2))));
}
-extern "C" CAMLprim value caml_xml_tree_serialize(value tree, value filename){
- CAMLparam2(tree,filename);
- NOT_IMPLEMENTED("caml_xml_tree_serialize");
- CAMLreturn(Val_unit);
+/* Returns XMLTree::LastChild of node `id`, converted with Val_int. */
+extern "C" CAMLprim value caml_xml_tree_last_child(value tree, value id){
+  XMLTree * const xt = XMLTREE(tree);
+  return Val_int(xt->LastChild(TREENODEVAL(id)));
}
-extern "C" CAMLprim value caml_xml_tree_unserialize(value filename){
- CAMLparam1(filename);
- NOT_IMPLEMENTED("caml_xml_tree_unserialize");
- CAMLreturn(Val_unit);
+/* Returns XMLTree::IsFirstChild for node `id`, converted with Val_bool. */
+extern "C" CAMLprim value caml_xml_tree_is_first_child(value tree, value id){
+  XMLTree * const xt = XMLTREE(tree);
+  return Val_bool(xt->IsFirstChild(TREENODEVAL(id)));
+}
+/* Returns XMLTree::FirstChild of node `id`, converted with Val_int. */
+extern "C" CAMLprim value caml_xml_tree_first_child(value tree, value id){
+  XMLTree * const xt = XMLTREE(tree);
+  return Val_int(xt->FirstChild(TREENODEVAL(id)));
+}
+/* Returns XMLTree::FirstElement of node `id`, converted with Val_int. */
+extern "C" CAMLprim value caml_xml_tree_first_element(value tree, value id){
+  XMLTree * const xt = XMLTREE(tree);
+  return Val_int(xt->FirstElement(TREENODEVAL(id)));
}
+/* Returns XMLTree::TaggedChild(id, tag), converted with Val_int. */
+extern "C" CAMLprim value caml_xml_tree_tagged_child(value tree, value id, value tag){
+  XMLTree * const xt = XMLTREE(tree);
+  return Val_int(xt->TaggedChild(TREENODEVAL(id), Int_val(tag)));
+}
-extern "C" CAMLprim value caml_xml_tree_first_child(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int (XMLTREE(tree)->FirstChild(TREENODEVAL(id))));
+/* Returns XMLTree::NextSibling of node `id`, converted with Val_int. */
+extern "C" CAMLprim value caml_xml_tree_next_sibling(value tree, value id){
+  XMLTree * const xt = XMLTREE(tree);
+  return Val_int(xt->NextSibling(TREENODEVAL(id)));
+}
+
+/* Returns XMLTree::NextSibling of node `id`, converted with Val_int.
+   NOTE(review): this forwards to NextSibling, exactly like caml_xml_tree_next_sibling;
+   confirm whether an element-specific XMLTree method was intended. */
+extern "C" CAMLprim value caml_xml_tree_next_element(value tree, value id){
+  XMLTree * const xt = XMLTREE(tree);
+  return Val_int(xt->NextSibling(TREENODEVAL(id)));
+}
+
+/* Returns XMLTree::TaggedFollSibling(id, tag), converted with Val_int. */
+extern "C" CAMLprim value caml_xml_tree_tagged_sibling(value tree, value id, value tag){
+  XMLTree * const xt = XMLTREE(tree);
+  return Val_int(xt->TaggedFollSibling(TREENODEVAL(id), Int_val(tag)));
}
+
+/* Returns XMLTree::IsLeaf for node `id`, converted with Val_bool. The new body uses a
+   plain return instead of the CAMLparam/CAMLreturn pair. */
extern "C" CAMLprim value caml_xml_tree_is_leaf(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_bool (XMLTREE(tree)->IsLeaf(TREENODEVAL(id))));
+ return(Val_bool (XMLTREE(tree)->IsLeaf(TREENODEVAL(id))));
}
-extern "C" CAMLprim value caml_xml_tree_next_sibling(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id))));
+/* Returns XMLTree::TaggedDesc(id, tag), converted with Val_int. */
+extern "C" CAMLprim value caml_xml_tree_tagged_desc(value tree, value id, value tag){
+  XMLTree * const xt = XMLTREE(tree);
+  const TagType wanted = (TagType) Int_val(tag);
+  return Val_int(xt->TaggedDesc(TREENODEVAL(id), wanted));
}
-extern "C" CAMLprim value caml_xml_tree_prev_text(value tree, value id){
- CAMLparam2(tree,id);
- CAMLlocal1(res);
- CAMLreturn(Val_int((XMLTREE(tree)->PrevText(TREENODEVAL(id)))));
- CAMLreturn(res);
+
+/* Returns XMLTree::TaggedFoll(id, tag), converted with Val_int. */
+extern "C" CAMLprim value caml_xml_tree_tagged_foll(value tree, value id, value tag){
+  XMLTree * const xt = XMLTREE(tree);
+  const TagType wanted = (TagType) Int_val(tag);
+  return Val_int(xt->TaggedFoll(TREENODEVAL(id), wanted));
}
-extern "C" CAMLprim value caml_xml_tree_next_text(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int((XMLTREE(tree)->NextText(TREENODEVAL(id)))));
+/* Returns XMLTree::TaggedFollBelow(id, tag, root), converted with Val_int. */
+extern "C" CAMLprim value caml_xml_tree_tagged_foll_below(value tree, value id, value tag,value root){
+  XMLTree * const xt = XMLTREE(tree);
+  const TagType wanted = (TagType) Int_val(tag);
+  return Val_int(xt->TaggedFollBelow(TREENODEVAL(id), wanted, TREENODEVAL(root)));
}
+
+
+
+/* Returns XMLTree::MyText for node `id`, converted with Val_int. The new body uses a
+   plain return instead of the CAMLparam/CAMLreturn pair. */
extern "C" CAMLprim value caml_xml_tree_my_text(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int((XMLTREE(tree)->MyText(TREENODEVAL(id)))));
+ return(Val_int((XMLTREE(tree)->MyText(TREENODEVAL(id)))));
}
+/* Returns XMLTree::TextXMLId for `id`, converted with Val_int. The new body uses a
+   plain return instead of the CAMLparam/CAMLreturn pair. */
extern "C" CAMLprim value caml_xml_tree_text_xml_id(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int((XMLTREE(tree)->TextXMLId(TREENODEVAL(id)))));
+ return(Val_int((XMLTREE(tree)->TextXMLId(TREENODEVAL(id)))));
}
+/* Returns XMLTree::NodeXMLId for `id`, converted with Val_int. The new body uses a plain
+   return; the caml_xml_tree_tag stub that followed this function is removed. */
extern "C" CAMLprim value caml_xml_tree_node_xml_id(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int((XMLTREE(tree)->NodeXMLId(TREENODEVAL(id)))));
-}
-extern "C" CAMLprim value caml_xml_tree_tag(value tree, value id){
- CAMLparam2(tree,id);
- const char* tag;
- tag =(const char*) XMLTREE(tree)->GetTagName(XMLTREE(tree)->Tag(TREENODEVAL(id)));
-
- CAMLreturn (caml_copy_string(tag));
+ return(Val_int((XMLTREE(tree)->NodeXMLId(TREENODEVAL(id)))));
}
+/* Returns a fresh OCaml string copied from the tag name for tag id `tagid`. The new body
+   obtains the name through GetTagNameByRef and does not free the returned pointer. */
extern "C" CAMLprim value caml_xml_tree_tag_name(value tree, value tagid){
CAMLparam2(tree,tagid);
- const char* tag;
- tag = (const char*) XMLTREE(tree)->GetTagName((TagType) (Int_val(tagid)));
-
- CAMLreturn (caml_copy_string(tag));
+ CAMLlocal1(str);
+ char* tag;
+ tag = (char*) XMLTREE(tree)->GetTagNameByRef((TagType) (Int_val(tagid)));
+ str = caml_copy_string((const char*) tag);
+ CAMLreturn (str);
}
+/* Returns XMLTree::Tag for node `id`, converted with Val_int. The new body uses a plain
+   return instead of the CAMLparam/CAMLreturn pair. */
extern "C" CAMLprim value caml_xml_tree_tag_id(value tree,value id){
- CAMLparam2(tree,id);
- CAMLreturn (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(id))));
+ return (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(id))));
}
+/* Returns XMLTree::SubtreeTags(id, tag), converted with Val_int. */
+extern "C" CAMLprim value caml_xml_tree_subtree_tags(value tree,value id,value tag){
+  XMLTree * const xt = XMLTREE(tree);
+  return Val_int(xt->SubtreeTags(TREENODEVAL(id), Int_val(tag)));
+}
+
+
extern "C" CAMLprim value caml_xml_tree_register_tag(value tree,value str){
CAMLparam2(tree,str);
CAMLlocal1(id);
}
+/* Returns the NULLT constant to OCaml; `unit` is ignored.
+   NOTE(review): NULLT is returned as-is, without Val_int, unlike the node ids returned
+   by the other stubs; confirm that its raw bit pattern is the intended OCaml value. */
extern "C" CAMLprim value caml_xml_tree_nullt(value unit){
- CAMLparam1(unit);
- CAMLreturn (NULLT);
+ return (NULLT);
+}
+
+/* Returns the number of elements in the set held by `hset`, converted with Val_int. */
+extern "C" CAMLprim value caml_unordered_set_length(value hset){
+  CAMLparam1(hset);
+  std::unordered_set<int> * const tags = HSET(hset);
+  CAMLreturn (Val_int(tags->size()));
+}
+
+/* Creates an empty std::unordered_set<int> and returns it wrapped in a custom block that
+   uses set_ops, so caml_hset_finalize deletes it on collection. `len` is accepted but
+   not used.
+
+   The set is created BEFORE the custom block. With the opposite order, a failing `new`
+   would leave behind a block whose payload was never written, and its finalizer would
+   later delete that garbage pointer. */
+extern "C" CAMLprim value caml_unordered_set_alloc(value len){
+  CAMLparam1(len);
+  CAMLlocal1(hset);
+  std::unordered_set<int>* ht = new std::unordered_set<int>();
+  hset = caml_alloc_custom(&set_ops,sizeof(std::unordered_set<int>*),1,2);
+  /* The block's payload is a single pointer to the set */
+  memcpy(Data_custom_val(hset),&ht,sizeof(std::unordered_set<int>*));
+  CAMLreturn (hset);
+}
+
+/* Inserts the integer `v` into the set held by `vec`. Returns unit. */
+extern "C" CAMLprim value caml_unordered_set_set(value vec, value v){
+  std::unordered_set<int> * const tags = HSET(vec);
+  const int element = (int) Int_val(v);
+  tags->insert(element);
+  return Val_unit;
+}
+
+/* Returns XMLTree::SelectDesc(node, set held by `tags`), converted with Val_int. */
+extern "C" CAMLprim value caml_xml_tree_select_desc(value tree, value node, value tags){
+  XMLTree * const xt = XMLTREE(tree);
+  return Val_int(xt->SelectDesc(TREENODEVAL(node), HSET(tags)));
+}
+/* Returns XMLTree::SelectChild(node, set held by `tags`), converted with Val_int. */
+extern "C" CAMLprim value caml_xml_tree_select_child(value tree, value node, value tags){
+  XMLTree * const xt = XMLTREE(tree);
+  return Val_int(xt->SelectChild(TREENODEVAL(node), HSET(tags)));
+}
+/* Returns XMLTree::SelectFollSibling(node, set held by `tags`), converted with Val_int. */
+extern "C" CAMLprim value caml_xml_tree_select_foll_sibling(value tree, value node, value tags){
+  XMLTree * const xt = XMLTREE(tree);
+  return Val_int(xt->SelectFollSibling(TREENODEVAL(node), HSET(tags)));
+}
+/* Returns XMLTree::SelectFollBelow(node, set held by `tags`, ctx), converted with Val_int. */
+extern "C" CAMLprim value caml_xml_tree_select_foll_below(value tree, value node, value tags,value ctx){
+  XMLTree * const xt = XMLTREE(tree);
+  return Val_int(xt->SelectFollBelow(TREENODEVAL(node), HSET(tags), TREENODEVAL(ctx)));
+}
+
+
+/* Returns a 2-field tuple holding the `min` and `max` members of the range given by
+   XMLTree::DocIds for `node`, each converted with Val_int. */
+extern "C" CAMLprim value caml_xml_tree_doc_ids(value tree, value node){
+  CAMLparam2(tree,node);
+  CAMLlocal1(tuple);
+  tuple = caml_alloc_tuple(2);
+  range ids = XMLTREE(tree)->DocIds(TREENODEVAL(node));
+  /* Field 0 takes the lower bound, field 1 the upper bound */
+  caml_initialize(&Field(tuple,0),Val_int(ids.min));
+  caml_initialize(&Field(tuple,1),Val_int(ids.max));
+  CAMLreturn (tuple);
}