/**************************************
* OCamlDriver.cpp
* -------------------
- * A Test Ocaml Driver which calls the C++ methods and
+ * An Ocaml Driver which calls the C++ methods and
* adds a C wrapper interface with OCaml code.
*
* Author: Kim Nguyen
* Date: 04/11/08
*/
-/* OCaml memory managment */
+/***
+ * Conventions:
+ * functions never doing any allocation (non caml_alloc*, caml_copy_string,...)
+ * have NOALLOC in the comment and their external declaration can have "noalloc"
+ */
+
+
+#include <unordered_set>
+#include <algorithm>
+#include "XMLDocShredder.h"
+#include "XMLTree.h"
+#include "Utils.h"
+
extern "C" {
+/* OCaml memory managment */
#include <caml/mlvalues.h>
#include <caml/alloc.h>
#include <caml/memory.h>
#include <caml/callback.h>
#include <caml/fail.h>
#include <caml/custom.h>
-
+#include "results.h"
+#include <stdio.h>
-} //extern C
-
-//#include "TextCollection/TextCollection.h"
-#include "XMLDocShredder.h"
-#include "XMLTree.h"
-#include "Utils.h"
-
-#define CAMLRAISECPP(e) (caml_failwith( ((e).what())))
+#define CAMLRAISEMSG(msg) (caml_raise_with_string(*cpp_exception,(msg) ))
#define NOT_IMPLEMENTED(s) (caml_failwith(s))
#define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x)))
+#define HSET(x) ((TagIdSet*)((* (TagIdSet**) Data_custom_val(x))))
#define TEXTCOLLECTION(x)
#define TREENODEVAL(i) ((treeNode) (Int_val(i)))
+#define TAGVAL(i) ((TagType) (Int_val(i)))
+#define XMLTREE_ROOT 0
+#define NoAlloc
-extern "C" {
+
static struct custom_operations ops;
- static bool initialized = false;
+ static struct custom_operations set_ops;
+ static value * cpp_exception = NULL;
+ static bool ops_initialized = false;
+
}
+
extern "C" void caml_xml_tree_finalize(value tree){
delete XMLTREE(tree);
return;
}
-extern "C" void caml_init_ops () {
+extern "C" void caml_hset_finalize(value hblock){
+ delete HSET(hblock);
+ return;
+}
- if (initialized)
- return;
+extern "C" value caml_init_lib (value unit) {
+ CAMLparam1(unit);
+ if (!ops_initialized){
+
+
ops.identifier = (char*) "XMLTree";
ops.finalize = caml_xml_tree_finalize;
- return;
+ set_ops.identifier = (char*) "unordered_set";
+ set_ops.finalize = caml_hset_finalize;
+
+ cpp_exception = caml_named_value("CPlusPlusError");
+ if (cpp_exception == NULL){
+ string s = "FATAL: Unregistered exception ";
+ s += "CPlusPlusError";
+ caml_failwith(s.c_str());
+ };
+
+ ops_initialized = true;
+
+ };
+ CAMLreturn(Val_unit);
+
}
-
-extern "C" CAMLprim value caml_call_shredder_uri(value uri,value sf, value iet, value dtc){
- CAMLparam1(uri);
+extern "C" value caml_shredder_parse(XMLDocShredder *shredder){
+ CAMLparam0();
CAMLlocal1(doc);
- char *fn = String_val(uri);
- try {
- XMLDocShredder shredder(fn,Int_val(sf),Bool_val(iet),Bool_val(dtc));
XMLTree * tree;
- shredder.processStartDocument(fn);
- shredder.parse();
- shredder.processEndDocument();
- caml_init_ops();
+ shredder->processStartDocument("");
+ shredder->parse();
+ shredder->processEndDocument();
doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2);
- tree = (XMLTree *) shredder.storageIfc_->returnDocument();
+ tree = (XMLTree *) shredder->getXMLTree();
memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*));
CAMLreturn(doc);
- }
- catch (const std::exception& e){
- CAMLRAISECPP(e);
- };
}
-extern "C" CAMLprim value caml_call_shredder_string(value data,value sf, value iet, value dtc){
+extern "C" value caml_call_shredder_uri(value uri,value sf, value iet, value dtc){
+ CAMLparam1(uri);
+ CAMLlocal1(doc);
+ char *fn = String_val(uri);
+ XMLDocShredder * shredder;
+ try {
+ shredder = new XMLDocShredder(fn,Int_val(sf),Bool_val(iet),Bool_val(dtc));
+ doc = caml_shredder_parse(shredder);
+ delete shredder;
+ }
+ catch (const std::exception& e){ CAMLRAISEMSG(e.what()); }
+ catch (string msg){ CAMLRAISEMSG(msg.c_str()); }
+ catch (char const * msg){ CAMLRAISEMSG(msg); };
+ CAMLreturn (doc);
+
+}
+extern "C" value caml_call_shredder_string(value data,value sf, value iet, value dtc){
CAMLparam1(data);
CAMLlocal1(doc);
- unsigned int ln = string_length(data);
+ XMLDocShredder * shredder;
+ unsigned int ln = caml_string_length(data);
unsigned char *fn = (unsigned char*) String_val(data);
-
try {
- XMLDocShredder shredder(fn,ln,Int_val(sf),Bool_val(iet),Bool_val(dtc));
- XMLTree* tree;
- shredder.processStartDocument("");
- shredder.parse();
- shredder.processEndDocument();
- caml_init_ops();
+ shredder = new XMLDocShredder (fn,ln,Int_val(sf),Bool_val(iet),Bool_val(dtc));
+ doc = caml_shredder_parse(shredder);
+ delete shredder;
+ }
+ catch (const std::exception& e){ CAMLRAISEMSG(e.what()); }
+ catch (string msg){ CAMLRAISEMSG(msg.c_str()); }
+ catch (char const * msg){ CAMLRAISEMSG(msg); };
+ CAMLreturn(doc);
+}
+
+extern "C" value caml_xml_tree_save(value tree,value fd, value str){
+ CAMLparam3(tree,fd,str);
+ XMLTREE(tree)->Save(Int_val(fd), String_val(str));
+ CAMLreturn (Val_unit);
+}
+
+extern "C" value caml_xml_tree_load(value fd, value str, value load_tc,value sf){
+ CAMLparam4(fd,str,load_tc,sf);
+ CAMLlocal1(doc);
+ XMLTree * tree;
+ try {
+ tree = XMLTree::Load(Int_val(fd),String_val(str),Bool_val(load_tc),Int_val(sf));
+ printf("Pointer to tree is %p\n", (void*) tree);
doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2);
- tree = (XMLTree *) shredder.storageIfc_->returnDocument();
memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*));
CAMLreturn(doc);
}
- catch (const std::exception& e) {
- CAMLRAISECPP(e);
- };
+ catch (const xmlpp::internal_error& e){ CAMLRAISEMSG(e.what()); }
+ catch (const std::exception& e){ CAMLRAISEMSG(e.what()); }
+ catch (string msg){ CAMLRAISEMSG(msg.c_str()); }
+ catch (char const * msg){ CAMLRAISEMSG(msg); };
}
-void traversal_rec(XMLTree* tree, treeNode id){
- DocID tid;
- if (id == NULLT)
- return;
- // int tag = tree->Tag(id);
- if (id) {
- tid = tree->PrevText(id);
- char * data = (char *) (tree->getTextCollection())->GetText(tid);
- if (tree->IsLeaf(id)){
- tid = tree->MyText(id);
-
- data = (char*) (tree->getTextCollection())->GetText(tid);
- };
-
- if (tree->NextSibling(id) == NULLT){
- tid = tree->NextText(id);
- data = (char*) (tree->getTextCollection())->GetText(tid);
- };
- };
- traversal_rec(tree,tree->FirstChild(id));
- traversal_rec(tree,tree->NextSibling(id));
- return;
-}
-
-extern "C" CAMLprim value caml_cpp_traversal(value tree){
- CAMLparam1(tree);
- traversal_rec(XMLTREE(tree),XMLTREE(tree)->Root());
- CAMLreturn(Val_unit);
-}
-extern "C" CAMLprim value caml_text_collection_get_text(value tree, value id){
+/**
+ * Interface to the TextCollection
+ */
+
+/**
+ * Utility functions
+ */
+
+extern "C" value caml_text_collection_get_text(value tree, value id){
CAMLparam2(tree,id);
CAMLlocal1(str);
uchar* txt = XMLTREE(tree)->GetText((DocID) Int_val(id));
str = caml_copy_string((const char*)txt);
- delete (txt);
CAMLreturn (str);
}
-extern "C" CAMLprim value caml_text_collection_get_cached_text(value tree, value id){
+
+extern "C" value caml_text_collection_empty_text(value tree,value id){
CAMLparam2(tree,id);
- CAMLlocal1(str);
- char* txt = (char*) XMLTREE(tree)->GetCachedText((DocID) Int_val(id));
- str = caml_copy_string(txt);
- free(txt);
- CAMLreturn (str);
+ CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id))));
}
-extern "C" CAMLprim value caml_text_collection_size(value tree){
- CAMLparam1(tree);
- // CAMLreturn (Val_int( XMLTREE(tree)->CachedText.size()));
- NOT_IMPLEMENTED("text_collection_size");
- CAMLreturn (Val_unit);
+bool docId_comp(DocID x, DocID y) { return x < y; };
+
+/**
+ * Existential queries
+ */
+
+extern "C" value caml_text_collection_is_prefix(value tree,value str){
+ CAMLparam2(tree,str);
+ uchar * cstr = (uchar *) String_val(str);
+ CAMLreturn (Val_bool((int) XMLTREE(tree)->IsPrefix(cstr)));
}
+extern "C" value caml_text_collection_is_suffix(value tree,value str){
+ CAMLparam2(tree,str);
+ uchar * cstr = (uchar *) String_val(str);
+ CAMLreturn (Val_bool((int) XMLTREE(tree)->IsSuffix(cstr)));
+}
+extern "C" value caml_text_collection_is_equal(value tree,value str){
+ CAMLparam2(tree,str);
+ uchar * cstr = (uchar *) String_val(str);
+ CAMLreturn (Val_bool((int) XMLTREE(tree)->IsEqual(cstr)));
+}
+extern "C" value caml_text_collection_is_contains(value tree,value str){
+ CAMLparam2(tree,str);
+ uchar * cstr = (uchar *) String_val(str);
+ CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsContains(cstr)));
+}
+extern "C" value caml_text_collection_is_lessthan(value tree,value str){
+ CAMLparam2(tree,str);
+ uchar * cstr = (uchar *) String_val(str);
+ CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsLessThan(cstr)));
+}
-extern "C" CAMLprim value caml_text_collection_empty_text(value tree,value id){
- CAMLparam2(tree,id);
- CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id))));
+
+/**
+ * Count Queries
+ */
+
+/**
+ * Global counting
+ */
+extern "C" value caml_text_collection_count(value tree,value str){
+ CAMLparam2(tree,str);
+ uchar * cstr = (uchar *) String_val(str);
+ CAMLreturn (Val_int((XMLTREE(tree)->Count(cstr))));
}
-extern "C" CAMLprim value caml_text_collection_is_contains(value tree,value str){
+extern "C" value caml_text_collection_count_prefix(value tree,value str){
+ CAMLparam2(tree,str);
+ uchar * cstr = (uchar *) String_val(str);
+ CAMLreturn (Val_int((XMLTREE(tree)->CountPrefix(cstr))));
+}
+
+extern "C" value caml_text_collection_count_suffix(value tree,value str){
+ CAMLparam2(tree,str);
+ uchar * cstr = (uchar *) String_val(str);
+ CAMLreturn (Val_int((XMLTREE(tree)->CountSuffix(cstr))));
+}
+
+extern "C" value caml_text_collection_count_equal(value tree,value str){
+ CAMLparam2(tree,str);
+ uchar * cstr = (uchar *) String_val(str);
+ CAMLreturn (Val_int((XMLTREE(tree)->CountEqual(cstr))));
+}
+
+extern "C" value caml_text_collection_count_contains(value tree,value str){
CAMLparam2(tree,str);
uchar * cstr = (uchar *) String_val(str);
- CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsContains(cstr)));
+ CAMLreturn (Val_int((XMLTREE(tree)->CountContains(cstr))));
}
-extern "C" CAMLprim value caml_text_collection_count_contains(value tree,value str){
+extern "C" value caml_text_collection_count_lessthan(value tree,value str){
CAMLparam2(tree,str);
uchar * cstr = (uchar *) String_val(str);
- CAMLreturn (Val_int((XMLTREE(tree)->CountContains(cstr))));
-
+ CAMLreturn (Val_int((XMLTREE(tree)->CountLessThan(cstr))));
}
-extern "C" CAMLprim value caml_text_collection_count(value tree,value str){
+
+static value sort_alloc_array(std::vector<DocID> results, value resarray){
+ std::sort(results.begin(), results.end(), docId_comp);
+ size_t s = results.size();
+ resarray = caml_alloc_tuple(s);
+ for (size_t i = 0; i < s ;i++){
+ caml_initialize(&Field(resarray,i),Val_int(results[i]));
+ };
+ return resarray;
+
+}
+
+/**
+ * Full reporting queries
+ */
+
+extern "C" value caml_text_collection_prefix(value tree,value str){
CAMLparam2(tree,str);
- //uchar * cstr = (uchar *) String_val(str);
- NOT_IMPLEMENTED("text_collection_count");
- CAMLreturn (Val_unit);
-
+ CAMLlocal1(resarray);
+ uchar * cstr = (uchar *) String_val(str);
+ std::vector<DocID> results = XMLTREE(tree)->Prefix(cstr);
+ CAMLreturn (sort_alloc_array(results,resarray));
+}
+
+extern "C" value caml_text_collection_suffix(value tree,value str){
+ CAMLparam2(tree,str);
+ CAMLlocal1(resarray);
+ uchar * cstr = (uchar *) String_val(str);
+ std::vector<DocID> results = XMLTREE(tree)->Suffix(cstr);
+ CAMLreturn (sort_alloc_array(results,resarray));
+}
+
+extern "C" value caml_text_collection_equals(value tree,value str){
+ CAMLparam2(tree,str);
+ CAMLlocal1(resarray);
+ uchar * cstr = (uchar *) strdup(String_val(str));
+ std::vector<DocID> results = XMLTREE(tree)->Equals(cstr);
+ free(cstr);
+ CAMLreturn (sort_alloc_array(results,resarray));
+}
+
+extern "C" value caml_text_collection_contains(value tree,value str){
+ CAMLparam2(tree,str);
+ CAMLlocal1(resarray);
+ uchar * cstr = (uchar *) String_val(str);
+ std::vector<DocID> results = XMLTREE(tree)->Contains(cstr);
+ CAMLreturn (sort_alloc_array(results,resarray));
}
-extern "C" CAMLprim value caml_text_collection_contains(value tree,value str){
+extern "C" value caml_text_collection_lessthan(value tree,value str){
CAMLparam2(tree,str);
CAMLlocal1(resarray);
uchar * cstr = (uchar *) String_val(str);
- std::vector<DocID> results;
- results = XMLTREE(tree)->Contains(cstr);
- //free(cstr);
- resarray = caml_alloc_tuple(results.size());
+ std::vector<DocID> results = XMLTREE(tree)->LessThan(cstr);
+ CAMLreturn (sort_alloc_array(results,resarray));
+}
+
+/** Full reporting into a bit vector
+ */
+
+extern "C" value caml_text_collection_prefix_bv(value tree,value str){
+ CAMLparam2(tree,str);
+ uchar * cstr = (uchar *) strdup(String_val(str));
+ std::vector<DocID> results = XMLTREE(tree)->Prefix(cstr);
+ std::vector<bool> *bv = new std::vector<bool>(XMLTREE(tree)->Size(),false);
+ for (unsigned int i=0; i < results.size(); i++)
+ bv->at(XMLTREE(tree)->ParentNode(results[i]))=true;
+ free(cstr);
+ CAMLreturn ((value) bv);
+}
+
+extern "C" value caml_text_collection_suffix_bv(value tree,value str){
+ CAMLparam2(tree,str);
+ uchar * cstr = (uchar *) strdup(String_val(str));
+ std::vector<DocID> results = XMLTREE(tree)->Suffix(cstr);
+ std::vector<bool> *bv = new std::vector<bool>(XMLTREE(tree)->Size(),false);
+ for (unsigned int i=0; i < results.size(); i++)
+ bv->at(XMLTREE(tree)->ParentNode(results[i]))=true;
+ free(cstr);
+ CAMLreturn ((value) bv);
+}
- for (unsigned int i=0; i<results.size();i++){
- caml_initialize(&Field(resarray,i),Val_int(results[i]));
+extern "C" value caml_text_collection_equals_bv(value tree,value str){
+ CAMLparam2(tree,str);
+ uchar * cstr = (uchar *) strdup(String_val(str));
+ XMLTree* xt = XMLTREE(tree);
+ std::vector<DocID> results = xt->Equals(cstr);
+ std::vector<bool> *bv = new std::vector<bool>(xt->Size(),false);
+ for (unsigned int i=0; i < results.size(); i++)
+ bv->at(xt->Parent(xt->ParentNode(results[i])))=true;
+ free(cstr);
+ CAMLreturn ((value) bv);
+}
+
+
+extern "C" value caml_text_collection_contains_bv(value tree,value str){
+ CAMLparam2(tree,str);
+ uchar * cstr = (uchar *) strdup(String_val(str));
+ XMLTree* xt = XMLTREE(tree);
+ std::vector<DocID> results = xt->Contains(cstr);
+ std::vector<bool> *bv = new std::vector<bool>(xt->Size(),false);
+ for (unsigned int i=0; i < results.size(); i++){
+ bv->at(xt->Parent(xt->ParentNode(results[i])))=true;
+ }
+ free(cstr);
+ CAMLreturn ((value) bv);
+}
+
+extern "C" value caml_text_collection_contains_bv_update(value tree,value str,value vbv){
+ CAMLparam3(tree,str,vbv);
+ uchar * cstr = (uchar *) strdup(String_val(str));
+ XMLTree* xt = XMLTREE(tree);
+ std::vector<DocID> results = xt->Contains(cstr);
+ std::vector<bool> *bv = (std::vector<bool> *) vbv;
+ for (unsigned int i=0; i < results.size(); i++){
+ /** Hack for the Techfest demo */
+ (*bv)[xt->Parent(xt->Parent(xt->ParentNode(results[i])))]=true;
+ }
+ free(cstr);
+ CAMLreturn ((value) bv);
+}
+extern "C" value caml_text_collection_contains_bv_update_list(value tree,value str,value acc,value vbv,value count){
+ CAMLparam4(tree,str,acc,vbv);
+ CAMLlocal1(head);
+ uchar * cstr = (uchar *) strdup(String_val(str));
+ XMLTree* xt = XMLTREE(tree);
+ std::vector<DocID> results = xt->Contains(cstr);
+ std::vector<bool> *bv = (std::vector<bool> *) vbv;
+ treeNode idx;
+ int acc_count = Int_val(count);
+ for (unsigned int i=0; i < results.size(); i++){
+ idx = xt->Parent(xt->Parent(xt->ParentNode(results[i])));
+ if (!(*bv)[idx]) {
+ (*bv)[idx]=true;
+ head = caml_alloc_tuple(2);
+ caml_initialize(&Field(head,0),Val_int(idx));
+ caml_initialize(&Field(head,1),acc);
+ acc=head;
+ acc_count++;
+ };
};
- CAMLreturn (resarray);
+ free(cstr);
+ head = caml_alloc_tuple(3);
+ caml_initialize(&Field(head,0),acc);
+ caml_initialize(&Field(head,1),(value) bv);
+ caml_initialize(&Field(head,2),Val_int(acc_count));
+ CAMLreturn (head);
+}
+
+extern "C" value caml_text_collection_lessthan_bv(value tree,value str){
+ CAMLparam2(tree,str);
+ uchar * cstr = (uchar *) strdup(String_val(str));
+ std::vector<DocID> results = XMLTREE(tree)->LessThan(cstr);
+ std::vector<bool> *bv = new std::vector<bool>(XMLTREE(tree)->Size(),false);
+ for (unsigned int i=0; i < results.size(); i++)
+ bv->at(XMLTREE(tree)->ParentNode(results[i]))=true;
+ free(cstr);
+ CAMLreturn ((value) bv);
}
+/*************************************************************************/
-extern "C" CAMLprim value caml_xml_tree_root(value tree){
- CAMLparam1(tree);
- CAMLreturn (TREENODEVAL(XMLTREE(tree)->Root()));
+/**
+ * XMLTree bindings
+ * All of the functions here call the _unsafe version and implement the logics themselves
+ * (test for NULLT and so on). This avoids one indirection + one call when the tests fails.
+ */
+
+
+NoAlloc extern "C" value caml_xml_tree_root(value tree){
+ return (Val_int(XMLTREE_ROOT));
}
-extern "C" CAMLprim value caml_xml_tree_text_collection(value tree){
- CAMLparam1(tree);
- CAMLreturn((value) XMLTREE(tree)->getTextCollection());
+
+NoAlloc extern "C" value caml_xml_tree_size(value tree){
+ return (Val_int(XMLTREE(tree)->Size()));
}
-extern "C" CAMLprim value caml_xml_tree_parent(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int (XMLTREE(tree)->Parent(TREENODEVAL(id))));
+
+NoAlloc extern "C" value caml_xml_tree_num_tags(value tree){
+ return (Val_int(XMLTREE(tree)->NumTags()));
}
-extern "C" CAMLprim value caml_xml_tree_parent_doc(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int (XMLTREE(tree)->ParentNode((DocID) Int_val(id))));
+
+NoAlloc extern "C" value caml_xml_tree_subtree_size(value tree, value node){
+ return (Val_int(XMLTREE(tree)->SubtreeSize(TREENODEVAL(node))));
}
-extern "C" CAMLprim value caml_xml_tree_is_ancestor(value tree,value id1, value id2) {
- CAMLparam3(tree,id1,id2);
- CAMLreturn(Val_bool (XMLTREE(tree)->IsAncestor(TREENODEVAL(id1),TREENODEVAL(id2))));
+NoAlloc extern "C" value caml_xml_tree_subtree_tags(value tree, value node, value tag){
+ return (Val_int(XMLTREE(tree)->SubtreeTags(TREENODEVAL(node), TAGVAL(tag))));
}
-extern "C" CAMLprim value caml_xml_tree_serialize(value tree, value filename){
- CAMLparam2(tree,filename);
- NOT_IMPLEMENTED("caml_xml_tree_serialize");
- CAMLreturn(Val_unit);
+NoAlloc extern "C" value caml_xml_tree_subtree_elements(value tree, value node){
+ return (Val_int(XMLTREE(tree)->SubtreeElements(TREENODEVAL(node))));
}
-extern "C" CAMLprim value caml_xml_tree_unserialize(value filename){
- CAMLparam1(filename);
- NOT_IMPLEMENTED("caml_xml_tree_unserialize");
- CAMLreturn(Val_unit);
+NoAlloc extern "C" value caml_xml_tree_is_leaf(value tree, value node){
+ return (Val_bool(XMLTREE(tree)->IsLeaf(TREENODEVAL(node))));
}
+NoAlloc extern "C" value caml_xml_tree_is_ancestor(value tree, value node1,value node2){
+ return (Val_bool(XMLTREE(tree)->IsAncestor(TREENODEVAL(node1),TREENODEVAL(node2))));
+}
-extern "C" CAMLprim value caml_xml_tree_first_child(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int (XMLTREE(tree)->FirstChild(TREENODEVAL(id))));
+NoAlloc extern "C" value caml_xml_tree_is_child(value tree, value node1,value node2){
+ return (Val_bool(XMLTREE(tree)->IsChild(TREENODEVAL(node1),TREENODEVAL(node2))));
}
-extern "C" CAMLprim value caml_xml_tree_is_leaf(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_bool (XMLTREE(tree)->IsLeaf(TREENODEVAL(id))));
+NoAlloc extern "C" value caml_xml_tree_is_first_child(value tree, value node){
+ return (Val_bool(XMLTREE(tree)->IsFirstChild(TREENODEVAL(node))));
}
-extern "C" CAMLprim value caml_xml_tree_tagged_desc(value tree, value id, value tag){
- CAMLparam3(tree,id,tag);
- CAMLreturn(Val_int (XMLTREE(tree)->TaggedDesc(TREENODEVAL(id),(TagType) Int_val(tag))));
+NoAlloc extern "C" value caml_xml_tree_num_children(value tree, value node){
+ return (Val_int(XMLTREE(tree)->NumChildren(TREENODEVAL(node))));
}
-extern "C" CAMLprim value caml_xml_tree_tagged_next(value tree, value id, value tag){
- CAMLparam3(tree,id,tag);
- CAMLreturn(Val_int (XMLTREE(tree)->TaggedNext(TREENODEVAL(id),(TagType) Int_val(tag))));
+NoAlloc extern "C" value caml_xml_tree_child_number(value tree, value node){
+ return (Val_int(XMLTREE(tree)->ChildNumber(TREENODEVAL(node))));
}
+NoAlloc extern "C" value caml_xml_tree_depth(value tree, value node){
+ return (Val_int(XMLTREE(tree)->Depth(TREENODEVAL(node))));
+}
+NoAlloc extern "C" value caml_xml_tree_preorder(value tree, value node){
+ return (Val_int(XMLTREE(tree)->Preorder(TREENODEVAL(node))));
+}
+NoAlloc extern "C" value caml_xml_tree_postorder(value tree, value node){
+ return (Val_int(XMLTREE(tree)->Postorder(TREENODEVAL(node))));
+}
-extern "C" CAMLprim value caml_xml_tree_tagged_foll(value tree, value id, value tag){
- CAMLparam3(tree,id,tag);
- CAMLreturn(Val_int (XMLTREE(tree)->TaggedFoll(TREENODEVAL(id),(TagType) Int_val(tag))));
+NoAlloc extern "C" value caml_xml_tree_tag(value tree, value node){
+ return (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(node))));
}
+extern "C" value caml_xml_tree_doc_ids(value tree, value node){
+ CAMLparam2(tree,node);
+ CAMLlocal1(tuple);
+ range ids;
+ tuple = caml_alloc(2,0);
+ ids = XMLTREE(tree)->DocIds(Int_val(node));
+ Store_field(tuple,0,Val_int(ids.min));
+ Store_field(tuple,1,Val_int(ids.max));
+ CAMLreturn (tuple);
+}
-extern "C" CAMLprim value caml_xml_tree_next_sibling(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id))));
+NoAlloc extern "C" value caml_xml_tree_parent(value tree, value node){
+ return (Val_int(XMLTREE(tree)->Parent(TREENODEVAL(node))));
}
-extern "C" CAMLprim value caml_xml_tree_prev_sibling(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int (XMLTREE(tree)->PrevSibling(TREENODEVAL(id))));
+NoAlloc extern "C" value caml_xml_tree_child(value tree, value node,value idx){
+ return (Val_int(XMLTREE(tree)->Child(TREENODEVAL(node),Int_val(idx))));
}
-extern "C" CAMLprim value caml_xml_tree_prev_text(value tree, value id){
- CAMLparam2(tree,id);
+NoAlloc extern "C" value caml_xml_tree_first_child(value tree, value node){
+ return (Val_int(XMLTREE(tree)->FirstChild(TREENODEVAL(node))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_first_element(value tree, value node){
+ return (Val_int(XMLTREE(tree)->FirstElement(TREENODEVAL(node))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_last_child(value tree, value node){
+ return (Val_int(XMLTREE(tree)->LastChild(TREENODEVAL(node))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_next_sibling(value tree, value node){
+ return (Val_int(XMLTREE(tree)->NextSibling(TREENODEVAL(node))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_next_element(value tree, value node){
+ return (Val_int(XMLTREE(tree)->NextElement(TREENODEVAL(node))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_prev_sibling(value tree, value node){
+ return (Val_int(XMLTREE(tree)->PrevSibling(TREENODEVAL(node))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_tagged_child(value tree, value node,value tag){
+ return (Val_int(XMLTREE(tree)->TaggedChild(TREENODEVAL(node),TAGVAL(tag))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_select_child(value tree, value node,value tags){
+ return (Val_int(XMLTREE(tree)->SelectChild(TREENODEVAL(node), HSET(tags))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_tagged_following_sibling(value tree, value node,value tag){
+ return (Val_int(XMLTREE(tree)->TaggedFollowingSibling(TREENODEVAL(node),TAGVAL(tag))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_select_following_sibling(value tree, value node,value tags){
+ return (Val_int(XMLTREE(tree)->SelectFollowingSibling(TREENODEVAL(node), HSET(tags))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_tagged_descendant(value tree, value node, value tag){
+ return (Val_int(XMLTREE(tree)->TaggedDescendant(TREENODEVAL(node), TAGVAL(tag))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_select_descendant(value tree, value node, value tags){
+ return (Val_int(XMLTREE(tree)->SelectDescendant(TREENODEVAL(node), HSET(tags))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_tagged_preceding(value tree, value node, value tag){
+ return (Val_int(XMLTREE(tree)->TaggedPreceding(TREENODEVAL(node), TAGVAL(tag))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_tagged_following(value tree, value node, value tag){
+ return (Val_int(XMLTREE(tree)->TaggedFollowing(TREENODEVAL(node), TAGVAL(tag))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_tagged_following_below(value tree, value node, value tag, value ancestor){
+ return (Val_int(XMLTREE(tree)->TaggedFollowingBelow(TREENODEVAL(node), TAGVAL(tag), TREENODEVAL(ancestor))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_select_following_below(value tree, value node, value tags, value ancestor){
+ return (Val_int(XMLTREE(tree)->SelectFollowingBelow(TREENODEVAL(node), HSET(tags), TREENODEVAL(ancestor))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_tagged_following_before(value tree, value node, value tag, value closing){
+ return (Val_int(XMLTREE(tree)->TaggedFollowingBefore(TREENODEVAL(node), TAGVAL(tag), TREENODEVAL(closing))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_select_following_before(value tree, value node, value tags, value closing){
+ return (Val_int(XMLTREE(tree)->SelectFollowingBefore(TREENODEVAL(node), HSET(tags), TREENODEVAL(closing))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_tagged_ancestor(value tree, value node, value tag){
+ return (Val_int(XMLTREE(tree)->TaggedAncestor(TREENODEVAL(node), TAGVAL(tag))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_my_text(value tree, value node){
+ return (Val_int(XMLTREE(tree)->MyText(TREENODEVAL(node))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_my_text_unsafe(value tree, value node){
+ return (Val_int(XMLTREE(tree)->MyTextUnsafe(TREENODEVAL(node))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_text_xml_id(value tree, value docid){
+ return (Val_int(XMLTREE(tree)->TextXMLId(Int_val(docid))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_node_xml_id(value tree, value node){
+ return (Val_int(XMLTREE(tree)->NodeXMLId(TREENODEVAL(node))));
+}
+
+NoAlloc extern "C" value caml_xml_tree_parent_node(value tree, value docid){
+ return (Val_int(XMLTREE(tree)->ParentNode(Int_val(docid))));
+}
+/*
+NoAlloc extern "C" value caml_xml_tree_prev_node(value tree, value docid){
+ return (Val_int(XMLTREE(tree)->PrevNode(Int_val(docid))));
+}
+*/
+extern "C" value caml_xml_tree_get_tag_id(value tree, value tagname){
+ CAMLparam2(tree,tagname);
CAMLlocal1(res);
- CAMLreturn(Val_int((XMLTREE(tree)->PrevText(TREENODEVAL(id)))));
+ unsigned char* ctagname = (unsigned char*) strdup(String_val(tagname));
+ res = Val_int(XMLTREE(tree)->GetTagId(ctagname));
+ free(ctagname);
CAMLreturn(res);
}
-extern "C" CAMLprim value caml_xml_tree_next_text(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int((XMLTREE(tree)->NextText(TREENODEVAL(id)))));
+
+extern "C" value caml_xml_tree_get_tag_name(value tree, value tag){
+ CAMLparam2(tree,tag);
+ CAMLlocal1(res);
+ res = caml_copy_string((const char*) XMLTREE(tree)->GetTagNameByRef(TAGVAL(tag)));
+ CAMLreturn(res);
}
-extern "C" CAMLprim value caml_xml_tree_my_text(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int((XMLTREE(tree)->MyText(TREENODEVAL(id)))));
+
+extern "C" value caml_xml_tree_register_tag(value tree, value tagname){
+ CAMLparam2(tree,tagname);
+ CAMLlocal1(res);
+ unsigned char* ctagname = (unsigned char*) strdup(String_val(tagname));
+ res = Val_int(XMLTREE(tree)->RegisterTag(ctagname));
+ free(ctagname);
+ CAMLreturn(res);
}
-extern "C" CAMLprim value caml_xml_tree_text_xml_id(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int((XMLTREE(tree)->TextXMLId(TREENODEVAL(id)))));
+
+NoAlloc extern "C" value caml_xml_tree_get_text_collection(value tree){
+ return((value) XMLTREE(tree)->getTextCollection());
}
-extern "C" CAMLprim value caml_xml_tree_node_xml_id(value tree, value id){
- CAMLparam2(tree,id);
- CAMLreturn(Val_int((XMLTREE(tree)->NodeXMLId(TREENODEVAL(id)))));
+
+NoAlloc extern "C" value caml_xml_tree_closing(value tree, value node){
+ return (Val_int(XMLTREE(tree)->Closing(TREENODEVAL(node))));
}
-extern "C" CAMLprim value caml_xml_tree_tag_name(value tree, value tagid){
- CAMLparam2(tree,tagid);
- CAMLlocal1(str);
- char* tag;
- tag = (char*) XMLTREE(tree)->GetTagNameByRef((TagType) (Int_val(tagid)));
- str = caml_copy_string((const char*) tag);
- CAMLreturn (str);
+NoAlloc extern "C" value caml_xml_tree_is_open(value tree, value node){
+ return (Val_bool(XMLTREE(tree)->IsOpen(TREENODEVAL(node))));
}
-extern "C" CAMLprim value caml_xml_tree_tag_id(value tree,value id){
- CAMLparam2(tree,id);
- CAMLreturn (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(id))));
+
+NoAlloc extern "C" value caml_xml_tree_nullt(value unit){
+ return (NULLT);
}
-extern "C" CAMLprim value caml_xml_tree_subtree_tags(value tree,value id,value tag){
- CAMLparam3(tree,id,tag);
- CAMLreturn (Val_int(XMLTREE(tree)->SubtreeTags(TREENODEVAL(id),Int_val(tag))));
+NoAlloc extern "C" value caml_unordered_set_length(value hset){
+ return (Val_int((HSET(hset))->size()));
}
+extern "C" value caml_unordered_set_alloc(value unit){
+ CAMLparam1(unit);
+ CAMLlocal1(hset);
+ hset = caml_alloc_custom(&set_ops,sizeof(TagIdSet*),1,2);
+ TagIdSet* ht = new TagIdSet();
+ memcpy(Data_custom_val(hset),&ht,sizeof(TagIdSet*));
+ CAMLreturn (hset);
+}
-extern "C" CAMLprim value caml_xml_tree_register_tag(value tree,value str){
- CAMLparam2(tree,str);
- CAMLlocal1(id);
- unsigned char* tag;
- tag = (unsigned char*) (String_val(str));
- id = Val_int(XMLTREE(tree)->RegisterTag(tag));
- CAMLreturn (id);
+NoAlloc extern "C" value caml_unordered_set_set(value set, value v){
+ HSET(set)->insert((int) Int_val(v));
+ return (Val_unit);
}
-extern "C" CAMLprim value caml_xml_tree_nullt(value unit){
- CAMLparam1(unit);
- CAMLreturn (NULLT);
+NoAlloc extern "C" value caml_result_set_create(value size){
+ results* res = (results*) malloc(sizeof(results));
+ results r = createResults (Int_val(size));
+ res->n = r.n;
+ res->lgn = r.lgn;
+ res->tree = r.tree;
+ return ((value) (res));
}
-extern "C" CAMLprim value caml_xml_tree_save(value tree,value filename){
- CAMLparam2(tree,filename);
- XMLTREE(tree)->Save((unsigned char *) String_val(filename));
- CAMLreturn (Val_unit);
+NoAlloc extern "C" value caml_result_set_set(value result,value p){
+ setResult ( *((results*) result), Int_val(p));
+ return (Val_unit);
}
-extern "C" CAMLprim value caml_xml_tree_load(value filename,value samplerate){
- CAMLparam2(filename,samplerate);
- CAMLlocal1(doc);
- XMLTree * tree;
- tree = XMLTree::Load((unsigned char *) String_val(filename),Int_val(samplerate));
- caml_init_ops();
- doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2);
- memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*));
- CAMLreturn(doc);
+NoAlloc extern "C" value caml_result_set_clear(value result,value p1,value p2){
+ clearRange ( *((results*) result), Int_val(p1), Int_val(p2));
+ return (Val_unit);
+}
+
+NoAlloc extern "C" value caml_result_set_next(value result,value p){
+ results r;
+ r = *( (results *) result);
+ return (Val_int(nextResult(r, Int_val(p))));
+}
+
+NoAlloc extern "C" value caml_result_set_count(value result){
+ results r;
+ r = *( (results *) result);
+ return (Val_int(countResult(r)));
+}
+
+NoAlloc extern "C" value caml_xml_tree_print(value tree,value node,value fd){
+ CAMLparam3(tree,node,fd);
+ XMLTREE(tree)->Print(Int_val(fd),TREENODEVAL(node), false);
+ CAMLreturn(Val_unit);
+}
+
+NoAlloc extern "C" value caml_set_tag_bits(value result, value tag, value tree, value node)
+{
+ results r;
+ XMLTree *t = XMLTREE(Field(tree,0));
+ treeNode opening = TREENODEVAL(node);
+ treeNode closing = t->Closing(opening);
+ TagType target_tag = Int_val(tag);
+ treeNode first = t->TaggedDescendant(opening,target_tag);
+ r = *( (results *) result);
+ opening = first;
+ while (opening != NULLT){
+ setResult(r,opening);
+ opening = t->TaggedFollowingBefore(opening,target_tag,closing);
+ };
+ return(Val_int(first));
+}
+
+
+NoAlloc extern "C" value caml_bit_vector_create(value size){
+ return (value) (new vector<bool>(Int_val(size),false));
+}
+
+NoAlloc extern "C" value caml_bit_vector_free(value vect){
+ delete ((vector<bool>*) vect);
+ return Val_unit;
+}
+
+NoAlloc extern "C" value caml_bit_vector_get(value vect,value idx){
+ return Val_bool (((vector<bool>*)vect)->at(Int_val(idx)));
+}
+
+NoAlloc extern "C" value caml_bit_vector_set(value vect,value idx,value b){
+ (((vector<bool>*)vect)->at(Int_val(idx))) = (bool) Bool_val(b);
+ return Val_unit;
+}
+
+NoAlloc extern "C" value caml_bit_vector_next(value vect,value idx){
+ vector<bool>* bv = (vector<bool>*) vect;
+ int i = Int_val(idx);
+ int l = bv->size();
+ while (i < l && !((*bv)[i]))
+ i++;
+ return Val_int(i);
+}
+NoAlloc extern "C" value caml_bit_vector_prev(value vect,value idx){
+ int i = Int_val(idx);
+ while (i >= 0 && !((*((vector<bool>*) vect))[i]))
+ i--;
+ return Val_int(i);
+}
+
+extern "C" value caml_bit_vector_node_array(value vect){
+ CAMLparam0();
+ CAMLlocal1(res);
+ vector<bool>* bv = (vector<bool>*) vect;
+ vector<treeNode> vr;
+ int l = bv->size();
+ int i = 0;
+ while (i < l){
+ if ((*bv)[i]) vr.push_back(i);
+ i++;
+ };
+ l = vr.size();
+ res = caml_alloc_tuple(l);
+ for(i=0;i<l;i++)
+ caml_initialize(&Field(res,i),Val_int(vr[i]));
+ CAMLreturn (res);
+}
+
+
+int iterjump(XMLTree* tree, treeNode node, TagType tag, treeNode anc){
+ if (node == NULLT)
+ return 0;
+ else {
+ return
+ 1
+ + iterjump(tree,tree->TaggedDescendant(node,tag),tag,node)
+ + iterjump(tree,tree->TaggedFollowingBelow(node,tag,anc),tag,anc);
+ };
+}
+
+extern "C" value caml_benchmark_jump(value tree,value tag){
+ int count;
+ treeNode root = XMLTREE(tree)->FirstChild(0);
+ root = XMLTREE(tree)->FirstChild(root);
+ count = iterjump(XMLTREE(tree), root , Int_val(tag),0);
+ return Val_int(count);
+}
+
+int iterfcns(XMLTree* tree, treeNode node){
+ if (node == NULLT)
+ return 0;
+ else {
+ int tmp = 1;
+ tmp += iterfcns(tree,tree->FirstChild(node));
+ tmp += iterfcns(tree,tree->NextSibling(node));
+ return tmp;
+ };
+}
+
+int iterfene(XMLTree* tree, treeNode node){
+ if (node == NULLT)
+ return 0;
+ else {
+ int tmp = 1;
+ tmp += iterfene(tree,tree->FirstElement(node));
+ tmp += iterfene(tree,tree->NextElement(node));
+ return tmp;
+ };
+}
+
+extern "C" value caml_benchmark_fcns(value tree){
+ int i = iterfcns(XMLTREE(tree),0);
+ return Val_int(i);
+
+}
+
+extern "C" value caml_benchmark_fene(value tree){
+ int i = iterfene(XMLTREE(tree),0);
+ return Val_int(i);
+
+}
+
+int iterlcps(XMLTree* tree, treeNode node){
+ if (node == NULLT)
+ return 0;
+ else {
+ int x = tree->Tag(node);
+ x += iterlcps(tree,tree->LastChild(node));
+ x += iterlcps(tree,tree->PrevSibling(node));
+ return x;
+ };
+}
+
+int fulliterative(XMLTree* tree){
+ treeNode current = tree->Root();
+ treeNode next = NULLT;
+ int count = 1; //the root
+
+ do {
+
+
+ while ((next = tree->FirstChild(current)) != NULLT) {
+ current = next;
+ count++;
+ };
+
+ while ( (next = tree->NextSibling(current)) == NULLT){
+ current = tree->Parent(current);
+ if (current == NULLT) return count;
+ }
+ current = next;
+ count++;
+ } while (true);
+
+}
+
+extern "C" value caml_benchmark_iter(value tree){
+ return Val_int(fulliterative(XMLTREE(tree)));
+}
+
+extern "C" value caml_benchmark_lcps(value tree){
+ iterlcps(XMLTREE(tree),0);
+ return Val_unit;
+
+}
+
+extern "C" {
+
+ typedef struct dummy_node_ {
+ struct dummy_node_* first;
+ struct dummy_node_* next;
+ } dummy_node;
+
+
+ dummy_node * new_dummy_node () {
+
+ dummy_node * node = (dummy_node*) malloc(sizeof(dummy_node));
+ if (!node)
+ printf("%s","Cannot allocate memory\n");
+
+ return node;
+ }
+
+ void free_tree(dummy_node * node){
+ if (node){
+ free_tree(node->first);
+ free_tree(node->next);
+ free(node);
+ };
+ return;
+ }
+
+ dummy_node * create_tree(XMLTree* tree, treeNode i, int mode){
+ if (i == NULLT)
+ return NULL;
+ else {
+ dummy_node * f, *n, *r;
+ //mode = i % 3;
+ if (mode == 0) r = new_dummy_node();
+ f = create_tree(tree,tree->FirstChild(i), mode);
+ if (mode == 1) r = new_dummy_node();
+ n = create_tree(tree,tree->NextSibling(i), mode);
+ if (mode == 2) r = new_dummy_node();
+ r->first = f;
+ r->next = n;
+ return r;
+ };
+ }
+
+ int iter_tree(dummy_node * n){
+ if (n == NULL)
+ return 0;
+ else
+ return 1 + iter_tree (n->first) + iter_tree (n->next);
+ }
+}
+extern "C" value caml_build_pointers(value tree, value mode){
+ return ((value) create_tree(XMLTREE(Field(tree,0)),0, Int_val(mode)));
+}
+
+extern "C" value caml_iter_pointers (value node){
+ return Val_int(iter_tree((dummy_node*) node));
+
+}
+
+extern "C" value caml_free_pointers(value node){
+ free_tree((dummy_node*) node);
+ return Val_unit;
}