1 /**************************************
4 * An Ocaml Driver which calls the C++ methods and
5 * adds a C wrapper interface with OCaml code.
13 #include <unordered_set>
15 #include "XMLDocShredder.h"
20 /* OCaml memory managment */
21 #include <caml/mlvalues.h>
22 #include <caml/alloc.h>
23 #include <caml/memory.h>
24 #include <caml/callback.h>
25 #include <caml/fail.h>
26 #include <caml/custom.h>
30 #define CAMLRAISEMSG(msg) (caml_raise_with_string(*cpp_exception,(msg) ))
31 #define NOT_IMPLEMENTED(s) (caml_failwith(s))
32 #define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x)))
33 #define HSET(x) ((std::unordered_set<int>*)((* (std::unordered_set<int>**) Data_custom_val(x))))
34 #define TEXTCOLLECTION(x)
35 #define TREENODEVAL(i) ((treeNode) (Int_val(i)))
36 #define XMLTREE_ROOT 0
38 static struct custom_operations ops;
39 static struct custom_operations set_ops;
40 static value * cpp_exception = NULL;
41 static bool ops_initialized = false;
45 extern "C" void caml_xml_tree_finalize(value tree){
50 extern "C" void caml_hset_finalize(value hblock){
55 extern "C" CAMLprim value caml_init_lib (value unit) {
57 if (!ops_initialized){
60 ops.identifier = (char*) "XMLTree";
61 ops.finalize = caml_xml_tree_finalize;
62 set_ops.identifier = (char*) "unordered_set";
63 set_ops.finalize = caml_hset_finalize;
65 cpp_exception = caml_named_value("CPlusPlusError");
66 if (cpp_exception == NULL){
67 string s = "FATAL: Unregistered exception ";
68 s += "CPlusPlusError";
69 caml_failwith(s.c_str());
72 ops_initialized = true;
78 extern "C" CAMLprim value caml_shredder_parse(XMLDocShredder *shredder){
82 shredder->processStartDocument("");
84 shredder->processEndDocument();
85 doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2);
86 tree = (XMLTree *) shredder->getXMLTree();
87 memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*));
92 extern "C" CAMLprim value caml_call_shredder_uri(value uri,value sf, value iet, value dtc){
95 char *fn = String_val(uri);
96 XMLDocShredder * shredder;
98 shredder = new XMLDocShredder(fn,Int_val(sf),Bool_val(iet),Bool_val(dtc));
99 doc = caml_shredder_parse(shredder);
102 catch (const std::exception& e){ CAMLRAISEMSG(e.what()); }
103 catch (string msg){ CAMLRAISEMSG(msg.c_str()); }
104 catch (char const * msg){ CAMLRAISEMSG(msg); };
108 extern "C" CAMLprim value caml_call_shredder_string(value data,value sf, value iet, value dtc){
111 XMLDocShredder * shredder;
112 unsigned int ln = string_length(data);
113 unsigned char *fn = (unsigned char*) String_val(data);
115 shredder = new XMLDocShredder (fn,ln,Int_val(sf),Bool_val(iet),Bool_val(dtc));
116 doc = caml_shredder_parse(shredder);
119 catch (const std::exception& e){ CAMLRAISEMSG(e.what()); }
120 catch (string msg){ CAMLRAISEMSG(msg.c_str()); }
121 catch (char const * msg){ CAMLRAISEMSG(msg); };
125 extern "C" CAMLprim value caml_xml_tree_save(value tree,value fd){
127 XMLTREE(tree)->Save(Int_val(fd));
128 CAMLreturn (Val_unit);
131 extern "C" CAMLprim value caml_xml_tree_load(value fd, value load_tc,value sf){
132 CAMLparam3(fd,load_tc,sf);
136 tree = XMLTree::Load(Int_val(fd),Bool_val(load_tc),Int_val(sf));
137 doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2);
138 memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*));
141 catch (const xmlpp::internal_error& e){ CAMLRAISEMSG(e.what()); }
142 catch (const std::exception& e){ CAMLRAISEMSG(e.what()); }
143 catch (string msg){ CAMLRAISEMSG(msg.c_str()); }
144 catch (char const * msg){ CAMLRAISEMSG(msg); };
147 extern "C" CAMLprim value caml_text_collection_get_text(value tree, value id){
150 uchar* txt = XMLTREE(tree)->GetText((DocID) Int_val(id));
151 str = caml_copy_string((const char*)txt);
155 extern "C" CAMLprim value caml_text_collection_get_cached_text(value tree, value id){
158 char* txt = (char*) XMLTREE(tree)->GetText((DocID) Int_val(id));
159 str = caml_copy_string(txt);
164 extern "C" CAMLprim value caml_text_collection_empty_text(value tree,value id){
166 CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id))));
169 extern "C" CAMLprim value caml_text_collection_is_contains(value tree,value str){
170 CAMLparam2(tree,str);
171 uchar * cstr = (uchar *) String_val(str);
172 CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsContains(cstr)));
175 extern "C" CAMLprim value caml_text_collection_count_contains(value tree,value str){
176 CAMLparam2(tree,str);
177 uchar * cstr = (uchar *) String_val(str);
178 CAMLreturn (Val_int((XMLTREE(tree)->CountContains(cstr))));
181 extern "C" CAMLprim value caml_text_collection_count(value tree,value str){
182 CAMLparam2(tree,str);
183 uchar * cstr = (uchar *) String_val(str);
184 CAMLreturn (Val_int((XMLTREE(tree)->Count(cstr))));
185 CAMLreturn (Val_unit);
188 bool docId_comp(DocID x, DocID y) { return x < y; };
191 extern "C" CAMLprim value caml_text_collection_contains(value tree,value str){
192 CAMLparam2(tree,str);
193 CAMLlocal1(resarray);
194 uchar * cstr = (uchar *) String_val(str);
195 std::vector<DocID> results;
196 results = XMLTREE(tree)->Contains(cstr);
197 std::sort(results.begin(), results.end(), docId_comp);
198 size_t s = results.size();
199 resarray = caml_alloc_tuple(s);
201 for (size_t i = 0; i < s ;i++){
202 caml_initialize(&Field(resarray,i),Val_int(results[i]));
204 CAMLreturn (resarray);
207 extern "C" CAMLprim value caml_text_collection_equals(value tree,value str){
208 CAMLparam2(tree,str);
209 CAMLlocal1(resarray);
210 uchar * cstr = (uchar *) String_val(str);
211 std::vector<DocID> results;
212 results = XMLTREE(tree)->Equal(cstr);
213 std::sort(results.begin(), results.end(), docId_comp);
214 size_t s = results.size();
215 resarray = caml_alloc_tuple(s);
217 for (size_t i = 0; i < s ;i++){
218 caml_initialize(&Field(resarray,i),Val_int(results[i]));
220 CAMLreturn (resarray);
222 extern "C" CAMLprim value caml_text_collection_startswith(value tree,value str){
223 CAMLparam2(tree,str);
224 CAMLlocal1(resarray);
225 uchar * cstr = (uchar *) String_val(str);
226 std::vector<DocID> results;
227 results = XMLTREE(tree)->Prefix(cstr);
228 std::sort(results.begin(), results.end(), docId_comp);
229 size_t s = results.size();
230 resarray = caml_alloc_tuple(s);
232 for (size_t i = 0; i < s ;i++){
233 caml_initialize(&Field(resarray,i),Val_int(results[i]));
235 CAMLreturn (resarray);
237 extern "C" CAMLprim value caml_text_collection_endswith(value tree,value str){
238 CAMLparam2(tree,str);
239 CAMLlocal1(resarray);
240 uchar * cstr = (uchar *) String_val(str);
241 std::vector<DocID> results;
242 results = XMLTREE(tree)->Suffix(cstr);
243 std::sort(results.begin(), results.end(), docId_comp);
244 size_t s = results.size();
245 resarray = caml_alloc_tuple(s);
247 for (size_t i = 0; i < s ;i++){
248 caml_initialize(&Field(resarray,i),Val_int(results[i]));
250 CAMLreturn (resarray);
255 extern "C" CAMLprim value caml_text_collection_unsorted_contains(value tree,value str){
256 CAMLparam2(tree,str);
257 CAMLlocal1(resarray);
258 uchar * cstr = (uchar *) String_val(str);
259 std::vector<DocID> results;
260 results = XMLTREE(tree)->Contains(cstr);
261 resarray = caml_alloc_tuple(results.size());
262 for (size_t i = 0; i < results.size() ;i++){
263 caml_initialize(&Field(resarray,i),Val_int(results[i]));
265 CAMLreturn (resarray);
269 extern "C" CAMLprim value caml_xml_tree_root(value tree){
271 CAMLreturn (Val_int(TREENODEVAL(XMLTREE_ROOT)));
273 extern "C" CAMLprim value caml_xml_tree_text_collection(value tree){
275 CAMLreturn((value) XMLTREE(tree)->getTextCollection());
277 extern "C" CAMLprim value caml_xml_tree_parent(value tree, value id){
278 return(Val_int (XMLTREE(tree)->Parent(TREENODEVAL(id))));
280 extern "C" CAMLprim value caml_xml_tree_prev_sibling(value tree, value id){
281 return(Val_int (XMLTREE(tree)->PrevSibling(TREENODEVAL(id))));
284 extern "C" CAMLprim value caml_xml_tree_parent_doc(value tree, value id){
285 return (Val_int (XMLTREE(tree)->ParentNode((DocID) Int_val(id))));
288 extern "C" CAMLprim value caml_xml_tree_is_ancestor(value tree,value id1, value id2) {
289 CAMLparam3(tree,id1,id2);
290 CAMLreturn(Val_bool (XMLTREE(tree)->IsAncestor(TREENODEVAL(id1),TREENODEVAL(id2))));
293 extern "C" CAMLprim value caml_xml_tree_last_child(value tree, value id){
294 return(Val_int (XMLTREE(tree)->LastChild(TREENODEVAL(id))));
297 extern "C" CAMLprim value caml_xml_tree_is_first_child(value tree, value id){
298 return Val_bool (XMLTREE(tree)->IsFirstChild(TREENODEVAL(id)));
300 extern "C" CAMLprim value caml_xml_tree_first_child(value tree, value id){
301 return(Val_int (XMLTREE(tree)->FirstChild(TREENODEVAL(id))));
303 extern "C" CAMLprim value caml_xml_tree_closing(value tree, value id){
304 return(Val_int (XMLTREE(tree)->Closing(TREENODEVAL(id))));
306 extern "C" CAMLprim value caml_xml_tree_is_open(value tree, value id){
307 return(Val_bool (XMLTREE(tree)->IsOpen(TREENODEVAL(id))));
310 extern "C" CAMLprim value caml_xml_tree_first_element(value tree, value id){
311 return(Val_int (XMLTREE(Field(tree,0))->FirstElement(TREENODEVAL(id))));
314 extern "C" CAMLprim value caml_xml_tree_tagged_child(value tree, value id, value tag){
315 return(Val_int (XMLTREE(tree)->TaggedChild(TREENODEVAL(id),Int_val(tag))));
318 extern "C" CAMLprim value caml_xml_tree_next_sibling(value tree, value id){
319 return(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id))));
322 extern "C" CAMLprim value caml_xml_tree_next_element(value tree, value id){
323 return(Val_int (XMLTREE(Field(tree,0))->NextElement(TREENODEVAL(id))));
326 extern "C" CAMLprim value caml_xml_tree_tagged_sibling(value tree, value id, value tag){
327 return(Val_int (XMLTREE(tree)->TaggedFollSibling(TREENODEVAL(id),Int_val(tag))));
331 extern "C" CAMLprim value caml_xml_tree_is_leaf(value tree, value id){
332 return(Val_bool (XMLTREE(tree)->IsLeaf(TREENODEVAL(id))));
335 extern "C" CAMLprim value caml_xml_tree_tagged_desc(value tree, value id, value tag){
336 return(Val_int (XMLTREE(tree)->TaggedDesc(TREENODEVAL(id),(TagType) Int_val(tag))));
340 extern "C" CAMLprim value caml_xml_tree_tagged_foll(value tree, value id, value tag){
341 return(Val_int (XMLTREE(tree)->TaggedFoll(TREENODEVAL(id),(TagType) Int_val(tag))));
343 extern "C" CAMLprim value caml_xml_tree_tagged_foll_below(value tree, value id, value tag,value root){
344 return(Val_int (XMLTREE(tree)->TaggedFollBelow(TREENODEVAL(id),(TagType) Int_val(tag),TREENODEVAL(root))));
346 extern "C" CAMLprim value caml_xml_tree_tagged_foll_before(value tree, value id, value tag,value root){
347 return(Val_int (XMLTREE(tree)->TaggedFollBefore(TREENODEVAL(id),(TagType) Int_val(tag),TREENODEVAL(root))));
350 extern "C" CAMLprim value caml_xml_tree_my_text(value tree, value id){
351 return(Val_int((XMLTREE(tree)->MyText(TREENODEVAL(id)))));
354 extern "C" CAMLprim value caml_xml_tree_my_text_unsafe(value tree, value id){
355 return(Val_int((XMLTREE(tree)->MyTextUnsafe(TREENODEVAL(id)))));
358 extern "C" CAMLprim value caml_xml_tree_text_xml_id(value tree, value id){
359 return(Val_int((XMLTREE(tree)->TextXMLId(TREENODEVAL(id)))));
361 extern "C" CAMLprim value caml_xml_tree_node_xml_id(value tree, value id){
362 return(Val_int((XMLTREE(tree)->NodeXMLId(TREENODEVAL(id)))));
365 extern "C" CAMLprim value caml_xml_tree_tag_name(value tree, value tagid){
366 CAMLparam2(tree,tagid);
369 tag = (char*) XMLTREE(tree)->GetTagNameByRef((TagType) (Int_val(tagid)));
370 str = caml_copy_string((const char*) tag);
375 extern "C" CAMLprim value caml_xml_tree_tag_id(value tree,value id){
376 return (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(id))));
379 extern "C" CAMLprim value caml_xml_tree_subtree_tags(value tree,value id,value tag){
380 return (Val_int(XMLTREE(tree)->SubtreeTags(TREENODEVAL(id),Int_val(tag))));
383 extern "C" CAMLprim value caml_xml_tree_subtree_size(value tree,value id){
384 return (Val_int(XMLTREE(tree)->SubtreeSize(TREENODEVAL(id))));
387 extern "C" CAMLprim value caml_xml_tree_subtree_elements(value tree,value id){
388 return (Val_int(XMLTREE(tree)->SubtreeElements(TREENODEVAL(id))));
392 extern "C" CAMLprim value caml_xml_tree_register_tag(value tree,value str){
393 CAMLparam2(tree,str);
396 tag = (unsigned char*) (String_val(str));
397 id = Val_int(XMLTREE(tree)->RegisterTag(tag));
401 extern "C" CAMLprim value caml_xml_tree_nullt(value unit){
405 extern "C" CAMLprim value caml_unordered_set_length(value hset){
407 CAMLreturn (Val_int((HSET(hset))->size()));
410 extern "C" CAMLprim value caml_unordered_set_alloc(value len){
413 hset = caml_alloc_custom(&set_ops,sizeof(std::unordered_set<int>*),1,2);
414 std::unordered_set<int>* ht = new std::unordered_set<int>();
415 memcpy(Data_custom_val(hset),&ht,sizeof(std::unordered_set<int>*));
419 extern "C" CAMLprim value caml_unordered_set_set(value vec, value v){
420 HSET(vec)->insert((int) Int_val(v));
424 extern "C" CAMLprim value caml_xml_tree_select_desc(value tree, value node, value tags){
425 return (Val_int (XMLTREE(tree)->SelectDesc(TREENODEVAL(node),
428 extern "C" CAMLprim value caml_xml_tree_select_child(value tree, value node, value tags){
429 return (Val_int (XMLTREE(tree)->SelectChild(TREENODEVAL(node),
432 extern "C" CAMLprim value caml_xml_tree_select_foll_sibling(value tree, value node, value tags){
433 return (Val_int (XMLTREE(tree)->SelectFollSibling(TREENODEVAL(node),
436 extern "C" CAMLprim value caml_xml_tree_select_foll_below(value tree, value node, value tags,value ctx){
437 return (Val_int (XMLTREE(tree)->SelectFollBelow(TREENODEVAL(node),
441 extern "C" CAMLprim value caml_xml_tree_select_foll_before(value tree, value node, value tags,value ctx){
442 return (Val_int (XMLTREE(tree)->SelectFollBelow(TREENODEVAL(node),
448 extern "C" CAMLprim value caml_xml_tree_doc_ids(value tree, value node){
449 CAMLparam2(tree,node);
451 tuple = caml_alloc_tuple(2);
452 range r = (XMLTREE(tree)->DocIds(TREENODEVAL(node)));
453 caml_initialize(&Field(tuple,0),Val_int(r.min));
454 caml_initialize(&Field(tuple,1),Val_int(r.max));
458 extern "C" value caml_result_set_create(value size){
459 results* res = (results*) malloc(sizeof(results));
460 results r = createResults (Int_val(size));
464 return ((value) (res));
467 extern "C" CAMLprim value caml_result_set_set(value result,value p){
469 setResult ( *((results*) result), Int_val(p));
470 CAMLreturn (Val_unit);
473 extern "C" CAMLprim value caml_result_set_clear(value result,value p1,value p2){
475 clearRange ( *((results*) result), Int_val(p1), Int_val(p2));
476 CAMLreturn (Val_unit);
479 extern "C" CAMLprim value caml_result_set_next(value result,value p){
482 r = *( (results *) result);
483 CAMLreturn (Val_int(nextResult(r, Int_val(p))));
486 extern "C" CAMLprim value caml_result_set_count(value result){
489 r = *( (results *) result);
490 CAMLreturn (Val_int(countResult(r)));
493 extern "C" CAMLprim value caml_xml_tree_print(value tree,value node,value fd){
494 CAMLparam3(tree,node,fd);
495 XMLTREE(tree)->Print(Int_val(fd),TREENODEVAL(node));
496 CAMLreturn(Val_unit);
499 extern "C" CAMLprim value caml_set_tag_bits(value result, value tag, value tree, value node)
501 CAMLparam3(tag,tree,node);
503 XMLTree *t = XMLTREE(Field(tree,0));
504 treeNode opening = TREENODEVAL(node);
505 treeNode closing = t->Closing(opening);
506 TagType target_tag = Int_val(tag);
507 treeNode first = t->TaggedDesc(opening,target_tag);
508 r = *( (results *) result);
510 while (opening != NULLT){
511 setResult(r,opening);
512 opening = t->TaggedFollBefore(opening,target_tag,closing);
514 CAMLreturn(Val_int(first));