1 /**************************************
4 * An Ocaml Driver which calls the C++ methods and
5 * adds a C wrapper interface with OCaml code.
13 * functions never doing any allocation (non caml_alloc*, caml_copy_string,...)
14 * have NOALLOC in the comment and their external declaration can have "noalloc"
18 #include <unordered_set>
22 #include "XMLTreeBuilder.h"
25 #include "common_stub.hpp"
27 #define CAMLRAISEMSG(msg) (sxsi_raise_msg((char*) (msg)))
29 #define XMLTREE(x) (Obj_val<XMLTree*>(x))
31 #define HSET(x) (Obj_val<TagIdSet*>(x))
33 #define XMLTREEBUILDER(x) (Obj_val<XMLTreeBuilder*>(x))
35 #define GRAMMAR(x) (Obj_val<Grammar*>(x))
38 #define TREENODEVAL(i) ((treeNode) (Int_val(i)))
39 #define TAGVAL(i) ((TagType) (Int_val(i)))
40 #define XMLTREE_ROOT 0
45 #include <sys/resource.h>
51 /** XMLTreeBuilder bindings
55 extern "C" value caml_xml_tree_builder_create(value unit)
59 result = sxsi_alloc_custom<XMLTreeBuilder*>();
60 Obj_val<XMLTreeBuilder*>(result) = new XMLTreeBuilder();
65 extern "C" value caml_xml_tree_builder_open_document(value vbuilder,
71 CAMLparam5(vbuilder, vet, vsrate, vdtc, vidxtype);
72 bool empty_text = Bool_val(vet);
73 int sample_rate = Int_val(vsrate);
74 bool disable_tc = Bool_val(vdtc);
75 TextCollectionBuilder::index_type_t idx_type;
76 switch (Int_val(vidxtype)){
78 idx_type = TextCollectionBuilder::index_type_default;
81 idx_type = TextCollectionBuilder::index_type_swcsa;
84 idx_type = TextCollectionBuilder::index_type_rlcsa;
87 CAMLRAISEMSG("Invalid Index Type");
89 int res = XMLTREEBUILDER(vbuilder)->OpenDocument(empty_text,
94 CAMLRAISEMSG("OpenDocument");
96 CAMLreturn (Val_unit);
99 extern "C" value caml_xml_tree_builder_close_document(value vbuilder)
101 CAMLparam1(vbuilder);
103 XMLTree * tree = XMLTREEBUILDER(vbuilder)->CloseDocument();
105 CAMLRAISEMSG("CloseDocument");
106 result = sxsi_alloc_custom<XMLTree*>();
107 Obj_val<XMLTree*>(result) = tree;
111 extern "C" value caml_xml_tree_builder_new_open_tag(value vbuilder, value vtag)
113 CAMLparam2(vbuilder, vtag);
114 const char * tag = String_val(vtag);
115 if (XMLTREEBUILDER(vbuilder)->NewOpenTag(std::string(tag)) == NULLT)
116 CAMLRAISEMSG("NewOpenTag");
118 CAMLreturn (Val_unit);
121 extern "C" value caml_xml_tree_builder_new_closing_tag(value vbuilder, value vtag)
123 CAMLparam2(vbuilder, vtag);
124 const char * tag = String_val(vtag);
125 if (XMLTREEBUILDER(vbuilder)->NewClosingTag(std::string(tag)) == NULLT)
126 CAMLRAISEMSG("NewClosingTag");
128 CAMLreturn (Val_unit);
131 extern "C" value caml_xml_tree_builder_new_text(value vbuilder, value vtext)
133 CAMLparam2(vbuilder, vtext);
134 const char * text = String_val(vtext);
135 if (XMLTREEBUILDER(vbuilder)->NewText(std::string(text)) == NULLT)
136 CAMLRAISEMSG("NewText");
138 CAMLreturn (Val_unit);
142 /*************************************************************************/
146 * All of the functions here call the _unsafe version and implement the logics themselves
147 * (test for NULLT and so on). This avoids one indirection + one call when the tests fails.
151 extern "C" value caml_xml_tree_save(value tree,value fd, value name){
152 CAMLparam3(tree, fd, name);
153 XMLTREE(tree)->Save(Int_val(fd), String_val(name));
154 CAMLreturn (Val_unit);
157 extern "C" value caml_xml_tree_load(value fd, value name, value load_tc,value sf){
158 CAMLparam4(fd, name, load_tc, sf);
163 tree = XMLTree::Load(Int_val(fd),Bool_val(load_tc),Int_val(sf), String_val(name));
164 result = sxsi_alloc_custom<XMLTree*>();
165 Obj_val<XMLTree*>(result) = tree;
168 catch (const std::exception& e){ CAMLRAISEMSG(e.what()); }
169 catch (std::string msg){ CAMLRAISEMSG(msg.c_str()); }
170 catch (char const * msg){ CAMLRAISEMSG(msg); };
176 NoAlloc extern "C" value caml_xml_tree_root(value tree){
177 return (Val_int(XMLTREE_ROOT));
180 NoAlloc extern "C" value caml_xml_tree_size(value tree){
181 return (Val_int(XMLTREE(tree)->Size()));
184 NoAlloc extern "C" value caml_xml_tree_num_tags(value tree){
185 return (Val_int(XMLTREE(tree)->NumTags()));
188 NoAlloc extern "C" value caml_xml_tree_subtree_size(value tree, value node){
189 return (Val_int(XMLTREE(tree)->SubtreeSize(TREENODEVAL(node))));
192 NoAlloc extern "C" value caml_xml_tree_subtree_tags(value tree, value node, value tag){
193 return (Val_int(XMLTREE(tree)->SubtreeTags(TREENODEVAL(node), TAGVAL(tag))));
196 NoAlloc extern "C" value caml_xml_tree_subtree_elements(value tree, value node){
197 return (Val_int(XMLTREE(tree)->SubtreeElements(TREENODEVAL(node))));
200 NoAlloc extern "C" value caml_xml_tree_is_leaf(value tree, value node){
201 return (Val_bool(XMLTREE(tree)->IsLeaf(TREENODEVAL(node))));
204 NoAlloc extern "C" value caml_xml_tree_is_ancestor(value tree, value node1,value node2){
205 return (Val_bool(XMLTREE(tree)->IsAncestor(TREENODEVAL(node1),TREENODEVAL(node2))));
208 NoAlloc extern "C" value caml_xml_tree_is_child(value tree, value node1,value node2){
209 return (Val_bool(XMLTREE(tree)->IsChild(TREENODEVAL(node1),TREENODEVAL(node2))));
212 NoAlloc extern "C" value caml_xml_tree_is_first_child(value tree, value node){
213 return (Val_bool(XMLTREE(tree)->IsFirstChild(TREENODEVAL(node))));
215 NoAlloc extern "C" value caml_xml_tree_is_right_descendant(value tree, value x, value y){
216 return (Val_bool(XMLTREE(tree)->IsRightDescendant(TREENODEVAL(x), TREENODEVAL(y))));
218 NoAlloc extern "C" value caml_xml_tree_num_children(value tree, value node){
219 return (Val_int(XMLTREE(tree)->NumChildren(TREENODEVAL(node))));
222 NoAlloc extern "C" value caml_xml_tree_child_number(value tree, value node){
223 return (Val_int(XMLTREE(tree)->ChildNumber(TREENODEVAL(node))));
226 NoAlloc extern "C" value caml_xml_tree_depth(value tree, value node){
227 return (Val_int(XMLTREE(tree)->Depth(TREENODEVAL(node))));
230 NoAlloc extern "C" value caml_xml_tree_preorder(value tree, value node){
231 return (Val_int(XMLTREE(tree)->Preorder(TREENODEVAL(node))));
234 NoAlloc extern "C" value caml_xml_tree_postorder(value tree, value node){
235 return (Val_int(XMLTREE(tree)->Postorder(TREENODEVAL(node))));
238 NoAlloc extern "C" value caml_xml_tree_tag(value tree, value node) throw () {
239 return (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(node))));
242 extern "C" value caml_xml_tree_doc_ids(value tree, value node){
243 CAMLparam2(tree,node);
246 tuple = caml_alloc(2,0);
247 ids = XMLTREE(tree)->DocIds(Int_val(node));
248 Store_field(tuple,0,Val_int(ids.min));
249 Store_field(tuple,1,Val_int(ids.max));
253 NoAlloc extern "C" value caml_xml_tree_parent(value tree, value node){
254 return (Val_int(XMLTREE(tree)->Parent(TREENODEVAL(node))));
257 NoAlloc extern "C" value caml_xml_tree_binary_parent(value tree, value node){
258 return (Val_int(XMLTREE(tree)->BinaryParent(TREENODEVAL(node))));
261 NoAlloc extern "C" value caml_xml_tree_child(value tree, value node,value idx){
262 return (Val_int(XMLTREE(tree)->Child(TREENODEVAL(node),Int_val(idx))));
265 NoAlloc extern "C" value caml_xml_tree_first_child(value tree, value node){
266 return (Val_int(XMLTREE(tree)->FirstChild(TREENODEVAL(node))));
269 NoAlloc extern "C" value caml_xml_tree_first_element(value tree, value node){
270 return (Val_int(XMLTREE(tree)->FirstElement(TREENODEVAL(node))));
273 NoAlloc extern "C" value caml_xml_tree_last_child(value tree, value node){
274 return (Val_int(XMLTREE(tree)->LastChild(TREENODEVAL(node))));
277 NoAlloc extern "C" value caml_xml_tree_next_sibling(value tree, value node){
278 return (Val_int(XMLTREE(tree)->NextSibling(TREENODEVAL(node))));
281 NoAlloc extern "C" value caml_xml_tree_next_element(value tree, value node){
282 return (Val_int(XMLTREE(tree)->NextElement(TREENODEVAL(node))));
285 NoAlloc extern "C" value caml_xml_tree_next_node_before(value tree, value node, value ctx){
286 return (Val_int(XMLTREE(tree)->NextNodeBefore(TREENODEVAL(node), TREENODEVAL(ctx))));
289 NoAlloc extern "C" value caml_xml_tree_prev_sibling(value tree, value node){
290 return (Val_int(XMLTREE(tree)->PrevSibling(TREENODEVAL(node))));
293 NoAlloc extern "C" value caml_xml_tree_tagged_child(value tree, value node,value tag){
294 return (Val_int(XMLTREE(tree)->TaggedChild(TREENODEVAL(node),TAGVAL(tag))));
297 NoAlloc extern "C" value caml_xml_tree_select_child(value tree, value node,value tags){
298 return (Val_int(XMLTREE(tree)->SelectChild(TREENODEVAL(node), HSET(tags))));
301 NoAlloc extern "C" value caml_xml_tree_tagged_following_sibling(value tree, value node,value tag){
302 return (Val_int(XMLTREE(tree)->TaggedFollowingSibling(TREENODEVAL(node),TAGVAL(tag))));
305 NoAlloc extern "C" value caml_xml_tree_select_following_sibling(value tree, value node,value tags){
306 return (Val_int(XMLTREE(tree)->SelectFollowingSibling(TREENODEVAL(node), HSET(tags))));
309 NoAlloc extern "C" value caml_xml_tree_tagged_descendant(value tree, value node, value tag){
310 return (Val_int(XMLTREE(tree)->TaggedDescendant(TREENODEVAL(node), TAGVAL(tag))));
313 NoAlloc extern "C" value caml_xml_tree_tagged_next(value tree, value node, value tag){
314 return (Val_int(XMLTREE(tree)->TaggedNext(TREENODEVAL(node), TAGVAL(tag))));
317 NoAlloc extern "C" value caml_xml_tree_select_descendant(value tree, value node, value tags){
318 return (Val_int(XMLTREE(tree)->SelectDescendant(TREENODEVAL(node), HSET(tags))));
321 NoAlloc extern "C" value caml_xml_tree_tagged_preceding(value tree, value node, value tag){
322 return (Val_int(XMLTREE(tree)->TaggedPreceding(TREENODEVAL(node), TAGVAL(tag))));
325 NoAlloc extern "C" value caml_xml_tree_tagged_following(value tree, value node, value tag){
326 return (Val_int(XMLTREE(tree)->TaggedFollowing(TREENODEVAL(node), TAGVAL(tag))));
329 NoAlloc extern "C" value caml_xml_tree_tagged_following_below(value tree, value node, value tag, value ancestor){
330 return (Val_int(XMLTREE(tree)->TaggedFollowingBelow(TREENODEVAL(node), TAGVAL(tag), TREENODEVAL(ancestor))));
333 NoAlloc extern "C" value caml_xml_tree_select_following_below(value tree, value node, value tags, value ancestor){
334 return (Val_int(XMLTREE(tree)->SelectFollowingBelow(TREENODEVAL(node), HSET(tags), TREENODEVAL(ancestor))));
337 NoAlloc extern "C" value caml_xml_tree_tagged_following_before(value tree, value node, value tag, value closing){
338 return (Val_int(XMLTREE(tree)->TaggedFollowingBefore(TREENODEVAL(node), TAGVAL(tag), TREENODEVAL(closing))));
341 NoAlloc extern "C" value caml_xml_tree_select_following_before(value tree, value node, value tags, value closing){
342 return (Val_int(XMLTREE(tree)->SelectFollowingBefore(TREENODEVAL(node), HSET(tags), TREENODEVAL(closing))));
345 NoAlloc extern "C" value caml_xml_tree_tagged_ancestor(value tree, value node, value tag){
346 return (Val_int(XMLTREE(tree)->TaggedAncestor(TREENODEVAL(node), TAGVAL(tag))));
349 NoAlloc extern "C" value caml_xml_tree_my_text(value tree, value node){
350 return (Val_int(XMLTREE(tree)->MyText(TREENODEVAL(node))));
353 NoAlloc extern "C" value caml_xml_tree_my_text_unsafe(value tree, value node){
354 return (Val_int(XMLTREE(tree)->MyTextUnsafe(TREENODEVAL(node))));
357 NoAlloc extern "C" value caml_xml_tree_text_xml_id(value tree, value docid){
358 return (Val_int(XMLTREE(tree)->TextXMLId(Int_val(docid))));
361 NoAlloc extern "C" value caml_xml_tree_node_xml_id(value tree, value node){
362 return (Val_int(XMLTREE(tree)->NodeXMLId(TREENODEVAL(node))));
365 NoAlloc extern "C" value caml_xml_tree_parent_node(value tree, value docid){
366 return (Val_int(XMLTREE(tree)->ParentNode(Int_val(docid))));
369 NoAlloc extern "C" value caml_xml_tree_prev_node(value tree, value docid){
370 return (Val_int(XMLTREE(tree)->PrevNode(Int_val(docid))));
373 extern "C" value caml_xml_tree_get_tag_id(value tree, value tagname){
374 CAMLparam2(tree,tagname);
376 unsigned char* ctagname = (unsigned char*) strdup(String_val(tagname));
377 res = Val_int(XMLTREE(tree)->GetTagId(ctagname));
382 extern "C" value caml_xml_tree_get_tag_name(value tree, value tag){
383 CAMLparam2(tree,tag);
385 res = caml_copy_string((const char*) XMLTREE(tree)->GetTagNameByRef(TAGVAL(tag)));
389 extern "C" value caml_xml_tree_register_tag(value tree, value tagname){
390 CAMLparam2(tree,tagname);
392 unsigned char* ctagname = (unsigned char*) strdup(String_val(tagname));
393 res = Val_int(XMLTREE(tree)->RegisterTag(ctagname));
399 NoAlloc extern "C" value caml_xml_tree_get_text_collection(value tree){
400 return((value) XMLTREE(tree)->getTextCollection());
403 NoAlloc extern "C" value caml_xml_tree_closing(value tree, value node){
404 return (Val_int(XMLTREE(tree)->Closing(TREENODEVAL(node))));
407 NoAlloc extern "C" value caml_xml_tree_is_open(value tree, value node){
408 return (Val_bool(XMLTREE(tree)->IsOpen(TREENODEVAL(node))));
413 NoAlloc extern "C" value caml_xml_tree_nullt(value unit){
418 NoAlloc extern "C" value caml_unordered_set_length(value hset){
419 return (Val_int((HSET(hset))->size()));
422 extern "C" value caml_unordered_set_alloc(value unit){
425 hset = sxsi_alloc_custom<TagIdSet*>();
426 Obj_val<TagIdSet*>(hset) = new TagIdSet();
430 NoAlloc extern "C" value caml_unordered_set_set(value set, value v){
431 HSET(set)->insert((int) Int_val(v));
435 // NoAlloc extern "C" value caml_result_set_create(value size){
436 // results* res = (results*) malloc(sizeof(results));
437 // results r = createResults (Int_val(size));
440 // res->tree = r.tree;
441 // return ((value) (res));
444 // NoAlloc extern "C" value caml_result_set_set(value result,value p){
445 // setResult ( *((results*) result), Int_val(p));
446 // return (Val_unit);
449 // NoAlloc extern "C" value caml_result_set_clear(value result,value p1,value p2){
450 // clearRange ( *((results*) result), Int_val(p1), Int_val(p2));
451 // return (Val_unit);
454 // NoAlloc extern "C" value caml_result_set_next(value result,value p){
456 // r = *( (results *) result);
457 // return (Val_int(nextResult(r, Int_val(p))));
460 // NoAlloc extern "C" value caml_result_set_count(value result){
462 // r = *( (results *) result);
463 // return (Val_int(countResult(r)));
466 NoAlloc extern "C" value caml_xml_tree_print(value tree,value node,value fd){
467 CAMLparam3(tree,node,fd);
468 XMLTREE(tree)->Print(Int_val(fd),TREENODEVAL(node), false);
469 CAMLreturn(Val_unit);
472 NoAlloc extern "C" value caml_xml_tree_flush(value tree, value fd){
474 XMLTREE(tree)->Flush(Int_val(fd));
475 CAMLreturn(Val_unit);
478 // NoAlloc extern "C" value caml_set_tag_bits(value result, value tag, value tree, value node)
481 // XMLTree *t = XMLTREE(Field(tree,0));
482 // treeNode opening = TREENODEVAL(node);
483 // treeNode closing = t->Closing(opening);
484 // TagType target_tag = Int_val(tag);
485 // treeNode first = t->TaggedDescendant(opening,target_tag);
486 // r = *( (results *) result);
488 // while (opening != NULLT){
489 // setResult(r,opening);
490 // opening = t->TaggedFollowingBefore(opening,target_tag,closing);
492 // return(Val_int(first));
496 NoAlloc extern "C" value caml_bit_vector_create(value size){
497 return (value) (new vector<bool>(Int_val(size),false));
500 NoAlloc extern "C" value caml_bit_vector_free(value vect){
501 delete ((vector<bool>*) vect);
505 NoAlloc extern "C" value caml_bit_vector_get(value vect,value idx){
506 return Val_bool (((vector<bool>*)vect)->at(Int_val(idx)));
509 NoAlloc extern "C" value caml_bit_vector_set(value vect,value idx,value b){
510 (((vector<bool>*)vect)->at(Int_val(idx))) = (bool) Bool_val(b);
514 NoAlloc extern "C" value caml_bit_vector_next(value vect,value idx){
515 vector<bool>* bv = (vector<bool>*) vect;
516 int i = Int_val(idx);
518 while (i < l && !((*bv)[i]))
522 NoAlloc extern "C" value caml_bit_vector_prev(value vect,value idx){
523 int i = Int_val(idx);
524 while (i >= 0 && !((*((vector<bool>*) vect))[i]))
529 extern "C" value caml_bit_vector_node_array(value vect){
532 vector<bool>* bv = (vector<bool>*) vect;
537 if ((*bv)[i]) vr.push_back(i);
541 res = caml_alloc_tuple(l);
543 caml_initialize(&Field(res,i),Val_int(vr[i]));
548 int iterjump(XMLTree* tree, treeNode node, TagType tag, treeNode anc){
554 + iterjump(tree,tree->TaggedDescendant(node,tag),tag,node)
555 + iterjump(tree,tree->TaggedFollowingBelow(node,tag,anc),tag,anc);
559 extern "C" value caml_benchmark_jump(value tree,value tag){
561 treeNode root = XMLTREE(tree)->FirstChild(0);
562 root = XMLTREE(tree)->FirstChild(root);
563 count = iterjump(XMLTREE(tree), root , Int_val(tag),0);
564 return Val_int(count);
567 int iterfcns(XMLTree* tree, treeNode node){
572 tmp += iterfcns(tree,tree->FirstChild(node));
573 tmp += iterfcns(tree,tree->NextSibling(node));
579 int iterfene(XMLTree* tree, treeNode node){
584 tmp += iterfene(tree,tree->FirstElement(node));
585 tmp += iterfene(tree,tree->NextElement(node));
591 extern "C" value caml_benchmark_fcns(value tree){
592 int i = iterfcns(XMLTREE(tree),0);
596 extern "C" value caml_benchmark_fene(value tree){
597 int i = iterfene(XMLTREE(tree),0);
601 int iterlcps(XMLTree* tree, treeNode node){
605 int x = tree->Tag(node);
606 x += iterlcps(tree,tree->LastChild(node));
607 x += iterlcps(tree,tree->PrevSibling(node));
612 int fulliterative(XMLTree* tree){
613 treeNode current = tree->Root();
614 treeNode next = NULLT;
615 int count = 1; //the root
619 while ((next = tree->FirstChild(current)) != NULLT) {
624 while ( (next = tree->NextSibling(current)) == NULLT){
625 current = tree->Parent(current);
626 if (current == NULLT) return count;
634 extern "C" value caml_benchmark_iter(value tree){
635 return Val_int(fulliterative(XMLTREE(tree)));
638 extern "C" value caml_benchmark_lcps(value tree){
640 iterlcps(XMLTREE(tree),0);
647 typedef struct dummy_node_ {
648 struct dummy_node_* first;
649 struct dummy_node_* next;
653 dummy_node * new_dummy_node () {
655 dummy_node * node = (dummy_node*) malloc(sizeof(dummy_node));
657 printf("%s","Cannot allocate memory\n");
662 void free_tree(dummy_node * node){
664 free_tree(node->first);
665 free_tree(node->next);
671 dummy_node * create_tree(XMLTree* tree, treeNode i, int mode){
675 dummy_node * f, *n, *r;
678 if (mode == 0) r = new_dummy_node();
679 f = create_tree(tree,tree->FirstChild(i), mode);
680 if (mode == 1) r = new_dummy_node();
681 n = create_tree(tree,tree->NextSibling(i), mode);
682 if (mode == 2) r = new_dummy_node();
689 int iter_tree(dummy_node * n){
693 return 1 + iter_tree (n->first) + iter_tree (n->next);
696 extern "C" value caml_build_pointers(value tree, value mode){
697 return ((value) create_tree(XMLTREE(Field(tree,0)),0, Int_val(mode)));
700 extern "C" value caml_iter_pointers (value node){
701 return Val_int(iter_tree((dummy_node*) node));
705 extern "C" value caml_free_pointers(value node){
706 free_tree((dummy_node*) node);
710 * Interface to the TextCollection
717 extern "C" value caml_text_collection_get_text(value tree, value id){
720 uchar* txt = XMLTREE(tree)->GetText((DocID) Int_val(id));
721 str = caml_copy_string((const char*)txt);
726 extern "C" value caml_text_collection_empty_text(value tree,value id){
728 CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id))));
731 bool docId_comp(DocID x, DocID y) { return x < y; }
734 * Existential queries
737 extern "C" value caml_text_collection_is_prefix(value tree,value str){
738 CAMLparam2(tree,str);
739 uchar * cstr = (uchar *) String_val(str);
740 CAMLreturn (Val_bool((int) XMLTREE(tree)->IsPrefix(cstr)));
743 extern "C" value caml_text_collection_is_suffix(value tree,value str){
744 CAMLparam2(tree,str);
745 uchar * cstr = (uchar *) String_val(str);
746 CAMLreturn (Val_bool((int) XMLTREE(tree)->IsSuffix(cstr)));
748 extern "C" value caml_text_collection_is_equal(value tree,value str){
749 CAMLparam2(tree,str);
750 uchar * cstr = (uchar *) String_val(str);
751 CAMLreturn (Val_bool((int) XMLTREE(tree)->IsEqual(cstr)));
753 extern "C" value caml_text_collection_is_contains(value tree,value str){
754 CAMLparam2(tree,str);
755 uchar * cstr = (uchar *) String_val(str);
756 CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsContains(cstr)));
759 extern "C" value caml_text_collection_is_lessthan(value tree,value str){
760 CAMLparam2(tree,str);
761 uchar * cstr = (uchar *) String_val(str);
762 CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsLessThan(cstr)));
773 extern "C" value caml_text_collection_count(value tree,value str){
774 CAMLparam2(tree,str);
775 uchar * cstr = (uchar *) String_val(str);
776 CAMLreturn (Val_int((XMLTREE(tree)->Count(cstr))));
779 extern "C" value caml_text_collection_count_prefix(value tree,value str){
780 CAMLparam2(tree,str);
781 uchar * cstr = (uchar *) String_val(str);
782 CAMLreturn (Val_int((XMLTREE(tree)->CountPrefix(cstr))));
785 extern "C" value caml_text_collection_count_suffix(value tree,value str){
786 CAMLparam2(tree,str);
787 uchar * cstr = (uchar *) String_val(str);
788 CAMLreturn (Val_int((XMLTREE(tree)->CountSuffix(cstr))));
791 extern "C" value caml_text_collection_count_equal(value tree,value str){
792 CAMLparam2(tree,str);
793 uchar * cstr = (uchar *) String_val(str);
794 CAMLreturn (Val_int((XMLTREE(tree)->CountEqual(cstr))));
797 extern "C" value caml_text_collection_count_contains(value tree,value str){
798 CAMLparam2(tree,str);
799 uchar * cstr = (uchar *) String_val(str);
800 CAMLreturn (Val_int((XMLTREE(tree)->CountContains(cstr))));
803 extern "C" value caml_text_collection_count_lessthan(value tree,value str){
804 CAMLparam2(tree,str);
805 uchar * cstr = (uchar *) String_val(str);
806 CAMLreturn (Val_int((XMLTREE(tree)->CountLessThan(cstr))));
809 static value sort_alloc_array(std::vector<DocID> results, value resarray){
810 std::sort(results.begin(), results.end(), docId_comp);
811 size_t s = results.size();
812 resarray = caml_alloc_tuple(s);
813 for (size_t i = 0; i < s ;i++){
814 caml_initialize(&Field(resarray,i),Val_int(results[i]));
821 * Full reporting queries
824 extern "C" value caml_text_collection_prefix(value tree,value str){
825 CAMLparam2(tree,str);
826 CAMLlocal1(resarray);
827 uchar * cstr = (uchar *) String_val(str);
828 std::vector<DocID> results = XMLTREE(tree)->Prefix(cstr);
829 CAMLreturn (sort_alloc_array(results,resarray));
832 extern "C" value caml_text_collection_suffix(value tree,value str){
833 CAMLparam2(tree,str);
834 CAMLlocal1(resarray);
835 uchar * cstr = (uchar *) String_val(str);
836 std::vector<DocID> results = XMLTREE(tree)->Suffix(cstr);
837 CAMLreturn (sort_alloc_array(results,resarray));
840 extern "C" value caml_text_collection_equals(value tree,value str){
841 CAMLparam2(tree,str);
842 CAMLlocal1(resarray);
843 uchar * cstr = (uchar *) strdup(String_val(str));
844 std::vector<DocID> results = XMLTREE(tree)->Equals(cstr);
846 CAMLreturn (sort_alloc_array(results,resarray));
849 extern "C" value caml_text_collection_contains(value tree,value str){
850 CAMLparam2(tree,str);
851 CAMLlocal1(resarray);
852 uchar * cstr = (uchar *) String_val(str);
853 std::vector<DocID> results = XMLTREE(tree)->Contains(cstr);
854 CAMLreturn (sort_alloc_array(results,resarray));
857 extern "C" value caml_text_collection_lessthan(value tree,value str){
858 CAMLparam2(tree,str);
859 CAMLlocal1(resarray);
860 uchar * cstr = (uchar *) String_val(str);
861 std::vector<DocID> results = XMLTREE(tree)->LessThan(cstr);
862 CAMLreturn (sort_alloc_array(results,resarray));
865 /** Full reporting into a bit vector
868 #define BV_QUERY(pref, Pref) \
869 extern "C" value caml_text_collection_## pref ##_bv(value tree, value str){ \
870 CAMLparam2(tree, str); \
871 CAMLlocal3(res, res_bv, res_array); \
873 uchar * cstr = (uchar *) strdup(String_val(str)); \
874 std::vector<DocID> results = XMLTREE(tree)->Pref(cstr); \
875 res_bv = caml_alloc_string((XMLTREE(tree)->Size() / 4) + 2); \
876 unsigned long slen = caml_string_length(res_bv); \
877 memset(&(Byte(res_bv,0)), 0, slen); \
878 res_array = caml_alloc_shr(results.size(), 0); \
879 for (unsigned int i = 0; i < results.size(); ++i) { \
880 j = XMLTREE(tree)->ParentNode(results[i]); \
881 Byte(res_bv, j >> 3) |= (1 << (j & 7)); \
882 caml_initialize(&Field(res_array, i), Val_int(j)); \
885 res = caml_alloc(2, 0); \
886 Store_field(res, 0, res_bv); \
887 Store_field(res, 1, res_array); \
892 BV_QUERY(prefix, Prefix)
893 BV_QUERY(suffix, Suffix)
894 BV_QUERY(equals, Equals)
895 BV_QUERY(contains, Contains)
896 BV_QUERY(lessthan, LessThan)
900 //////////////////////////////////////////// Grammar stuff
902 extern "C" value caml_grammar_load(value file, value load_bp)
904 CAMLparam2(file, load_bp);
907 int f1 = Int_val(file);
909 FILE * fd = fdopen(f2, "r");
911 CAMLRAISEMSG("Error opening grammar file");
912 grammar = Grammar::load(fd, Bool_val(load_bp));
914 result = sxsi_alloc_custom<Grammar*>();
915 Obj_val<Grammar*>(result) = grammar;
919 extern "C" value caml_grammar_get_symbol_at(value grammar, value symbol, value preorder)
921 CAMLparam3(grammar, symbol, preorder);
922 CAMLreturn(Val_long(GRAMMAR(grammar)->getSymbolAt(Long_val(symbol), Int_val(preorder))));
925 extern "C" value caml_grammar_first_child(value grammar, value rule, value pos)
928 CAMLreturn(Val_int(GRAMMAR(grammar)->firstChild(Long_val(rule), Int_val(pos))));
931 extern "C" value caml_grammar_next_sibling(value grammar, value rule, value pos)
934 CAMLreturn(Val_int(GRAMMAR(grammar)->nextSibling(Long_val(rule), Int_val(pos))));
937 extern "C" value caml_grammar_start_first_child(value grammar, value pos)
940 CAMLreturn(Val_int(GRAMMAR(grammar)->startFirstChild(Int_val(pos))));
943 extern "C" value caml_grammar_start_next_sibling(value grammar, value pos)
946 CAMLreturn(Val_int(GRAMMAR(grammar)->startNextSibling(Int_val(pos))));
949 extern "C" value caml_grammar_is_nil(value grammar, value rule)
952 CAMLreturn(Val_bool(GRAMMAR(grammar)->isNil(Long_val(rule))));
955 extern "C" value caml_grammar_get_tag(value grammar, value tag)
959 const char * s = (GRAMMAR(grammar)->getTagName(Long_val(tag))).c_str();
960 res = caml_copy_string(s);
964 extern "C" value caml_grammar_get_id1(value grammar, value rule)
967 CAMLreturn(Val_long(GRAMMAR(grammar)->getID1(Long_val(rule))));
970 extern "C" value caml_grammar_get_id2(value grammar, value rule)
973 CAMLreturn(Val_long(GRAMMAR(grammar)->getID2(Long_val(rule))));
976 extern "C" value caml_grammar_get_param_pos(value grammar, value rule)
979 CAMLreturn(Val_int(GRAMMAR(grammar)->getParamPos(Long_val(rule))));
982 extern "C" value caml_grammar_translate_tag(value grammar, value tag)
985 CAMLreturn(Val_int(GRAMMAR(grammar)->translateTag(Int_val(tag))));
988 extern "C" value caml_grammar_register_tag(value grammar, value str)
990 CAMLparam2(grammar, str);
991 char * s = String_val(str);
992 CAMLreturn(Val_int(GRAMMAR(grammar)->getTagID(s)));
995 extern "C" value caml_grammar_nil_id(value grammar)
998 CAMLreturn(Val_long((GRAMMAR(grammar)->getNiltagid()) * 4 + 1));
1002 extern char *caml_young_end;
1003 extern char *caml_young_start;
1004 typedef char * addr;
1005 #define Is_young(val) \
1006 ((addr)(val) < (addr)caml_young_end && (addr)(val) > (addr)caml_young_start)
1009 extern "C" value caml_custom_is_young(value a){
1010 return Val_bool(Is_young(a));
1013 extern "C" value caml_custom_array_blit(value a1, value ofs1, value a2, value ofs2,
1020 /* Arrays of values, destination is in young generation.
1021 Here too we can do a direct copy since this cannot create
1022 old-to-young pointers, nor mess up with the incremental major GC.
1023 Again, memmove takes care of overlap. */
1024 memmove(&Field(a2, Long_val(ofs2)),
1025 &Field(a1, Long_val(ofs1)),
1026 Long_val(n) * sizeof(value));
1029 /* Array of values, destination is in old generation.
1030 We must use caml_modify. */
1031 count = Long_val(n);
1032 if (a1 == a2 && Long_val(ofs1) < Long_val(ofs2)) {
1033 /* Copy in descending order */
1034 for (dst = &Field(a2, Long_val(ofs2) + count - 1),
1035 src = &Field(a1, Long_val(ofs1) + count - 1);
1037 count--, src--, dst--) {
1038 caml_modify(dst, *src);
1041 /* Copy in ascending order */
1042 for (dst = &Field(a2, Long_val(ofs2)), src = &Field(a1, Long_val(ofs1));
1044 count--, src++, dst++) {
1045 caml_modify(dst, *src);
1048 /* Many caml_modify in a row can create a lot of old-to-young refs.
1049 Give the minor GC a chance to run if it needs to. */
1050 //caml_check_urgent_gc(Val_unit);
1055 ////////////////////// BP
1057 extern "C" value caml_bitmap_create(value size)
1060 size_t bits = Long_val(size);
1061 size_t words = bits / (8*sizeof(unsigned int));
1062 unsigned int *buffer = (unsigned int*) calloc(words+1, sizeof(unsigned int));
1064 CAMLRAISEMSG("BP: cannot allocate memory");
1065 CAMLreturn( (value) buffer);
1068 extern "C" value caml_bitmap_resize(value bitmap, value nsize)
1070 CAMLparam2(bitmap, nsize);
1071 size_t bits = Long_val(nsize);
1072 size_t bytes = (bits / (8 * sizeof(unsigned int)) + 1 ) * sizeof(unsigned int);
1073 unsigned int * buffer = (unsigned int*) realloc((void *) bitmap, bytes);
1075 CAMLRAISEMSG("BP: cannot reallocate memory");
1076 CAMLreturn((value) buffer);
1079 extern "C" value caml_bitmap_setbit(value bitmap, value i, value b)
1081 CAMLparam3(bitmap, i, b);
1082 unsigned int j = Int_val(i);
1083 unsigned int x = Bool_val(b);
1084 bp_setbit ((unsigned int*) bitmap, j, x);
1085 CAMLreturn(Val_unit);
1088 extern "C" void caml_bp_delete(value b)
1091 bp * B = Obj_val<bp*>(b);
1096 extern "C" value caml_bp_construct(value bitmap, value npar)
1098 CAMLparam2(bitmap, npar);
1100 bp * b = bp_construct(Int_val(npar), (unsigned int *) bitmap, OPT_DEGREE);
1101 res = sxsi_alloc_custom<bp*>(caml_bp_delete);
1102 Obj_val<bp*>(res) = b;
1106 extern "C" value caml_bp_first_child(value b, value idx)
1109 CAMLreturn (Val_int( bp_first_child(Obj_val<bp*>(b), Int_val(idx))));
1113 extern "C" value caml_bp_next_sibling(value b, value idx)
1116 CAMLreturn (Val_int(bp_next_sibling(Obj_val<bp*>(b), Int_val(idx))));
1119 extern "C" value caml_bp_preorder_rank(value b, value idx)
1122 CAMLreturn (Val_int(bp_preorder_rank(Obj_val<bp*>(b), Int_val(idx)) - 1));
1126 extern "C" value caml_bp_load(value file)
1131 int f1 = Int_val(file);
1133 FILE * fd = fdopen(f2, "r");
1135 CAMLRAISEMSG("Error opening bp file");
1138 result = sxsi_alloc_custom<bp*>(caml_bp_delete);
1139 Obj_val<bp*>(result) = B;
1143 extern "C" value caml_bp_save(value b, value file)
1145 CAMLparam2(b, file);
1146 bp *B = Obj_val<bp*>(b);
1147 int f1 = Int_val(file);
1149 FILE * fd = fdopen(f2, "a");
1152 CAMLRAISEMSG("Error saving bp file");
1155 CAMLreturn(Val_unit);