1 /**************************************
4 * An Ocaml Driver which calls the C++ methods and
5 * adds a C wrapper interface with OCaml code.
13 * functions never doing any allocation (non caml_alloc*, caml_copy_string,...)
14 * have NOALLOC in the comment and their external declaration can have "noalloc"
18 #include <unordered_set>
22 #include "XMLTreeBuilder.h"
24 #include "common_stub.hpp"
26 #define CAMLRAISEMSG(msg) (sxsi_raise_msg((char*) (msg)))
28 #define XMLTREE(x) (Obj_val<XMLTree*>(x))
30 #define HSET(x) (Obj_val<TagIdSet*>(x))
32 #define XMLTREEBUILDER(x) (Obj_val<XMLTreeBuilder*>(x))
35 #define TREENODEVAL(i) ((treeNode) (Int_val(i)))
36 #define TAGVAL(i) ((TagType) (Int_val(i)))
37 #define XMLTREE_ROOT 0
42 #include <sys/resource.h>
48 /** XMLTreeBuilder bindings
52 extern "C" value caml_xml_tree_builder_create(value unit)
56 result = sxsi_alloc_custom<XMLTreeBuilder*>();
57 Obj_val<XMLTreeBuilder*>(result) = new XMLTreeBuilder();
62 extern "C" value caml_xml_tree_builder_open_document(value vbuilder,
68 CAMLparam5(vbuilder, vet, vsrate, vdtc, vidxtype);
69 bool empty_text = Bool_val(vet);
70 int sample_rate = Int_val(vsrate);
71 bool disable_tc = Bool_val(vdtc);
72 TextCollectionBuilder::index_type_t idx_type;
73 switch (Int_val(vidxtype)){
75 idx_type = TextCollectionBuilder::index_type_default;
78 idx_type = TextCollectionBuilder::index_type_swcsa;
81 idx_type = TextCollectionBuilder::index_type_rlcsa;
84 CAMLRAISEMSG("Invalid Index Type");
86 int res = XMLTREEBUILDER(vbuilder)->OpenDocument(empty_text,
91 CAMLRAISEMSG("OpenDocument");
93 CAMLreturn (Val_unit);
96 extern "C" value caml_xml_tree_builder_close_document(value vbuilder)
100 XMLTree * tree = XMLTREEBUILDER(vbuilder)->CloseDocument();
102 CAMLRAISEMSG("CloseDocument");
103 result = sxsi_alloc_custom<XMLTree*>();
104 Obj_val<XMLTree*>(result) = tree;
108 extern "C" value caml_xml_tree_builder_new_open_tag(value vbuilder, value vtag)
110 CAMLparam2(vbuilder, vtag);
111 const char * tag = String_val(vtag);
112 if (XMLTREEBUILDER(vbuilder)->NewOpenTag(std::string(tag)) == NULLT)
113 CAMLRAISEMSG("NewOpenTag");
115 CAMLreturn (Val_unit);
118 extern "C" value caml_xml_tree_builder_new_closing_tag(value vbuilder, value vtag)
120 CAMLparam2(vbuilder, vtag);
121 const char * tag = String_val(vtag);
122 if (XMLTREEBUILDER(vbuilder)->NewClosingTag(std::string(tag)) == NULLT)
123 CAMLRAISEMSG("NewClosingTag");
125 CAMLreturn (Val_unit);
128 extern "C" value caml_xml_tree_builder_new_text(value vbuilder, value vtext)
130 CAMLparam2(vbuilder, vtext);
131 const char * text = String_val(vtext);
132 if (XMLTREEBUILDER(vbuilder)->NewText(std::string(text)) == NULLT)
133 CAMLRAISEMSG("NewText");
135 CAMLreturn (Val_unit);
139 /*************************************************************************/
143 * All of the functions here call the _unsafe version and implement the logics themselves
144 * (test for NULLT and so on). This avoids one indirection + one call when the tests fails.
148 extern "C" value caml_xml_tree_save(value tree,value fd, value name){
149 CAMLparam3(tree, fd, name);
150 XMLTREE(tree)->Save(Int_val(fd), String_val(name));
151 CAMLreturn (Val_unit);
154 extern "C" value caml_xml_tree_load(value fd, value name, value load_tc,value sf){
155 CAMLparam4(fd, name, load_tc, sf);
160 tree = XMLTree::Load(Int_val(fd),Bool_val(load_tc),Int_val(sf), String_val(name));
161 result = sxsi_alloc_custom<XMLTree*>();
162 Obj_val<XMLTree*>(result) = tree;
165 catch (const std::exception& e){ CAMLRAISEMSG(e.what()); }
166 catch (std::string msg){ CAMLRAISEMSG(msg.c_str()); }
167 catch (char const * msg){ CAMLRAISEMSG(msg); };
173 NoAlloc extern "C" value caml_xml_tree_root(value tree){
174 return (Val_int(XMLTREE_ROOT));
177 NoAlloc extern "C" value caml_xml_tree_size(value tree){
178 return (Val_int(XMLTREE(tree)->Size()));
181 NoAlloc extern "C" value caml_xml_tree_num_tags(value tree){
182 return (Val_int(XMLTREE(tree)->NumTags()));
185 NoAlloc extern "C" value caml_xml_tree_subtree_size(value tree, value node){
186 return (Val_int(XMLTREE(tree)->SubtreeSize(TREENODEVAL(node))));
189 NoAlloc extern "C" value caml_xml_tree_subtree_tags(value tree, value node, value tag){
190 return (Val_int(XMLTREE(tree)->SubtreeTags(TREENODEVAL(node), TAGVAL(tag))));
193 NoAlloc extern "C" value caml_xml_tree_subtree_elements(value tree, value node){
194 return (Val_int(XMLTREE(tree)->SubtreeElements(TREENODEVAL(node))));
197 NoAlloc extern "C" value caml_xml_tree_is_leaf(value tree, value node){
198 return (Val_bool(XMLTREE(tree)->IsLeaf(TREENODEVAL(node))));
201 NoAlloc extern "C" value caml_xml_tree_is_ancestor(value tree, value node1,value node2){
202 return (Val_bool(XMLTREE(tree)->IsAncestor(TREENODEVAL(node1),TREENODEVAL(node2))));
205 NoAlloc extern "C" value caml_xml_tree_is_child(value tree, value node1,value node2){
206 return (Val_bool(XMLTREE(tree)->IsChild(TREENODEVAL(node1),TREENODEVAL(node2))));
209 NoAlloc extern "C" value caml_xml_tree_is_first_child(value tree, value node){
210 return (Val_bool(XMLTREE(tree)->IsFirstChild(TREENODEVAL(node))));
212 NoAlloc extern "C" value caml_xml_tree_is_right_descendant(value tree, value x, value y){
213 return (Val_bool(XMLTREE(tree)->IsRightDescendant(TREENODEVAL(x), TREENODEVAL(y))));
215 NoAlloc extern "C" value caml_xml_tree_num_children(value tree, value node){
216 return (Val_int(XMLTREE(tree)->NumChildren(TREENODEVAL(node))));
219 NoAlloc extern "C" value caml_xml_tree_child_number(value tree, value node){
220 return (Val_int(XMLTREE(tree)->ChildNumber(TREENODEVAL(node))));
223 NoAlloc extern "C" value caml_xml_tree_depth(value tree, value node){
224 return (Val_int(XMLTREE(tree)->Depth(TREENODEVAL(node))));
227 NoAlloc extern "C" value caml_xml_tree_preorder(value tree, value node){
228 return (Val_int(XMLTREE(tree)->Preorder(TREENODEVAL(node))));
231 NoAlloc extern "C" value caml_xml_tree_postorder(value tree, value node){
232 return (Val_int(XMLTREE(tree)->Postorder(TREENODEVAL(node))));
235 NoAlloc extern "C" value caml_xml_tree_tag(value tree, value node) throw () {
236 return (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(node))));
239 extern "C" value caml_xml_tree_doc_ids(value tree, value node){
240 CAMLparam2(tree,node);
243 tuple = caml_alloc(2,0);
244 ids = XMLTREE(tree)->DocIds(Int_val(node));
245 Store_field(tuple,0,Val_int(ids.min));
246 Store_field(tuple,1,Val_int(ids.max));
250 NoAlloc extern "C" value caml_xml_tree_parent(value tree, value node){
251 return (Val_int(XMLTREE(tree)->Parent(TREENODEVAL(node))));
254 NoAlloc extern "C" value caml_xml_tree_binary_parent(value tree, value node){
255 return (Val_int(XMLTREE(tree)->BinaryParent(TREENODEVAL(node))));
258 NoAlloc extern "C" value caml_xml_tree_child(value tree, value node,value idx){
259 return (Val_int(XMLTREE(tree)->Child(TREENODEVAL(node),Int_val(idx))));
262 NoAlloc extern "C" value caml_xml_tree_first_child(value tree, value node){
263 return (Val_int(XMLTREE(tree)->FirstChild(TREENODEVAL(node))));
266 NoAlloc extern "C" value caml_xml_tree_first_element(value tree, value node){
267 return (Val_int(XMLTREE(tree)->FirstElement(TREENODEVAL(node))));
270 NoAlloc extern "C" value caml_xml_tree_last_child(value tree, value node){
271 return (Val_int(XMLTREE(tree)->LastChild(TREENODEVAL(node))));
274 NoAlloc extern "C" value caml_xml_tree_next_sibling(value tree, value node){
275 return (Val_int(XMLTREE(tree)->NextSibling(TREENODEVAL(node))));
278 NoAlloc extern "C" value caml_xml_tree_next_element(value tree, value node){
279 return (Val_int(XMLTREE(tree)->NextElement(TREENODEVAL(node))));
282 NoAlloc extern "C" value caml_xml_tree_next_node_before(value tree, value node, value ctx){
283 return (Val_int(XMLTREE(tree)->NextNodeBefore(TREENODEVAL(node), TREENODEVAL(ctx))));
286 NoAlloc extern "C" value caml_xml_tree_prev_sibling(value tree, value node){
287 return (Val_int(XMLTREE(tree)->PrevSibling(TREENODEVAL(node))));
290 NoAlloc extern "C" value caml_xml_tree_tagged_child(value tree, value node,value tag){
291 return (Val_int(XMLTREE(tree)->TaggedChild(TREENODEVAL(node),TAGVAL(tag))));
294 NoAlloc extern "C" value caml_xml_tree_select_child(value tree, value node,value tags){
295 return (Val_int(XMLTREE(tree)->SelectChild(TREENODEVAL(node), HSET(tags))));
298 NoAlloc extern "C" value caml_xml_tree_tagged_following_sibling(value tree, value node,value tag){
299 return (Val_int(XMLTREE(tree)->TaggedFollowingSibling(TREENODEVAL(node),TAGVAL(tag))));
302 NoAlloc extern "C" value caml_xml_tree_select_following_sibling(value tree, value node,value tags){
303 return (Val_int(XMLTREE(tree)->SelectFollowingSibling(TREENODEVAL(node), HSET(tags))));
306 NoAlloc extern "C" value caml_xml_tree_tagged_descendant(value tree, value node, value tag){
307 return (Val_int(XMLTREE(tree)->TaggedDescendant(TREENODEVAL(node), TAGVAL(tag))));
310 NoAlloc extern "C" value caml_xml_tree_tagged_next(value tree, value node, value tag){
311 return (Val_int(XMLTREE(tree)->TaggedNext(TREENODEVAL(node), TAGVAL(tag))));
314 NoAlloc extern "C" value caml_xml_tree_select_descendant(value tree, value node, value tags){
315 return (Val_int(XMLTREE(tree)->SelectDescendant(TREENODEVAL(node), HSET(tags))));
318 NoAlloc extern "C" value caml_xml_tree_tagged_preceding(value tree, value node, value tag){
319 return (Val_int(XMLTREE(tree)->TaggedPreceding(TREENODEVAL(node), TAGVAL(tag))));
322 NoAlloc extern "C" value caml_xml_tree_tagged_following(value tree, value node, value tag){
323 return (Val_int(XMLTREE(tree)->TaggedFollowing(TREENODEVAL(node), TAGVAL(tag))));
326 NoAlloc extern "C" value caml_xml_tree_tagged_following_below(value tree, value node, value tag, value ancestor){
327 return (Val_int(XMLTREE(tree)->TaggedFollowingBelow(TREENODEVAL(node), TAGVAL(tag), TREENODEVAL(ancestor))));
330 NoAlloc extern "C" value caml_xml_tree_select_following_below(value tree, value node, value tags, value ancestor){
331 return (Val_int(XMLTREE(tree)->SelectFollowingBelow(TREENODEVAL(node), HSET(tags), TREENODEVAL(ancestor))));
334 NoAlloc extern "C" value caml_xml_tree_tagged_following_before(value tree, value node, value tag, value closing){
335 return (Val_int(XMLTREE(tree)->TaggedFollowingBefore(TREENODEVAL(node), TAGVAL(tag), TREENODEVAL(closing))));
338 NoAlloc extern "C" value caml_xml_tree_select_following_before(value tree, value node, value tags, value closing){
339 return (Val_int(XMLTREE(tree)->SelectFollowingBefore(TREENODEVAL(node), HSET(tags), TREENODEVAL(closing))));
342 NoAlloc extern "C" value caml_xml_tree_tagged_ancestor(value tree, value node, value tag){
343 return (Val_int(XMLTREE(tree)->TaggedAncestor(TREENODEVAL(node), TAGVAL(tag))));
346 NoAlloc extern "C" value caml_xml_tree_my_text(value tree, value node){
347 return (Val_int(XMLTREE(tree)->MyText(TREENODEVAL(node))));
350 NoAlloc extern "C" value caml_xml_tree_my_text_unsafe(value tree, value node){
351 return (Val_int(XMLTREE(tree)->MyTextUnsafe(TREENODEVAL(node))));
354 NoAlloc extern "C" value caml_xml_tree_text_xml_id(value tree, value docid){
355 return (Val_int(XMLTREE(tree)->TextXMLId(Int_val(docid))));
358 NoAlloc extern "C" value caml_xml_tree_node_xml_id(value tree, value node){
359 return (Val_int(XMLTREE(tree)->NodeXMLId(TREENODEVAL(node))));
362 NoAlloc extern "C" value caml_xml_tree_parent_node(value tree, value docid){
363 return (Val_int(XMLTREE(tree)->ParentNode(Int_val(docid))));
366 NoAlloc extern "C" value caml_xml_tree_prev_node(value tree, value docid){
367 return (Val_int(XMLTREE(tree)->PrevNode(Int_val(docid))));
370 extern "C" value caml_xml_tree_get_tag_id(value tree, value tagname){
371 CAMLparam2(tree,tagname);
373 unsigned char* ctagname = (unsigned char*) strdup(String_val(tagname));
374 res = Val_int(XMLTREE(tree)->GetTagId(ctagname));
379 extern "C" value caml_xml_tree_get_tag_name(value tree, value tag){
380 CAMLparam2(tree,tag);
382 res = caml_copy_string((const char*) XMLTREE(tree)->GetTagNameByRef(TAGVAL(tag)));
386 extern "C" value caml_xml_tree_register_tag(value tree, value tagname){
387 CAMLparam2(tree,tagname);
389 unsigned char* ctagname = (unsigned char*) strdup(String_val(tagname));
390 res = Val_int(XMLTREE(tree)->RegisterTag(ctagname));
396 NoAlloc extern "C" value caml_xml_tree_get_text_collection(value tree){
397 return((value) XMLTREE(tree)->getTextCollection());
400 NoAlloc extern "C" value caml_xml_tree_closing(value tree, value node){
401 return (Val_int(XMLTREE(tree)->Closing(TREENODEVAL(node))));
404 NoAlloc extern "C" value caml_xml_tree_is_open(value tree, value node){
405 return (Val_bool(XMLTREE(tree)->IsOpen(TREENODEVAL(node))));
410 NoAlloc extern "C" value caml_xml_tree_nullt(value unit){
415 NoAlloc extern "C" value caml_unordered_set_length(value hset){
416 return (Val_int((HSET(hset))->size()));
419 extern "C" value caml_unordered_set_alloc(value unit){
422 hset = sxsi_alloc_custom<TagIdSet*>();
423 Obj_val<TagIdSet*>(hset) = new TagIdSet();
427 NoAlloc extern "C" value caml_unordered_set_set(value set, value v){
428 HSET(set)->insert((int) Int_val(v));
432 // NoAlloc extern "C" value caml_result_set_create(value size){
433 // results* res = (results*) malloc(sizeof(results));
434 // results r = createResults (Int_val(size));
437 // res->tree = r.tree;
438 // return ((value) (res));
441 // NoAlloc extern "C" value caml_result_set_set(value result,value p){
442 // setResult ( *((results*) result), Int_val(p));
443 // return (Val_unit);
446 // NoAlloc extern "C" value caml_result_set_clear(value result,value p1,value p2){
447 // clearRange ( *((results*) result), Int_val(p1), Int_val(p2));
448 // return (Val_unit);
451 // NoAlloc extern "C" value caml_result_set_next(value result,value p){
453 // r = *( (results *) result);
454 // return (Val_int(nextResult(r, Int_val(p))));
457 // NoAlloc extern "C" value caml_result_set_count(value result){
459 // r = *( (results *) result);
460 // return (Val_int(countResult(r)));
463 NoAlloc extern "C" value caml_xml_tree_print(value tree,value node,value fd){
464 CAMLparam3(tree,node,fd);
465 XMLTREE(tree)->Print(Int_val(fd),TREENODEVAL(node), false);
466 CAMLreturn(Val_unit);
469 NoAlloc extern "C" value caml_xml_tree_flush(value tree, value fd){
471 XMLTREE(tree)->Flush(Int_val(fd));
472 CAMLreturn(Val_unit);
475 // NoAlloc extern "C" value caml_set_tag_bits(value result, value tag, value tree, value node)
478 // XMLTree *t = XMLTREE(Field(tree,0));
479 // treeNode opening = TREENODEVAL(node);
480 // treeNode closing = t->Closing(opening);
481 // TagType target_tag = Int_val(tag);
482 // treeNode first = t->TaggedDescendant(opening,target_tag);
483 // r = *( (results *) result);
485 // while (opening != NULLT){
486 // setResult(r,opening);
487 // opening = t->TaggedFollowingBefore(opening,target_tag,closing);
489 // return(Val_int(first));
493 NoAlloc extern "C" value caml_bit_vector_create(value size){
494 return (value) (new vector<bool>(Int_val(size),false));
497 NoAlloc extern "C" value caml_bit_vector_free(value vect){
498 delete ((vector<bool>*) vect);
502 NoAlloc extern "C" value caml_bit_vector_get(value vect,value idx){
503 return Val_bool (((vector<bool>*)vect)->at(Int_val(idx)));
506 NoAlloc extern "C" value caml_bit_vector_set(value vect,value idx,value b){
507 (((vector<bool>*)vect)->at(Int_val(idx))) = (bool) Bool_val(b);
511 NoAlloc extern "C" value caml_bit_vector_next(value vect,value idx){
512 vector<bool>* bv = (vector<bool>*) vect;
513 int i = Int_val(idx);
515 while (i < l && !((*bv)[i]))
519 NoAlloc extern "C" value caml_bit_vector_prev(value vect,value idx){
520 int i = Int_val(idx);
521 while (i >= 0 && !((*((vector<bool>*) vect))[i]))
526 extern "C" value caml_bit_vector_node_array(value vect){
529 vector<bool>* bv = (vector<bool>*) vect;
534 if ((*bv)[i]) vr.push_back(i);
538 res = caml_alloc_tuple(l);
540 caml_initialize(&Field(res,i),Val_int(vr[i]));
545 int iterjump(XMLTree* tree, treeNode node, TagType tag, treeNode anc){
551 + iterjump(tree,tree->TaggedDescendant(node,tag),tag,node)
552 + iterjump(tree,tree->TaggedFollowingBelow(node,tag,anc),tag,anc);
556 extern "C" value caml_benchmark_jump(value tree,value tag){
558 treeNode root = XMLTREE(tree)->FirstChild(0);
559 root = XMLTREE(tree)->FirstChild(root);
560 count = iterjump(XMLTREE(tree), root , Int_val(tag),0);
561 return Val_int(count);
564 int iterfcns(XMLTree* tree, treeNode node){
569 tmp += iterfcns(tree,tree->FirstChild(node));
570 tmp += iterfcns(tree,tree->NextSibling(node));
576 int iterfene(XMLTree* tree, treeNode node){
581 tmp += iterfene(tree,tree->FirstElement(node));
582 tmp += iterfene(tree,tree->NextElement(node));
588 extern "C" value caml_benchmark_fcns(value tree){
589 int i = iterfcns(XMLTREE(tree),0);
593 extern "C" value caml_benchmark_fene(value tree){
594 int i = iterfene(XMLTREE(tree),0);
598 int iterlcps(XMLTree* tree, treeNode node){
602 int x = tree->Tag(node);
603 x += iterlcps(tree,tree->LastChild(node));
604 x += iterlcps(tree,tree->PrevSibling(node));
609 int fulliterative(XMLTree* tree){
610 treeNode current = tree->Root();
611 treeNode next = NULLT;
612 int count = 1; //the root
616 while ((next = tree->FirstChild(current)) != NULLT) {
621 while ( (next = tree->NextSibling(current)) == NULLT){
622 current = tree->Parent(current);
623 if (current == NULLT) return count;
631 extern "C" value caml_benchmark_iter(value tree){
632 return Val_int(fulliterative(XMLTREE(tree)));
635 extern "C" value caml_benchmark_lcps(value tree){
637 iterlcps(XMLTREE(tree),0);
644 typedef struct dummy_node_ {
645 struct dummy_node_* first;
646 struct dummy_node_* next;
650 dummy_node * new_dummy_node () {
652 dummy_node * node = (dummy_node*) malloc(sizeof(dummy_node));
654 printf("%s","Cannot allocate memory\n");
659 void free_tree(dummy_node * node){
661 free_tree(node->first);
662 free_tree(node->next);
668 dummy_node * create_tree(XMLTree* tree, treeNode i, int mode){
672 dummy_node * f, *n, *r;
675 if (mode == 0) r = new_dummy_node();
676 f = create_tree(tree,tree->FirstChild(i), mode);
677 if (mode == 1) r = new_dummy_node();
678 n = create_tree(tree,tree->NextSibling(i), mode);
679 if (mode == 2) r = new_dummy_node();
686 int iter_tree(dummy_node * n){
690 return 1 + iter_tree (n->first) + iter_tree (n->next);
693 extern "C" value caml_build_pointers(value tree, value mode){
694 return ((value) create_tree(XMLTREE(Field(tree,0)),0, Int_val(mode)));
697 extern "C" value caml_iter_pointers (value node){
698 return Val_int(iter_tree((dummy_node*) node));
702 extern "C" value caml_free_pointers(value node){
703 free_tree((dummy_node*) node);
707 * Interface to the TextCollection
714 extern "C" value caml_text_collection_get_text(value tree, value id){
717 uchar* txt = XMLTREE(tree)->GetText((DocID) Int_val(id));
718 str = caml_copy_string((const char*)txt);
723 extern "C" value caml_text_collection_empty_text(value tree,value id){
725 CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id))));
728 bool docId_comp(DocID x, DocID y) { return x < y; }
731 * Existential queries
734 extern "C" value caml_text_collection_is_prefix(value tree,value str){
735 CAMLparam2(tree,str);
736 uchar * cstr = (uchar *) String_val(str);
737 CAMLreturn (Val_bool((int) XMLTREE(tree)->IsPrefix(cstr)));
740 extern "C" value caml_text_collection_is_suffix(value tree,value str){
741 CAMLparam2(tree,str);
742 uchar * cstr = (uchar *) String_val(str);
743 CAMLreturn (Val_bool((int) XMLTREE(tree)->IsSuffix(cstr)));
745 extern "C" value caml_text_collection_is_equal(value tree,value str){
746 CAMLparam2(tree,str);
747 uchar * cstr = (uchar *) String_val(str);
748 CAMLreturn (Val_bool((int) XMLTREE(tree)->IsEqual(cstr)));
750 extern "C" value caml_text_collection_is_contains(value tree,value str){
751 CAMLparam2(tree,str);
752 uchar * cstr = (uchar *) String_val(str);
753 CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsContains(cstr)));
756 extern "C" value caml_text_collection_is_lessthan(value tree,value str){
757 CAMLparam2(tree,str);
758 uchar * cstr = (uchar *) String_val(str);
759 CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsLessThan(cstr)));
770 extern "C" value caml_text_collection_count(value tree,value str){
771 CAMLparam2(tree,str);
772 uchar * cstr = (uchar *) String_val(str);
773 CAMLreturn (Val_int((XMLTREE(tree)->Count(cstr))));
776 extern "C" value caml_text_collection_count_prefix(value tree,value str){
777 CAMLparam2(tree,str);
778 uchar * cstr = (uchar *) String_val(str);
779 CAMLreturn (Val_int((XMLTREE(tree)->CountPrefix(cstr))));
782 extern "C" value caml_text_collection_count_suffix(value tree,value str){
783 CAMLparam2(tree,str);
784 uchar * cstr = (uchar *) String_val(str);
785 CAMLreturn (Val_int((XMLTREE(tree)->CountSuffix(cstr))));
788 extern "C" value caml_text_collection_count_equal(value tree,value str){
789 CAMLparam2(tree,str);
790 uchar * cstr = (uchar *) String_val(str);
791 CAMLreturn (Val_int((XMLTREE(tree)->CountEqual(cstr))));
794 extern "C" value caml_text_collection_count_contains(value tree,value str){
795 CAMLparam2(tree,str);
796 uchar * cstr = (uchar *) String_val(str);
797 CAMLreturn (Val_int((XMLTREE(tree)->CountContains(cstr))));
800 extern "C" value caml_text_collection_count_lessthan(value tree,value str){
801 CAMLparam2(tree,str);
802 uchar * cstr = (uchar *) String_val(str);
803 CAMLreturn (Val_int((XMLTREE(tree)->CountLessThan(cstr))));
806 static value sort_alloc_array(std::vector<DocID> results, value resarray){
807 std::sort(results.begin(), results.end(), docId_comp);
808 size_t s = results.size();
809 resarray = caml_alloc_tuple(s);
810 for (size_t i = 0; i < s ;i++){
811 caml_initialize(&Field(resarray,i),Val_int(results[i]));
818 * Full reporting queries
821 extern "C" value caml_text_collection_prefix(value tree,value str){
822 CAMLparam2(tree,str);
823 CAMLlocal1(resarray);
824 uchar * cstr = (uchar *) String_val(str);
825 std::vector<DocID> results = XMLTREE(tree)->Prefix(cstr);
826 CAMLreturn (sort_alloc_array(results,resarray));
829 extern "C" value caml_text_collection_suffix(value tree,value str){
830 CAMLparam2(tree,str);
831 CAMLlocal1(resarray);
832 uchar * cstr = (uchar *) String_val(str);
833 std::vector<DocID> results = XMLTREE(tree)->Suffix(cstr);
834 CAMLreturn (sort_alloc_array(results,resarray));
837 extern "C" value caml_text_collection_equals(value tree,value str){
838 CAMLparam2(tree,str);
839 CAMLlocal1(resarray);
840 uchar * cstr = (uchar *) strdup(String_val(str));
841 std::vector<DocID> results = XMLTREE(tree)->Equals(cstr);
843 CAMLreturn (sort_alloc_array(results,resarray));
846 extern "C" value caml_text_collection_contains(value tree,value str){
847 CAMLparam2(tree,str);
848 CAMLlocal1(resarray);
849 uchar * cstr = (uchar *) String_val(str);
850 std::vector<DocID> results = XMLTREE(tree)->Contains(cstr);
851 CAMLreturn (sort_alloc_array(results,resarray));
854 extern "C" value caml_text_collection_lessthan(value tree,value str){
855 CAMLparam2(tree,str);
856 CAMLlocal1(resarray);
857 uchar * cstr = (uchar *) String_val(str);
858 std::vector<DocID> results = XMLTREE(tree)->LessThan(cstr);
859 CAMLreturn (sort_alloc_array(results,resarray));
862 /** Full reporting into a bit vector
865 #define BV_QUERY(pref, Pref) \
866 extern "C" value caml_text_collection_## pref ##_bv(value tree, value str){ \
867 CAMLparam2(tree, str); \
868 CAMLlocal3(res, res_bv, res_array); \
870 uchar * cstr = (uchar *) strdup(String_val(str)); \
871 std::vector<DocID> results = XMLTREE(tree)->Pref(cstr); \
872 res_bv = caml_alloc_string((XMLTREE(tree)->Size() / 4) + 2); \
873 unsigned long slen = caml_string_length(res_bv); \
874 memset(&(Byte(res_bv,0)), 0, slen); \
875 res_array = caml_alloc_shr(results.size(), 0); \
876 for (unsigned int i = 0; i < results.size(); ++i) { \
877 j = XMLTREE(tree)->ParentNode(results[i]); \
878 Byte(res_bv, j >> 3) |= (1 << (j & 7)); \
879 caml_initialize(&Field(res_array, i), Val_int(j)); \
882 res = caml_alloc(2, 0); \
883 Store_field(res, 0, res_bv); \
884 Store_field(res, 1, res_array); \
889 BV_QUERY(prefix, Prefix)
890 BV_QUERY(suffix, Suffix)
891 BV_QUERY(equals, Equals)
892 BV_QUERY(contains, Contains)
893 BV_QUERY(lessthan, LessThan)
896 ////////////////////// BP
898 extern "C" value caml_bitmap_create(value size)
901 size_t bits = Long_val(size);
902 size_t words = bits / (8*sizeof(unsigned int));
903 unsigned int *buffer = (unsigned int*) calloc(words+1, sizeof(unsigned int));
905 CAMLRAISEMSG("BP: cannot allocate memory");
906 CAMLreturn( (value) buffer);
909 extern "C" value caml_bitmap_resize(value bitmap, value nsize)
911 CAMLparam2(bitmap, nsize);
912 size_t bits = Long_val(nsize);
913 size_t bytes = (bits / (8 * sizeof(unsigned int)) + 1 ) * sizeof(unsigned int);
914 unsigned int * buffer = (unsigned int*) realloc((void *) bitmap, bytes);
916 CAMLRAISEMSG("BP: cannot reallocate memory");
917 CAMLreturn((value) buffer);
920 extern "C" value caml_bitmap_setbit(value bitmap, value i, value b)
922 CAMLparam3(bitmap, i, b);
923 unsigned int j = Int_val(i);
924 unsigned int x = Bool_val(b);
925 bp_setbit ((unsigned int*) bitmap, j, x);
926 CAMLreturn(Val_unit);
929 extern "C" void caml_bp_delete(value b)
932 bp * B = Obj_val<bp*>(b);
937 extern "C" value caml_bp_construct(value bitmap, value npar)
939 CAMLparam2(bitmap, npar);
941 bp * b = bp_construct(Int_val(npar), (unsigned int *) bitmap, OPT_DEGREE);
942 res = sxsi_alloc_custom<bp*>(caml_bp_delete);
943 Obj_val<bp*>(res) = b;
947 extern "C" value caml_bp_first_child(value b, value idx)
950 CAMLreturn (Val_int( bp_first_child(Obj_val<bp*>(b), Int_val(idx))));
954 extern "C" value caml_bp_next_sibling(value b, value idx)
957 CAMLreturn (Val_int(bp_next_sibling(Obj_val<bp*>(b), Int_val(idx))));
960 extern "C" value caml_bp_preorder_rank(value b, value idx)
963 CAMLreturn (Val_int(bp_preorder_rank(Obj_val<bp*>(b), Int_val(idx)) - 1));
967 extern "C" value caml_bp_load(value file)
972 int f1 = Int_val(file);
974 FILE * fd = fdopen(f2, "r");
976 CAMLRAISEMSG("Error opening bp file");
979 result = sxsi_alloc_custom<bp*>(caml_bp_delete);
980 Obj_val<bp*>(result) = B;
984 extern "C" value caml_bp_save(value b, value file)
987 bp *B = Obj_val<bp*>(b);
988 int f1 = Int_val(file);
990 FILE * fd = fdopen(f2, "a");
993 CAMLRAISEMSG("Error saving bp file");
996 CAMLreturn(Val_unit);
999 extern "C" value caml_bp_alloc_stats(value unit)
1002 CAMLreturn (Val_long(bp_get_alloc_stats()));