1 /**************************************
4 * An OCaml driver which calls the C++ methods and
5 * exposes them to OCaml code through a C wrapper interface.
13 * Functions that never perform any allocation (no caml_alloc*, caml_copy_string, ...)
14 * have NOALLOC in their comment, and their external declaration can have "noalloc".
18 #include <unordered_set>
22 #include "XMLTreeBuilder.h"
24 #include "common_stub.hpp"
// Forwards msg (cast to char*) to sxsi_raise_msg. The name suggests it raises
// an exception on the OCaml side -- confirm in common_stub.hpp.
26 #define CAMLRAISEMSG(msg) (sxsi_raise_msg((char*) (msg)))
// Accessors for the native pointer carried by an OCaml value, all expressed
// through Obj_val<T>. Obj_val is also used as an assignment target elsewhere
// in this file, so these expansions may be assignable.
28 #define XMLTREE(x) (Obj_val<XMLTree*>(x))
30 #define HSET(x) (Obj_val<TagIdSet*>(x))
32 #define XMLTREEBUILDER(x) (Obj_val<XMLTreeBuilder*>(x))
// Unbox an OCaml integer and cast it to the corresponding C++ type.
35 #define TREENODEVAL(i) ((treeNode) (Int_val(i)))
36 #define TAGVAL(i) ((TagType) (Int_val(i)))
// NOTE(review): presumably the node identifier of the document root -- confirm.
37 #define XMLTREE_ROOT 0
42 #include <sys/resource.h>
44 #include <bp-darray.h>
48 extern "C" value caml_clz(value i)
50 return Val_long( ((sizeof(unsigned long)*8) - __builtin_clzl(Long_val(i))) - 1);
53 extern "C" value caml_leading_bit(value i)
55 return Val_long( ( 1 << (sizeof(unsigned long)*8 - __builtin_clzl(Long_val(i)) - 1)));
60 * Interface to the TextCollection
/**
 * OCaml stub: asks the XMLTree wrapped by [tree] for the text whose document
 * identifier is Int_val(id) (GetText) and copies it into a fresh OCaml string
 * with caml_copy_string.
 *
 * NOTE(review): the declaration of `str`, the CAMLparam/CAMLlocal
 * registration and the return statement are not visible in this excerpt.
 * Whether the buffer returned by GetText has to be released by the caller
 * cannot be determined from here -- confirm against XMLTree::GetText.
 */
67 extern "C" value caml_text_collection_get_text(value tree, value id){
70 uchar* txt = XMLTREE(tree)->GetText((DocID) Int_val(id));
// caml_copy_string expects a NUL-terminated C string, hence the cast from uchar*.
71 str = caml_copy_string((const char*)txt);
/**
 * OCaml stub: returns, boxed with Val_int, the result of EmptyText on the
 * XMLTree wrapped by [tree] for the document identifier Int_val(id).
 *
 * NOTE(review): CAMLreturn needs a matching CAMLparam, which is not visible
 * in this excerpt -- confirm against the full file.
 */
75 extern "C" value caml_text_collection_empty_text(value tree,value id){
77 CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id))));
80 bool docId_comp(DocID x, DocID y) { return x < y; }
/**
 * OCaml stub: passes the bytes of the OCaml string [str] to IsPrefix on the
 * XMLTree wrapped by [tree] and returns the outcome as an OCaml bool.
 *
 * NOTE(review): CAMLreturn needs a matching CAMLparam, which is not visible
 * in this excerpt -- confirm against the full file.
 */
86 extern "C" value caml_text_collection_is_prefix(value tree,value str){
// Points directly into the OCaml string; no copy is made.
88 uchar * cstr = (uchar *) String_val(str);
89 CAMLreturn (Val_bool((int) XMLTREE(tree)->IsPrefix(cstr)));
/**
 * OCaml stub: passes the bytes of the OCaml string [str] to IsSuffix on the
 * XMLTree wrapped by [tree] and returns the outcome as an OCaml bool.
 *
 * NOTE(review): CAMLreturn needs a matching CAMLparam, which is not visible
 * in this excerpt -- confirm against the full file.
 */
92 extern "C" value caml_text_collection_is_suffix(value tree,value str){
// Points directly into the OCaml string; no copy is made.
94 uchar * cstr = (uchar *) String_val(str);
95 CAMLreturn (Val_bool((int) XMLTREE(tree)->IsSuffix(cstr)));
/**
 * OCaml stub: passes the bytes of the OCaml string [str] to IsEqual on the
 * XMLTree wrapped by [tree] and returns the outcome as an OCaml bool.
 *
 * NOTE(review): CAMLreturn needs a matching CAMLparam, which is not visible
 * in this excerpt -- confirm against the full file.
 */
97 extern "C" value caml_text_collection_is_equal(value tree,value str){
// Points directly into the OCaml string; no copy is made.
99 uchar * cstr = (uchar *) String_val(str);
100 CAMLreturn (Val_bool((int) XMLTREE(tree)->IsEqual(cstr)));
102 extern "C" value caml_text_collection_is_contains(value tree,value str){
103 CAMLparam2(tree,str);
104 uchar * cstr = (uchar *) String_val(str);
105 CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsContains(cstr)));
108 extern "C" value caml_text_collection_is_lessthan(value tree,value str){
109 CAMLparam2(tree,str);
110 uchar * cstr = (uchar *) String_val(str);
111 CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsLessThan(cstr)));
122 extern "C" value caml_text_collection_count(value tree,value str){
123 CAMLparam2(tree,str);
124 uchar * cstr = (uchar *) String_val(str);
125 CAMLreturn (Val_int((XMLTREE(tree)->Count(cstr))));
128 extern "C" value caml_text_collection_count_prefix(value tree,value str){
129 CAMLparam2(tree,str);
130 uchar * cstr = (uchar *) String_val(str);
131 CAMLreturn (Val_int((XMLTREE(tree)->CountPrefix(cstr))));
134 extern "C" value caml_text_collection_count_suffix(value tree,value str){
135 CAMLparam2(tree,str);
136 uchar * cstr = (uchar *) String_val(str);
137 CAMLreturn (Val_int((XMLTREE(tree)->CountSuffix(cstr))));
140 extern "C" value caml_text_collection_count_equal(value tree,value str){
141 CAMLparam2(tree,str);
142 uchar * cstr = (uchar *) String_val(str);
143 CAMLreturn (Val_int((XMLTREE(tree)->CountEqual(cstr))));
146 extern "C" value caml_text_collection_count_contains(value tree,value str){
147 CAMLparam2(tree,str);
148 uchar * cstr = (uchar *) String_val(str);
149 CAMLreturn (Val_int((XMLTREE(tree)->CountContains(cstr))));
152 extern "C" value caml_text_collection_count_lessthan(value tree,value str){
153 CAMLparam2(tree,str);
154 uchar * cstr = (uchar *) String_val(str);
155 CAMLreturn (Val_int((XMLTREE(tree)->CountLessThan(cstr))));
/**
 * Sorts [results] with docId_comp and copies the identifiers, in that order,
 * into a freshly allocated OCaml tuple of the same length, each one boxed
 * with Val_int.
 *
 * [results] is received by value, so the sort works on a copy and the
 * caller's vector is left untouched. [resarray] is overwritten with the new
 * tuple; its incoming value is not read.
 *
 * NOTE(review): the end of the function (loop closing and the return,
 * presumably of resarray) is not visible in this excerpt.
 */
158 static value sort_alloc_array(std::vector<DocID> results, value resarray){
159 std::sort(results.begin(), results.end(), docId_comp);
160 size_t s = results.size();
161 resarray = caml_alloc_tuple(s);
162 for (size_t i = 0; i < s ;i++){
// Each field is written exactly once, right after the allocation.
163 caml_initialize(&Field(resarray,i),Val_int(results[i]));
170 * Full reporting queries
173 extern "C" value caml_text_collection_prefix(value tree,value str){
174 CAMLparam2(tree,str);
175 CAMLlocal1(resarray);
176 uchar * cstr = (uchar *) String_val(str);
177 std::vector<DocID> results = XMLTREE(tree)->Prefix(cstr);
178 CAMLreturn (sort_alloc_array(results,resarray));
181 extern "C" value caml_text_collection_suffix(value tree,value str){
182 CAMLparam2(tree,str);
183 CAMLlocal1(resarray);
184 uchar * cstr = (uchar *) String_val(str);
185 std::vector<DocID> results = XMLTREE(tree)->Suffix(cstr);
186 CAMLreturn (sort_alloc_array(results,resarray));
/**
 * OCaml stub: collects the document identifiers reported by Equals on the
 * XMLTree wrapped by [tree] for the bytes of [str], and hands them to
 * sort_alloc_array.
 *
 * Unlike the sibling reporting stubs, this one passes a strdup'ed copy of
 * the OCaml string rather than a pointer into it.
 *
 * NOTE(review): no release of the strdup'ed buffer is visible in this
 * excerpt (one line between the Equals call and the return is not shown).
 * Confirm that the copy is freed, that strdup failure is handled, and
 * whether the copy is needed at all.
 */
189 extern "C" value caml_text_collection_equals(value tree,value str){
190 CAMLparam2(tree,str);
191 CAMLlocal1(resarray);
// Heap copy of the pattern, owned by this function.
192 uchar * cstr = (uchar *) strdup(String_val(str));
193 std::vector<DocID> results = XMLTREE(tree)->Equals(cstr);
195 CAMLreturn (sort_alloc_array(results,resarray));
198 extern "C" value caml_text_collection_contains(value tree,value str){
199 CAMLparam2(tree,str);
200 CAMLlocal1(resarray);
201 uchar * cstr = (uchar *) String_val(str);
202 std::vector<DocID> results = XMLTREE(tree)->Contains(cstr);
203 CAMLreturn (sort_alloc_array(results,resarray));
206 extern "C" value caml_text_collection_lessthan(value tree,value str){
207 CAMLparam2(tree,str);
208 CAMLlocal1(resarray);
209 uchar * cstr = (uchar *) String_val(str);
210 std::vector<DocID> results = XMLTREE(tree)->LessThan(cstr);
211 CAMLreturn (sort_alloc_array(results,resarray));
215 ////////////////////// BP
/**
 * OCaml stub: allocates a zero-filled buffer of unsigned ints able to hold
 * Long_val(size) bits and returns the raw buffer pointer cast to value.
 *
 * NOTE(review): the opening of the body, the CAMLparam registration and the
 * condition guarding the CAMLRAISEMSG call (presumably a NULL check on
 * buffer) are not visible in this excerpt -- confirm against the full file.
 */
217 extern "C" value caml_bitmap_create(value size)
220 size_t bits = Long_val(size);
// Number of whole unsigned-int words covered by `bits`.
221 size_t words = bits / (8*sizeof(unsigned int));
// One extra word covers the remainder bits; calloc zero-fills the buffer.
222 unsigned int *buffer = (unsigned int*) calloc(words+1, sizeof(unsigned int));
224 CAMLRAISEMSG("BP: cannot allocate memory");
225 CAMLreturn( (value) buffer);
/**
 * OCaml stub: reallocs the buffer whose pointer is carried by [bitmap] so it
 * can hold Long_val(nsize) bits, and returns the resulting pointer cast to
 * value.
 *
 * NOTE(review): the condition guarding the CAMLRAISEMSG call (presumably a
 * NULL check on buffer) is not visible in this excerpt -- confirm against the
 * full file.
 */
228 extern "C" value caml_bitmap_resize(value bitmap, value nsize)
230 CAMLparam2(bitmap, nsize);
231 size_t bits = Long_val(nsize);
// Whole words covered by `bits`, plus one for the remainder, expressed in bytes.
232 size_t bytes = (bits / (8 * sizeof(unsigned int)) + 1 ) * sizeof(unsigned int);
// realloc does not zero any newly added bytes.
233 unsigned int * buffer = (unsigned int*) realloc((void *) bitmap, bytes);
235 CAMLRAISEMSG("BP: cannot reallocate memory");
236 CAMLreturn((value) buffer);
239 extern "C" value caml_bitmap_setbit(value bitmap, value i, value b)
241 CAMLparam3(bitmap, i, b);
242 unsigned int j = Int_val(i);
243 unsigned int x = Bool_val(b);
244 bp_setbit ((unsigned int*) bitmap, j, x);
245 CAMLreturn(Val_unit);
/**
 * Callback handed to sxsi_alloc_custom<bp*> by the stubs that wrap a bp* in
 * an OCaml value. Reads the bp* stored in [b].
 *
 * NOTE(review): the rest of the body (presumably the release of B) is not
 * visible in this excerpt.
 */
248 extern "C" void caml_bp_delete(value b)
251 bp * B = Obj_val<bp*>(b);
/**
 * OCaml stub: builds a bp with bp_construct from Int_val(npar), the raw
 * unsigned int buffer pointer carried by [bitmap] and the OPT_DEGREE option,
 * then stores the resulting pointer in a block obtained from
 * sxsi_alloc_custom<bp*> with caml_bp_delete as its callback.
 *
 * NOTE(review): the declaration of `res` and the return statement are not
 * visible in this excerpt. Whether bp_construct takes ownership of the
 * bitmap buffer cannot be determined from here -- confirm in bp-darray.h.
 */
256 extern "C" value caml_bp_construct(value bitmap, value npar)
258 CAMLparam2(bitmap, npar);
260 bp * b = bp_construct(Int_val(npar), (unsigned int *) bitmap, OPT_DEGREE);
261 res = sxsi_alloc_custom<bp*>(caml_bp_delete);
// Obj_val<bp*> is used as an assignment target to store the pointer in the block.
262 Obj_val<bp*>(res) = b;
/**
 * OCaml stub: returns, boxed with Val_int, the result of bp_first_child on
 * the bp* stored in [b] for the position Int_val(idx).
 *
 * NOTE(review): CAMLreturn needs a matching CAMLparam, which is not visible
 * in this excerpt -- confirm against the full file.
 */
266 extern "C" value caml_bp_first_child(value b, value idx)
269 CAMLreturn (Val_int( bp_first_child(Obj_val<bp*>(b), Int_val(idx))));
/**
 * OCaml stub: returns, boxed with Val_int, the result of bp_next_sibling on
 * the bp* stored in [b] for the position Int_val(idx).
 *
 * NOTE(review): CAMLreturn needs a matching CAMLparam, which is not visible
 * in this excerpt -- confirm against the full file.
 */
273 extern "C" value caml_bp_next_sibling(value b, value idx)
276 CAMLreturn (Val_int(bp_next_sibling(Obj_val<bp*>(b), Int_val(idx))));
/**
 * OCaml stub: returns, boxed with Val_int, bp_preorder_rank on the bp* stored
 * in [b] for the position Int_val(idx), minus one.
 *
 * NOTE(review): the subtraction presumably converts a one-based rank to a
 * zero-based one -- confirm against bp_preorder_rank. CAMLreturn needs a
 * matching CAMLparam, which is not visible in this excerpt.
 */
279 extern "C" value caml_bp_preorder_rank(value b, value idx)
282 CAMLreturn (Val_int(bp_preorder_rank(Obj_val<bp*>(b), Int_val(idx)) - 1));
/**
 * OCaml stub: reads a bp from the file descriptor Int_val(file) through a
 * FILE* opened with fdopen in "r" mode, and stores the resulting bp* in a
 * block obtained from sxsi_alloc_custom<bp*> with caml_bp_delete as its
 * callback.
 *
 * NOTE(review): several statements are not visible in this excerpt: the
 * definition of f2 (presumably derived from f1), the condition guarding the
 * CAMLRAISEMSG call, the code that produces B, the handling of fd afterwards
 * and the return. Confirm against the full file.
 */
286 extern "C" value caml_bp_load(value file)
291 int f1 = Int_val(file);
293 FILE * fd = fdopen(f2, "r");
295 CAMLRAISEMSG("Error opening bp file");
298 result = sxsi_alloc_custom<bp*>(caml_bp_delete);
// Obj_val<bp*> is used as an assignment target to store the pointer in the block.
299 Obj_val<bp*>(result) = B;
/**
 * OCaml stub: writes the bp stored in [b] to the file descriptor
 * Int_val(file) through a FILE* opened with fdopen in "a" (append) mode, and
 * returns Val_unit.
 *
 * NOTE(review): several statements are not visible in this excerpt: the
 * definition of f2 (presumably derived from f1), the condition guarding the
 * CAMLRAISEMSG call, the actual write of B and the handling of fd
 * afterwards. Confirm against the full file.
 */
303 extern "C" value caml_bp_save(value b, value file)
306 bp *B = Obj_val<bp*>(b);
307 int f1 = Int_val(file);
309 FILE * fd = fdopen(f2, "a");
312 CAMLRAISEMSG("Error saving bp file");
315 CAMLreturn(Val_unit);
/**
 * OCaml stub: returns the value of bp_get_alloc_stats(), boxed with Val_long.
 * The [unit] argument is not read in the visible lines.
 *
 * NOTE(review): CAMLreturn needs a matching CAMLparam, which is not visible
 * in this excerpt -- confirm against the full file.
 */
318 extern "C" value caml_bp_alloc_stats(value unit)
321 CAMLreturn (Val_long(bp_get_alloc_stats()));