#include "XMLTree.h"
#include "XMLTreeBuilder.h"
-#include "Grammar.h"
#include "Utils.h"
#include "common_stub.hpp"
#define XMLTREEBUILDER(x) (Obj_val<XMLTreeBuilder*>(x))
-#define GRAMMAR(x) (Obj_val<Grammar*>(x))
-
#define TREENODEVAL(i) ((treeNode) (Int_val(i)))
#define TAGVAL(i) ((TagType) (Int_val(i)))
#include <sys/time.h>
#include <sys/resource.h>
#include <stdio.h>
+#include <bp-darray.h>
}
+/**
+ * Returns the index of the most significant set bit of the OCaml integer i,
+ * computed as (bit width of unsigned long) - (leading zero count) - 1.
+ * __builtin_clzl is undefined for 0, so callers must pass a non-zero value.
+ */
+extern "C" value caml_clz(value i)
+{
+  const unsigned long word = Long_val(i);
+  const auto width = sizeof(unsigned long) * 8;
+  return Val_long(width - __builtin_clzl(word) - 1);
+}
+
+/**
+ * Returns the OCaml integer that has a single bit set, at the position of
+ * the most significant set bit of i.
+ * __builtin_clzl is undefined for 0, so callers must pass a non-zero value.
+ */
+extern "C" value caml_leading_bit(value i)
+{
+  const unsigned long word = Long_val(i);
+  const auto top = sizeof(unsigned long) * 8 - __builtin_clzl(word) - 1;
+  // Shift an unsigned long rather than an int: with a plain `1` the shift
+  // overflows (undefined behaviour) as soon as the leading bit is at
+  // position 31 or above.
+  return Val_long(1UL << top);
+}
/** XMLTreeBuilder bindings
*
XMLTree * tree;
try {
- tree = XMLTree::Load(Int_val(fd),Bool_val(load_tc),Int_val(sf), String_val(name));
+ tree = XMLTree::Load(Int_val(fd), Bool_val(load_tc), Int_val(sf), String_val(name));
result = sxsi_alloc_custom<XMLTree*>();
Obj_val<XMLTree*>(result) = tree;
CAMLreturn(result);
/** Full reporting into a bit vector
*/
+/**
+ * Sorts v in ascending order and returns its distinct values.
+ * The "previous value" tracker starts at NULLT, so NULLT entries that sort
+ * first are dropped from the result.
+ */
+static std::vector<DocID> sort_results(std::vector<DocID> v)
+{
+  std::sort(v.begin(), v.end());
+  std::vector<DocID> distinct;
+  DocID last = NULLT;
+  for (const DocID id : v) {
+    // Skip every entry equal to the last one kept (or to the initial NULLT).
+    if (id == last)
+      continue;
+    last = id;
+    distinct.push_back(id);
+  }
+  return distinct;
+}
#define BV_QUERY(pref, Pref) \
- extern "C" value caml_text_collection_## pref ##_bv(value tree, value str){ \
- CAMLparam2(tree, str); \
- CAMLlocal3(res, res_bv, res_array); \
- int j; \
- uchar * cstr = (uchar *) strdup(String_val(str)); \
- std::vector<DocID> results = XMLTREE(tree)->Pref(cstr); \
- res_bv = caml_alloc_string((XMLTREE(tree)->Size() / 4) + 2); \
- unsigned long slen = caml_string_length(res_bv); \
- memset(&(Byte(res_bv,0)), 0, slen); \
- res_array = caml_alloc_shr(results.size(), 0); \
- for (unsigned int i = 0; i < results.size(); ++i) { \
- j = XMLTREE(tree)->ParentNode(results[i]); \
- Byte(res_bv, j >> 3) |= (1 << (j & 7)); \
- caml_initialize(&Field(res_array, i), Val_int(j)); \
- }; \
- free(cstr); \
- res = caml_alloc(2, 0); \
- Store_field(res, 0, res_bv); \
- Store_field(res, 1, res_array); \
- CAMLreturn(res); \
+ extern "C" value caml_text_collection_## pref ##_bv(value tree, value str, value dobvv){ \
+ CAMLparam3(tree, str, dobvv); \
+ CAMLlocal3(res, res_bv, res_array); \
+ int j; \
+ uchar * cstr = (uchar *) strdup(String_val(str)); \
+ std::vector<DocID> uresults = XMLTREE(tree)->Pref(cstr); \
+ std::vector<DocID> results = sort_results(uresults); \
+ bool dobv = Bool_val(dobvv); \
+ res_bv = caml_alloc_string(dobv ? ((XMLTREE(tree)->Size() / 4) + 2) : 0); \
+ unsigned long slen = caml_string_length(res_bv); \
+ if (dobv) \
+ memset(&(Byte(res_bv,0)), 0, slen); \
+ res_array = caml_alloc_shr(results.size(), 0); \
+ for (unsigned int i = 0; i < results.size(); ++i) { \
+ j = XMLTREE(tree)->ParentNode(results[i]); \
+ if (dobv) { \
+ Byte(res_bv, j >> 3) |= (1 << (j & 7)); \
+ }; \
+ caml_initialize(&Field(res_array, i), Val_int(j)); \
+ }; \
+ free(cstr); \
+ res = caml_alloc(2, 0); \
+ Store_field(res, 0, res_bv); \
+ Store_field(res, 1, res_array); \
+ CAMLreturn(res); \
} \
BV_QUERY(lessthan, LessThan)
+////////////////////// BP
-//////////////////////////////////////////// Grammar stuff
+/**
+ * Allocates a zero-filled buffer of unsigned int words: one word more than
+ * the number of whole words in `size` bits.
+ * The raw buffer pointer is returned cast to an OCaml value.
+ * Raises (through CAMLRAISEMSG) when the allocation fails.
+ */
+extern "C" value caml_bitmap_create(value size)
+{
+  CAMLparam1(size);
+  const size_t bit_count = Long_val(size);
+  const size_t bits_per_word = 8 * sizeof(unsigned int);
+  const size_t word_count = bit_count / bits_per_word + 1;
+  void * storage = calloc(word_count, sizeof(unsigned int));
+  if (storage == NULL)
+    CAMLRAISEMSG("BP: cannot allocate memory");
+  CAMLreturn((value) storage);
+}
-extern "C" value caml_grammar_load(value file, value load_bp)
+/**
+ * Resizes the raw bitmap buffer with realloc so that it holds one word more
+ * than the number of whole unsigned int words in `nsize` bits.
+ * Returns the (possibly moved) buffer pointer cast to an OCaml value.
+ * Raises (through CAMLRAISEMSG) when realloc fails.
+ */
+extern "C" value caml_bitmap_resize(value bitmap, value nsize)
{
- CAMLparam2(file, load_bp);
- CAMLlocal1(result);
- Grammar *grammar;
- int f1 = Int_val(file);
- int f2 = dup(f1);
- FILE * fd = fdopen(f2, "r");
- if (fd == NULL)
- CAMLRAISEMSG("Error opening grammar file");
- grammar = Grammar::load(fd, Bool_val(load_bp));
- fclose(fd);
- result = sxsi_alloc_custom<Grammar*>();
- Obj_val<Grammar*>(result) = grammar;
- CAMLreturn(result);
+  CAMLparam2(bitmap, nsize);
+  const size_t bit_count = Long_val(nsize);
+  const size_t word_count = bit_count / (8 * sizeof(unsigned int)) + 1;
+  void * storage = realloc((void *) bitmap, word_count * sizeof(unsigned int));
+  if (storage == NULL)
+    CAMLRAISEMSG("BP: cannot reallocate memory");
+  CAMLreturn((value) storage);
+}
+
+/**
+ * Calls bp_setbit on the raw bitmap buffer with index i and the boolean b
+ * (converted to an unsigned int). Returns unit.
+ */
+extern "C" value caml_bitmap_setbit(value bitmap, value i, value b)
+{
+  CAMLparam3(bitmap, i, b);
+  unsigned int * words = (unsigned int *) bitmap;
+  unsigned int position = Int_val(i);
+  unsigned int bit = Bool_val(b);
+  bp_setbit(words, position, bit);
+  CAMLreturn(Val_unit);
+}
+
+/**
+ * Finalizer for custom blocks that wrap a bp*: releases the bp with
+ * bp_delete. It is the function handed to sxsi_alloc_custom<bp*>.
+ */
+extern "C" void caml_bp_delete(value b)
+{
+  // Finalizers of custom blocks must not register their argument with
+  // CAMLparam nor return through CAMLreturn, so the pointer is read and
+  // released directly.
+  bp_delete(Obj_val<bp*>(b));
}
-extern "C" value caml_grammar_get_symbol_at(value grammar, value symbol, value preorder)
+/**
+ * Builds a bp with bp_construct from npar, the raw bitmap buffer and
+ * OPT_DEGREE, then wraps it in a custom block finalized by caml_bp_delete.
+ */
+extern "C" value caml_bp_construct(value bitmap, value npar)
{
- CAMLparam3(grammar, symbol, preorder);
- CAMLreturn(Val_long(GRAMMAR(grammar)->getSymbolAt(Long_val(symbol), Int_val(preorder))));
+  CAMLparam2(bitmap, npar);
+  CAMLlocal1(wrapped);
+  unsigned int * words = (unsigned int *) bitmap;
+  bp * tree = bp_construct(Int_val(npar), words, OPT_DEGREE);
+  wrapped = sxsi_alloc_custom<bp*>(caml_bp_delete);
+  Obj_val<bp*>(wrapped) = tree;
+  CAMLreturn(wrapped);
+}
+/**
+ * Returns bp_first_child of position idx in the bp wrapped by b,
+ * as an OCaml integer.
+ */
+extern "C" value caml_bp_first_child(value b, value idx)
+{
+  CAMLparam2(b, idx);
+  bp * tree = Obj_val<bp*>(b);
+  const value child = Val_int(bp_first_child(tree, Int_val(idx)));
+  CAMLreturn(child);
}
-extern "C" value caml_grammar_first_child(value grammar, value rule, value pos)
+
+/**
+ * Returns bp_next_sibling of position idx in the bp wrapped by b,
+ * as an OCaml integer.
+ */
+extern "C" value caml_bp_next_sibling(value b, value idx)
{
- CAMLparam1(grammar);
- CAMLreturn(Val_int(GRAMMAR(grammar)->nextSibling(Long_val(rule), Int_val(pos))));
+  CAMLparam2(b, idx);
+  bp * tree = Obj_val<bp*>(b);
+  const value sibling = Val_int(bp_next_sibling(tree, Int_val(idx)));
+  CAMLreturn(sibling);
}
-extern "C" value caml_grammar_next_sibling(value grammar, value rule, value pos)
+/**
+ * Returns bp_preorder_rank of position idx in the bp wrapped by b, minus
+ * one, as an OCaml integer.
+ * NOTE(review): the -1 presumably turns a 1-based rank into a 0-based one;
+ * confirm against the bp library.
+ */
+extern "C" value caml_bp_preorder_rank(value b, value idx)
{
- CAMLparam1(grammar);
- CAMLreturn(Val_bool(GRAMMAR(grammar)->nextSibling(Long_val(rule), Int_val(pos))));
+  CAMLparam2(b, idx);
+  bp * tree = Obj_val<bp*>(b);
+  const value rank = Val_int(bp_preorder_rank(tree, Int_val(idx)) - 1);
+  CAMLreturn(rank);
}
-extern "C" value caml_grammar_is_nil(value grammar, value rule)
+
+/**
+ * Loads a bp with loadTree from the file descriptor `file` and wraps it in
+ * a custom block finalized by caml_bp_delete.
+ * The descriptor is duplicated first, so the fclose below closes only the
+ * duplicate and leaves the caller's descriptor open.
+ * Raises (through CAMLRAISEMSG) when no stream can be opened on it.
+ */
+extern "C" value caml_bp_load(value file)
{
- CAMLparam1(grammar);
- CAMLreturn(Val_bool(GRAMMAR(grammar)->isNil(Long_val(rule))));
+  CAMLparam1(file);
+  CAMLlocal1(result);
+  int dup_fd = dup(Int_val(file));
+  FILE * stream = fdopen(dup_fd, "r");
+  if (stream == NULL) {
+    // fdopen failed, so nothing owns the duplicate: close it here instead
+    // of leaking one descriptor per failed call.
+    if (dup_fd != -1)
+      close(dup_fd);
+    CAMLRAISEMSG("Error opening bp file");
+  }
+  bp * B = loadTree(stream);
+  fclose(stream);
+  result = sxsi_alloc_custom<bp*>(caml_bp_delete);
+  Obj_val<bp*>(result) = B;
+  CAMLreturn(result);
}
-extern "C" value caml_grammar_get_tag(value grammar, value symbol)
+/**
+ * Writes the bp wrapped by b with saveTree to the file descriptor `file`.
+ * The descriptor is duplicated and opened in append mode, so the fclose
+ * below closes only the duplicate. Returns unit.
+ * Raises (through CAMLRAISEMSG) when no stream can be opened on it.
+ */
+extern "C" value caml_bp_save(value b, value file)
{
- CAMLparam1(grammar);
- CAMLlocal1(res);
- const char * s = (GRAMMAR(grammar)->getTagName(Long_val(symbol) >> 2)).c_str();
- res = caml_copy_string(s);
- CAMLreturn(res);
+  CAMLparam2(b, file);
+  bp * B = Obj_val<bp*>(b);
+  int dup_fd = dup(Int_val(file));
+  FILE * stream = fdopen(dup_fd, "a");
+  fflush(stderr);
+  if (stream == NULL) {
+    // fdopen failed, so nothing owns the duplicate: close it here instead
+    // of leaking one descriptor per failed call.
+    if (dup_fd != -1)
+      close(dup_fd);
+    CAMLRAISEMSG("Error saving bp file");
+  }
+  saveTree(B, stream);
+  fclose(stream);
+  CAMLreturn(Val_unit);
+}
+
+/**
+ * Returns the value of bp_get_alloc_stats() as an OCaml integer.
+ * The unit argument is not used.
+ */
+extern "C" value caml_bp_alloc_stats(value unit)
+{
+  CAMLparam1(unit);
+  const value stats = Val_long(bp_get_alloc_stats());
+  CAMLreturn(stats);
}