#include <caml/callback.h>
#include <caml/fail.h>
#include <caml/custom.h>
+
+
+#include <unistd.h>
+#include <sys/times.h>
+#include <time.h>
+#include <sys/stat.h>
+
+ // CPU-time stopwatch state: t1 is filled by start_clock(), t2 by stop_clock().
+ struct tms t1;
+ struct tms t2;
+ // Clock ticks per millisecond (sysconf(_SC_CLK_TCK) scaled down by 1000).
+ double ticks = (double) sysconf(_SC_CLK_TCK)/1000;
+
+ // Sample the current process times into t1.
+ void start_clock() {
+   times(&t1);
+ }
+
+ // Sample the current process times into t2 and return the user CPU time
+ // spent since the last start_clock(), expressed in milliseconds.
+ double stop_clock() {
+   times(&t2);
+   const clock_t user_ticks = t2.tms_utime - t1.tms_utime;
+   return user_ticks / ticks;
+ }
+} //extern C
-} //extern C
//#include "TextCollection/TextCollection.h"
#include "XMLDocShredder.h"
return;
}
+
extern "C" CAMLprim value caml_call_shredder_uri(value uri,value sf, value iet, value dtc){
CAMLparam1(uri);
CAMLlocal1(doc);
};
}
+
+
+
void traversal_rec(XMLTree* tree, treeNode id){
DocID tid;
if (id == NULLT)
return;
- // int tag = tree->Tag(id);
+ //int tag = tree->Tag(id);
if (id) {
tid = tree->PrevText(id);
char * data = (char *) (tree->getTextCollection())->GetText(tid);
}
extern "C" CAMLprim value caml_text_collection_get_text(value tree, value id){ // Fetches GetText(id) from the tree and returns it as a fresh OCaml string.
- CAMLparam2(tree,id);
- const char* txt = (const char*) (XMLTREE(tree)->GetText((DocID) Int_val(id)));
- CAMLreturn (caml_copy_string(txt));
+ CAMLparam2(tree,id);
+ CAMLlocal1(str); // registered as a local root so the copy stays valid until CAMLreturn
+ uchar* txt = XMLTREE(tree)->GetText((DocID) Int_val(id));
+ str = caml_copy_string((const char*)txt); // copy into the OCaml heap before the C buffer is released
+ delete (txt); // NOTE(review): scalar delete on a uchar* buffer; if GetText allocates with new[] this should be delete[] -- confirm
+ CAMLreturn (str);
}
+
+// Fetch GetCachedText(id) from the tree, copy it into a fresh OCaml string,
+// then release the C buffer with free().
+extern "C" CAMLprim value caml_text_collection_get_cached_text(value tree, value id){
+  CAMLparam2(tree,id);
+  CAMLlocal1(str);
+  const DocID doc = (DocID) Int_val(id);
+  char* const buffer = (char*) XMLTREE(tree)->GetCachedText(doc);
+  str = caml_copy_string(buffer);
+  free(buffer);
+  CAMLreturn (str);
+}
+
+
extern "C" CAMLprim value caml_text_collection_empty_text(value tree,value id){
CAMLparam2(tree,id);
CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id))));
uchar * cstr = (uchar *) String_val(str);
CAMLreturn (Val_int((XMLTREE(tree)->CountContains(cstr))));
+}
+// Return, as an OCaml int, the result of Count() on the bytes of str.
+// The former trailing CAMLreturn (Val_unit) could never execute and is gone.
+extern "C" CAMLprim value caml_text_collection_count(value tree,value str){
+  CAMLparam2(tree,str);
+  // Points into the OCaml string itself; no copy is made.
+  uchar * cstr = (uchar *) String_val(str);
+  CAMLreturn (Val_int((XMLTREE(tree)->Count(cstr))));
}
extern "C" CAMLprim value caml_text_collection_contains(value tree,value str){
uchar * cstr = (uchar *) String_val(str);
std::vector<DocID> results;
results = XMLTREE(tree)->Contains(cstr);
-
+ //free(cstr);
resarray = caml_alloc_tuple(results.size());
for (unsigned int i=0; i<results.size();i++){
};
CAMLreturn (resarray);
}
+// Timing helper: runs Contains() on the bytes of str between start_clock()
+// and stop_clock(), prints the measured time on stderr and returns unit.
+// The matches themselves are discarded.
+extern "C" CAMLprim value caml_text_collection_unsorted_contains(value tree,value str){
+  CAMLparam2(tree,str);
+  uchar * pattern = (uchar *) String_val(str);
+  std::vector<DocID> matches;
+  start_clock();
+  matches = XMLTREE(tree)->Contains(pattern);
+  double elapsed = stop_clock();
+  std::cerr << "Internal timing " << elapsed <<" ms\n";
+  CAMLreturn (Val_unit);
+}
extern "C" CAMLprim value caml_xml_tree_root(value tree){ // Returns the tree's Root() node to OCaml.
CAMLparam1(tree);
- CAMLreturn (TREENODEVAL(XMLTREE(tree)->Root()));
+ CAMLreturn (Val_int(TREENODEVAL(XMLTREE(tree)->Root()))); // NOTE(review): TREENODEVAL is applied to a C++ result here, while elsewhere it decodes OCaml values -- confirm this is intended
}
extern "C" CAMLprim value caml_xml_tree_text_collection(value tree){
CAMLparam1(tree);
CAMLparam2(tree,id);
CAMLreturn(Val_int (XMLTREE(tree)->Parent(TREENODEVAL(id))));
}
+// Return PrevSibling() of node id in the tree, encoded as an OCaml int.
+extern "C" CAMLprim value caml_xml_tree_prev_sibling(value tree, value id){
+  CAMLparam2(tree,id);
+  const intnat sibling = XMLTREE(tree)->PrevSibling(TREENODEVAL(id));
+  CAMLreturn(Val_int (sibling));
+}
+
extern "C" CAMLprim value caml_xml_tree_parent_doc(value tree, value id){ // Returns ParentNode() of id as an OCaml int.
CAMLparam2(tree,id);
- CAMLreturn(Val_int (XMLTREE(tree)->ParentNode(TREENODEVAL(id))));
+ CAMLreturn(Val_int (XMLTREE(tree)->ParentNode((DocID) Int_val(id)))); // id is decoded as a DocID here, not through TREENODEVAL
+}
+
+// Return PrevNode() of the DocID carried by id, encoded as an OCaml int.
+extern "C" CAMLprim value caml_xml_tree_prev_doc(value tree, value id){
+  CAMLparam2(tree,id);
+  const DocID doc = (DocID) Int_val(id);
+  CAMLreturn(Val_int (XMLTREE(tree)->PrevNode(doc)));
}
extern "C" CAMLprim value caml_xml_tree_is_ancestor(value tree,value id1, value id2) {
CAMLreturn(Val_unit);
}
+// Return LastChild() of node id in the tree, encoded as an OCaml int.
+extern "C" CAMLprim value caml_xml_tree_last_child(value tree, value id){
+  CAMLparam2(tree,id);
+  const intnat child = XMLTREE(tree)->LastChild(TREENODEVAL(id));
+  CAMLreturn(Val_int (child));
+}
+
+// Return IsFirstChild() of node id in the tree as an OCaml bool.
+extern "C" CAMLprim value caml_xml_tree_is_first_child(value tree, value id){
+  CAMLparam2(tree,id);
+  const bool is_first = XMLTREE(tree)->IsFirstChild(TREENODEVAL(id));
+  CAMLreturn(Val_bool (is_first));
+}
extern "C" CAMLprim value caml_xml_tree_first_child(value tree, value id){
CAMLparam2(tree,id);
CAMLreturn(Val_bool (XMLTREE(tree)->IsLeaf(TREENODEVAL(id))));
}
+// Return TaggedDesc(id, tag) on the tree, encoded as an OCaml int.
+// tag is decoded from its OCaml int and cast to TagType.
+extern "C" CAMLprim value caml_xml_tree_tagged_desc(value tree, value id, value tag){
+  CAMLparam3(tree,id,tag);
+  const TagType wanted = (TagType) Int_val(tag);
+  CAMLreturn(Val_int (XMLTREE(tree)->TaggedDesc(TREENODEVAL(id), wanted)));
+}
+
+
+// Return TaggedFoll(id, tag) on the tree, encoded as an OCaml int.
+// tag is decoded from its OCaml int and cast to TagType.
+extern "C" CAMLprim value caml_xml_tree_tagged_foll(value tree, value id, value tag){
+  CAMLparam3(tree,id,tag);
+  const TagType wanted = (TagType) Int_val(tag);
+  CAMLreturn(Val_int (XMLTREE(tree)->TaggedFoll(TREENODEVAL(id), wanted)));
+}
+// Return TaggedFollBelow(id, tag, root) on the tree, encoded as an OCaml int.
+// tag is decoded from its OCaml int and cast to TagType; id and root go
+// through TREENODEVAL.
+extern "C" CAMLprim value caml_xml_tree_tagged_foll_below(value tree, value id, value tag,value root){
+  CAMLparam4(tree,id,tag,root);
+  const TagType wanted = (TagType) Int_val(tag);
+  CAMLreturn(Val_int (XMLTREE(tree)->TaggedFollBelow(TREENODEVAL(id),
+                                                      wanted,
+                                                      TREENODEVAL(root))));
+}
+
+
extern "C" CAMLprim value caml_xml_tree_next_sibling(value tree, value id){
CAMLparam2(tree,id);
CAMLreturn(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id))));
extern "C" CAMLprim value caml_xml_tree_prev_text(value tree, value id){ // Returns PrevText() of node id as an OCaml int.
CAMLparam2(tree,id);
- CAMLlocal1(res);
CAMLreturn(Val_int((XMLTREE(tree)->PrevText(TREENODEVAL(id)))));
- CAMLreturn(res);
}
extern "C" CAMLprim value caml_xml_tree_next_text(value tree, value id){
CAMLparam2(tree,id);
CAMLparam2(tree,id);
CAMLreturn(Val_int((XMLTREE(tree)->NodeXMLId(TREENODEVAL(id)))));
}
-extern "C" CAMLprim value caml_xml_tree_tag(value tree, value id){
- CAMLparam2(tree,id);
- const char* tag;
- tag =(const char*) XMLTREE(tree)->GetTagName(XMLTREE(tree)->Tag(TREENODEVAL(id)));
-
- CAMLreturn (caml_copy_string(tag));
-}
extern "C" CAMLprim value caml_xml_tree_tag_name(value tree, value tagid){ // Returns the name looked up for tag identifier tagid as an OCaml string.
CAMLparam2(tree,tagid);
- const char* tag;
- tag = (const char*) XMLTREE(tree)->GetTagName((TagType) (Int_val(tagid)));
-
- CAMLreturn (caml_copy_string(tag));
+ CAMLlocal1(str);
+ char* tag;
+ tag = (char*) XMLTREE(tree)->GetTagNameByRef((TagType) (Int_val(tagid))); // not released here; presumably a reference owned by the tree -- TODO confirm
+ str = caml_copy_string((const char*) tag); // copy into the OCaml heap
+ CAMLreturn (str);
}
CAMLreturn (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(id))));
}
+// Return SubtreeTags(id, tag) on the tree, encoded as an OCaml int.
+extern "C" CAMLprim value caml_xml_tree_subtree_tags(value tree,value id,value tag){
+  CAMLparam3(tree,id,tag);
+  const intnat count = XMLTREE(tree)->SubtreeTags(TREENODEVAL(id),Int_val(tag));
+  CAMLreturn (Val_int(count));
+}
+
+
extern "C" CAMLprim value caml_xml_tree_register_tag(value tree,value str){
CAMLparam2(tree,str);
CAMLlocal1(id);
memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*));
CAMLreturn(doc);
}
+
+// Shared empty int vector: its only slot holds the value 0.
+extern "C" {
+  static int caml_empty_vector[1] = { 0 };
+}
+
+// Return the address of the static caml_empty_vector, cast to an OCaml value.
+extern "C" CAMLprim value caml_int_vector_empty(value unit){
+  CAMLparam1(unit);
+  const value empty = (value) caml_empty_vector;
+  CAMLreturn (empty);
+}
+
+// Return the length of an int vector, i.e. the int stored in its slot 0.
+// vec is a raw int* cast to value.
+// Fix: the length is read from vec itself. The previous code read
+// caml_empty_vector[0] and therefore answered 0 for every vector.
+extern "C" CAMLprim value caml_int_vector_length(value vec){
+  CAMLparam1(vec);
+  CAMLreturn (Val_int( ((int*) vec)[0] ));
+}
+// Allocate, with malloc, an int vector able to hold len elements.
+// Slot 0 stores the length; the elements occupy slots 1..len and are left
+// uninitialised. The raw pointer is returned cast to an OCaml value.
+// Raises Invalid_argument on a negative length and Out_of_memory when
+// malloc fails, instead of writing through an invalid pointer.
+// NOTE(review): no matching free() is visible in this part of the file.
+extern "C" CAMLprim value caml_int_vector_alloc(value len){
+  CAMLparam1(len);
+  const long count = Int_val(len);
+  if (count < 0)
+    caml_invalid_argument("caml_int_vector_alloc: negative length");
+  int * vec = (int *) malloc(sizeof(int)*(count+1));
+  if (vec == NULL)
+    caml_raise_out_of_memory();
+  vec[0] = (int) count;
+  CAMLreturn ((value) vec);
+}
+
+// Store v as element i of the int vector vec and return unit.
+// Elements start at slot 1, so element i lives at index i+1.
+// No bounds check is performed.
+extern "C" CAMLprim value caml_int_vector_set(value vec, value i, value v){
+  CAMLparam3(vec,i,v);
+  int * const slots = (int*) vec;
+  slots[Int_val(i)+1] = Int_val(v);
+  CAMLreturn (Val_unit);
+}
+
+
+// View an OCaml value holding a raw int-vector pointer as int*.
+#define VECT(x) ((int*) (x))
+// Return TaggedBelow() for node on the tree, encoded as an OCaml int.
+// For each of ctags and dtags the call receives a pointer to slot 1 followed
+// by the int stored in slot 0.
+extern "C" CAMLprim value caml_xml_tree_select_below(value tree, value node, value ctags, value dtags){
+  CAMLparam4(tree,node,ctags,dtags);
+  int * const cvec = VECT(ctags);
+  int * const dvec = VECT(dtags);
+  CAMLreturn (Val_int (
+    XMLTREE(tree)->TaggedBelow(TREENODEVAL(node),
+                               cvec + 1, cvec[0],
+                               dvec + 1, dvec[0])));
+}
+
+// Return TaggedNext() for node on the tree, encoded as an OCaml int.
+// For each of ctags and ftags the call receives a pointer to slot 1 followed
+// by the int stored in slot 0; root is passed last through TREENODEVAL.
+extern "C" CAMLprim value caml_xml_tree_select_next(value tree, value node, value ctags, value ftags,value root){
+  CAMLparam5(tree,node,ctags,ftags,root);
+  int * const cvec = VECT(ctags);
+  int * const fvec = VECT(ftags);
+  CAMLreturn (Val_int (
+    XMLTREE(tree)->TaggedNext(TREENODEVAL(node),
+                              cvec + 1, cvec[0],
+                              fvec + 1, fvec[0],
+                              TREENODEVAL(root))));
+}
+
+// Return TaggedDescOnly() for node on the tree, encoded as an OCaml int.
+// The call receives a pointer to slot 1 of dtags followed by the int stored
+// in its slot 0.
+extern "C" CAMLprim value caml_xml_tree_select_desc_only(value tree, value node,value dtags){
+  CAMLparam3(tree,node,dtags);
+  int * const dvec = VECT(dtags);
+  CAMLreturn (Val_int (
+    XMLTREE(tree)->TaggedDescOnly(TREENODEVAL(node), dvec + 1, dvec[0])));
+}
+
+// Return TaggedFollOnly() for node on the tree, encoded as an OCaml int.
+// The call receives a pointer to slot 1 of ftags, the int stored in its
+// slot 0, and root passed through TREENODEVAL.
+extern "C" CAMLprim value caml_xml_tree_select_foll_only(value tree, value node, value ftags,value root){
+  CAMLparam4(tree,node,ftags,root);
+  int * const fvec = VECT(ftags);
+  CAMLreturn (Val_int (
+    XMLTREE(tree)->TaggedFollOnly(TREENODEVAL(node),
+                                  fvec + 1, fvec[0],
+                                  TREENODEVAL(root))));
+}
+
+// Return TaggedDescOrFollOnly() for node on the tree, encoded as an OCaml int.
+// The call receives a pointer to slot 1 of ftags, the int stored in its
+// slot 0, and root passed through TREENODEVAL.
+extern "C" CAMLprim value caml_xml_tree_select_desc_or_foll_only(value tree, value node, value ftags,value root){
+  CAMLparam4(tree,node,ftags,root);
+  int * const fvec = VECT(ftags);
+  CAMLreturn (Val_int (
+    XMLTREE(tree)->TaggedDescOrFollOnly(TREENODEVAL(node),
+                                        fvec + 1, fvec[0],
+                                        TREENODEVAL(root))));
+}
+
+// Return DocIds(node) on the tree as an OCaml pair (min, max) of ints.
+// The tuple comes from caml_alloc_tuple, so its fields are written with
+// Store_field; caml_initialize is reserved for blocks from caml_alloc_shr.
+extern "C" CAMLprim value caml_xml_tree_doc_ids(value tree, value node){
+  CAMLparam2(tree,node);
+  CAMLlocal1(tuple);
+  tuple = caml_alloc_tuple(2);
+  range r = (XMLTREE(tree)->DocIds(TREENODEVAL(node)));
+  Store_field(tuple,0,Val_int(r.min));
+  Store_field(tuple,1,Val_int(r.max));
+  CAMLreturn (tuple);
+}