From cad5e2e2831477cba1f6211c57b9a4cc5b58bd55 Mon Sep 17 00:00:00 2001 From: kim Date: Wed, 19 Aug 2009 21:10:21 +0000 Subject: [PATCH] Added benchmarking funtions, Need to debug symbol table generaion. git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@555 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- OCamlDriver.cpp | 708 +++++++++++++++++++++++++++++++++--------------- ata.ml | 170 ++++++------ ata.mli | 2 +- main.ml | 14 +- results.c | 2 +- tag.ml | 2 +- tree.ml | 266 ++++++++++-------- tree.mli | 25 +- 8 files changed, 758 insertions(+), 431 deletions(-) diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index 35d03b8..c10edf1 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -8,6 +8,11 @@ * Date: 04/11/08 */ +/*** + * Conventions: + * functions never doing any allocation (non caml_alloc*, caml_copy_string,...) + * have NOALLOC in the comment and their external declaration can have "noalloc" + */ #include @@ -30,10 +35,13 @@ extern "C" { #define CAMLRAISEMSG(msg) (caml_raise_with_string(*cpp_exception,(msg) )) #define NOT_IMPLEMENTED(s) (caml_failwith(s)) #define XMLTREE(x) ((XMLTree *)(* (XMLTree**) Data_custom_val(x))) -#define HSET(x) ((std::unordered_set*)((* (std::unordered_set**) Data_custom_val(x)))) +#define HSET(x) ((TagIdSet*)((* (TagIdSet**) Data_custom_val(x)))) #define TEXTCOLLECTION(x) #define TREENODEVAL(i) ((treeNode) (Int_val(i))) +#define TAGVAL(i) ((TagType) (Int_val(i))) #define XMLTREE_ROOT 0 +#define NoAlloc + static struct custom_operations ops; static struct custom_operations set_ops; @@ -52,7 +60,7 @@ extern "C" void caml_hset_finalize(value hblock){ return; } -extern "C" CAMLprim value caml_init_lib (value unit) { +extern "C" value caml_init_lib (value unit) { CAMLparam1(unit); if (!ops_initialized){ @@ -75,7 +83,7 @@ extern "C" CAMLprim value caml_init_lib (value unit) { CAMLreturn(Val_unit); } -extern "C" CAMLprim value caml_shredder_parse(XMLDocShredder *shredder){ +extern "C" value caml_shredder_parse(XMLDocShredder *shredder){ CAMLparam0(); CAMLlocal1(doc); XMLTree * tree; @@ -89,7 +97,7 @@ extern "C" CAMLprim value caml_shredder_parse(XMLDocShredder *shredder){ } -extern "C" CAMLprim value caml_call_shredder_uri(value uri,value sf, value iet, value dtc){ +extern "C" value caml_call_shredder_uri(value uri,value sf, value iet, value dtc){ CAMLparam1(uri); CAMLlocal1(doc); char *fn = String_val(uri); @@ -105,7 +113,7 @@ extern "C" CAMLprim value caml_call_shredder_uri(value uri,value sf, value iet, CAMLreturn (doc); } -extern "C" CAMLprim value caml_call_shredder_string(value data,value sf, value iet, value dtc){ +extern "C" value caml_call_shredder_string(value data,value sf, value iet, value dtc){ CAMLparam1(data); CAMLlocal1(doc); XMLDocShredder * shredder; @@ -122,18 +130,19 @@ extern "C" CAMLprim value caml_call_shredder_string(value data,value sf, value i CAMLreturn(doc); } -extern "C" CAMLprim value caml_xml_tree_save(value tree,value fd){ +extern "C" value caml_xml_tree_save(value tree,value fd){ CAMLparam2(tree,fd); XMLTREE(tree)->Save(Int_val(fd)); CAMLreturn (Val_unit); } -extern "C" CAMLprim value caml_xml_tree_load(value fd, value load_tc,value sf){ +extern "C" value caml_xml_tree_load(value fd, value load_tc,value sf){ CAMLparam3(fd,load_tc,sf); CAMLlocal1(doc); XMLTree * tree; try { tree = XMLTree::Load(Int_val(fd),Bool_val(load_tc),Int_val(sf)); + printf("Pointer to tree is %p\n", (void*) tree); doc = caml_alloc_custom(&ops,sizeof(XMLTree*),1,2); memcpy(Data_custom_val(doc),&tree,sizeof(XMLTree*)); CAMLreturn(doc); @@ -144,7 +153,16 @@ extern "C" CAMLprim value caml_xml_tree_load(value fd, value load_tc,value sf){ catch (char const * msg){ CAMLRAISEMSG(msg); }; } -extern "C" CAMLprim value caml_text_collection_get_text(value tree, value id){ + +/** + * Interface to the TextCollection + */ + +/** + * Utility functions + */ + +extern "C" value caml_text_collection_get_text(value tree, value id){ CAMLparam2(tree,id); CAMLlocal1(str); uchar* txt = XMLTREE(tree)->GetText((DocID) Int_val(id)); @@ -152,310 +170,499 @@ extern "C" CAMLprim value caml_text_collection_get_text(value tree, value id){ CAMLreturn (str); } -extern "C" CAMLprim value caml_text_collection_get_cached_text(value tree, value id){ + +extern "C" value caml_text_collection_empty_text(value tree,value id){ CAMLparam2(tree,id); - CAMLlocal1(str); - char* txt = (char*) XMLTREE(tree)->GetText((DocID) Int_val(id)); - str = caml_copy_string(txt); - CAMLreturn (str); + CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id)))); } +bool docId_comp(DocID x, DocID y) { return x < y; }; -extern "C" CAMLprim value caml_text_collection_empty_text(value tree,value id){ - CAMLparam2(tree,id); - CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id)))); +/** + * Existential queries + */ + +extern "C" value caml_text_collection_is_prefix(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) String_val(str); + CAMLreturn (Val_bool((int) XMLTREE(tree)->IsPrefix(cstr))); } -extern "C" CAMLprim value caml_text_collection_is_contains(value tree,value str){ +extern "C" value caml_text_collection_is_suffix(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) String_val(str); + CAMLreturn (Val_bool((int) XMLTREE(tree)->IsSuffix(cstr))); +} +extern "C" value caml_text_collection_is_equal(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) String_val(str); + CAMLreturn (Val_bool((int) XMLTREE(tree)->IsEqual(cstr))); +} +extern "C" value caml_text_collection_is_contains(value tree,value str){ CAMLparam2(tree,str); uchar * cstr = (uchar *) String_val(str); CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsContains(cstr))); } -extern "C" CAMLprim value caml_text_collection_count_contains(value tree,value str){ +extern "C" value caml_text_collection_is_lessthan(value tree,value str){ CAMLparam2(tree,str); uchar * cstr = (uchar *) String_val(str); - CAMLreturn (Val_int((XMLTREE(tree)->CountContains(cstr)))); - + CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsLessThan(cstr))); } -extern "C" CAMLprim value caml_text_collection_count(value tree,value str){ + + +/** + * Count Queries + */ + +/** + * Global counting + */ +extern "C" value caml_text_collection_count(value tree,value str){ CAMLparam2(tree,str); uchar * cstr = (uchar *) String_val(str); CAMLreturn (Val_int((XMLTREE(tree)->Count(cstr)))); - CAMLreturn (Val_unit); - } -bool docId_comp(DocID x, DocID y) { return x < y; }; +extern "C" value caml_text_collection_count_prefix(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) String_val(str); + CAMLreturn (Val_int((XMLTREE(tree)->CountPrefix(cstr)))); +} -extern "C" CAMLprim value caml_text_collection_contains(value tree,value str){ +extern "C" value caml_text_collection_count_suffix(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) String_val(str); + CAMLreturn (Val_int((XMLTREE(tree)->CountSuffix(cstr)))); +} + +extern "C" value caml_text_collection_count_equal(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) String_val(str); + CAMLreturn (Val_int((XMLTREE(tree)->CountEqual(cstr)))); +} + +extern "C" value caml_text_collection_count_contains(value tree,value str){ CAMLparam2(tree,str); - CAMLlocal1(resarray); uchar * cstr = (uchar *) String_val(str); - std::vector results; - results = XMLTREE(tree)->Contains(cstr); - std::sort(results.begin(), results.end(), docId_comp); - size_t s = results.size(); - resarray = caml_alloc_tuple(s); - - for (size_t i = 0; i < s ;i++){ - caml_initialize(&Field(resarray,i),Val_int(results[i])); - }; - CAMLreturn (resarray); + CAMLreturn (Val_int((XMLTREE(tree)->CountContains(cstr)))); } -extern "C" CAMLprim value caml_text_collection_equals(value tree,value str){ +extern "C" value caml_text_collection_count_lessthan(value tree,value str){ CAMLparam2(tree,str); - CAMLlocal1(resarray); uchar * cstr = (uchar *) String_val(str); - std::vector results; - results = XMLTREE(tree)->Equal(cstr); - std::sort(results.begin(), results.end(), docId_comp); - size_t s = results.size(); - resarray = caml_alloc_tuple(s); - - for (size_t i = 0; i < s ;i++){ - caml_initialize(&Field(resarray,i),Val_int(results[i])); - }; - CAMLreturn (resarray); + CAMLreturn (Val_int((XMLTREE(tree)->CountLessThan(cstr)))); } -extern "C" CAMLprim value caml_text_collection_startswith(value tree,value str){ + +static value sort_alloc_array(std::vector results, value resarray){ + std::sort(results.begin(), results.end(), docId_comp); + size_t s = results.size(); + resarray = caml_alloc_tuple(s); + for (size_t i = 0; i < s ;i++){ + caml_initialize(&Field(resarray,i),Val_int(results[i])); + }; + return resarray; +} + +/** + * Full reporting queries + */ + +extern "C" value caml_text_collection_prefix(value tree,value str){ CAMLparam2(tree,str); CAMLlocal1(resarray); uchar * cstr = (uchar *) String_val(str); - std::vector results; - results = XMLTREE(tree)->Prefix(cstr); - std::sort(results.begin(), results.end(), docId_comp); - size_t s = results.size(); - resarray = caml_alloc_tuple(s); - - for (size_t i = 0; i < s ;i++){ - caml_initialize(&Field(resarray,i),Val_int(results[i])); - }; - CAMLreturn (resarray); + std::vector results = XMLTREE(tree)->Prefix(cstr); + CAMLreturn (sort_alloc_array(results,resarray)); } -extern "C" CAMLprim value caml_text_collection_endswith(value tree,value str){ + +extern "C" value caml_text_collection_suffix(value tree,value str){ CAMLparam2(tree,str); CAMLlocal1(resarray); uchar * cstr = (uchar *) String_val(str); - std::vector results; - results = XMLTREE(tree)->Suffix(cstr); - std::sort(results.begin(), results.end(), docId_comp); - size_t s = results.size(); - resarray = caml_alloc_tuple(s); - - for (size_t i = 0; i < s ;i++){ - caml_initialize(&Field(resarray,i),Val_int(results[i])); - }; - CAMLreturn (resarray); + std::vector results = XMLTREE(tree)->Suffix(cstr); + CAMLreturn (sort_alloc_array(results,resarray)); } +extern "C" value caml_text_collection_equals(value tree,value str){ + CAMLparam2(tree,str); + CAMLlocal1(resarray); + uchar * cstr = (uchar *) strdup(String_val(str)); + std::vector results = XMLTREE(tree)->Equals(cstr); + free(cstr); + CAMLreturn (sort_alloc_array(results,resarray)); +} +extern "C" value caml_text_collection_contains(value tree,value str){ + CAMLparam2(tree,str); + CAMLlocal1(resarray); + uchar * cstr = (uchar *) String_val(str); + std::vector results = XMLTREE(tree)->Contains(cstr); + CAMLreturn (sort_alloc_array(results,resarray)); +} -extern "C" CAMLprim value caml_text_collection_unsorted_contains(value tree,value str){ +extern "C" value caml_text_collection_lessthan(value tree,value str){ CAMLparam2(tree,str); CAMLlocal1(resarray); uchar * cstr = (uchar *) String_val(str); - std::vector results; - results = XMLTREE(tree)->Contains(cstr); - resarray = caml_alloc_tuple(results.size()); - for (size_t i = 0; i < results.size() ;i++){ - caml_initialize(&Field(resarray,i),Val_int(results[i])); + std::vector results = XMLTREE(tree)->LessThan(cstr); + CAMLreturn (sort_alloc_array(results,resarray)); +} + +/** Full reporting into a bit vector + */ + +extern "C" value caml_text_collection_prefix_bv(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) strdup(String_val(str)); + std::vector results = XMLTREE(tree)->Prefix(cstr); + std::vector *bv = new std::vector(XMLTREE(tree)->Size(),false); + for (unsigned int i=0; i < results.size(); i++) + bv->at(XMLTREE(tree)->ParentNode(results[i]))=true; + free(cstr); + CAMLreturn ((value) bv); +} + +extern "C" value caml_text_collection_suffix_bv(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) strdup(String_val(str)); + std::vector results = XMLTREE(tree)->Suffix(cstr); + std::vector *bv = new std::vector(XMLTREE(tree)->Size(),false); + for (unsigned int i=0; i < results.size(); i++) + bv->at(XMLTREE(tree)->ParentNode(results[i]))=true; + free(cstr); + CAMLreturn ((value) bv); +} + +extern "C" value caml_text_collection_equals_bv(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) strdup(String_val(str)); + XMLTree* xt = XMLTREE(tree); + std::vector results = xt->Equals(cstr); + std::vector *bv = new std::vector(xt->Size(),false); + for (unsigned int i=0; i < results.size(); i++) + bv->at(xt->Parent(xt->ParentNode(results[i])))=true; + free(cstr); + CAMLreturn ((value) bv); +} + + +extern "C" value caml_text_collection_contains_bv(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) strdup(String_val(str)); + XMLTree* xt = XMLTREE(tree); + std::vector results = xt->Contains(cstr); + std::vector *bv = new std::vector(xt->Size(),false); + for (unsigned int i=0; i < results.size(); i++){ + bv->at(xt->Parent(xt->ParentNode(results[i])))=true; + } + free(cstr); + CAMLreturn ((value) bv); +} + +extern "C" value caml_text_collection_contains_bv_update(value tree,value str,value vbv){ + CAMLparam3(tree,str,vbv); + uchar * cstr = (uchar *) strdup(String_val(str)); + XMLTree* xt = XMLTREE(tree); + std::vector results = xt->Contains(cstr); + std::vector *bv = (std::vector *) vbv; + for (unsigned int i=0; i < results.size(); i++){ + /** Hack for the Techfest demo */ + (*bv)[xt->Parent(xt->Parent(xt->ParentNode(results[i])))]=true; + } + free(cstr); + CAMLreturn ((value) bv); +} +extern "C" value caml_text_collection_contains_bv_update_list(value tree,value str,value acc,value vbv,value count){ + CAMLparam4(tree,str,acc,vbv); + CAMLlocal1(head); + uchar * cstr = (uchar *) strdup(String_val(str)); + XMLTree* xt = XMLTREE(tree); + std::vector results = xt->Contains(cstr); + std::vector *bv = (std::vector *) vbv; + treeNode idx; + int acc_count = Int_val(count); + for (unsigned int i=0; i < results.size(); i++){ + idx = xt->Parent(xt->Parent(xt->ParentNode(results[i]))); + if (!(*bv)[idx]) { + (*bv)[idx]=true; + head = caml_alloc_tuple(2); + caml_initialize(&Field(head,0),Val_int(idx)); + caml_initialize(&Field(head,1),acc); + acc=head; + acc_count++; + }; }; - CAMLreturn (resarray); + free(cstr); + head = caml_alloc_tuple(3); + caml_initialize(&Field(head,0),acc); + caml_initialize(&Field(head,1),(value) bv); + caml_initialize(&Field(head,2),Val_int(acc_count)); + CAMLreturn (head); } +extern "C" value caml_text_collection_lessthan_bv(value tree,value str){ + CAMLparam2(tree,str); + uchar * cstr = (uchar *) strdup(String_val(str)); + std::vector results = XMLTREE(tree)->LessThan(cstr); + std::vector *bv = new std::vector(XMLTREE(tree)->Size(),false); + for (unsigned int i=0; i < results.size(); i++) + bv->at(XMLTREE(tree)->ParentNode(results[i]))=true; + free(cstr); + CAMLreturn ((value) bv); +} + +/*************************************************************************/ + +/** + * XMLTree bindings + * All of the functions here call the _unsafe version and implement the logics themselves + * (test for NULLT and so on). This avoids one indirection + one call when the tests fails. + */ -extern "C" CAMLprim value caml_xml_tree_root(value tree){ - CAMLparam1(tree); - CAMLreturn (Val_int(TREENODEVAL(XMLTREE_ROOT))); + +NoAlloc extern "C" value caml_xml_tree_root(value tree){ + return (Val_int(XMLTREE_ROOT)); } -extern "C" CAMLprim value caml_xml_tree_text_collection(value tree){ - CAMLparam1(tree); - CAMLreturn((value) XMLTREE(tree)->getTextCollection()); + +NoAlloc extern "C" value caml_xml_tree_size(value tree){ + return (Val_int(XMLTREE(tree)->Size())); } -extern "C" CAMLprim value caml_xml_tree_parent(value tree, value id){ - return(Val_int (XMLTREE(tree)->Parent(TREENODEVAL(id)))); + +NoAlloc extern "C" value caml_xml_tree_subtree_size(value tree, value node){ + return (Val_int(XMLTREE(tree)->SubtreeSize(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_prev_sibling(value tree, value id){ - return(Val_int (XMLTREE(tree)->PrevSibling(TREENODEVAL(id)))); + +NoAlloc extern "C" value caml_xml_tree_subtree_tags(value tree, value node, value tag){ + return (Val_int(XMLTREE(tree)->SubtreeTags(TREENODEVAL(node), TAGVAL(tag)))); } -extern "C" CAMLprim value caml_xml_tree_parent_doc(value tree, value id){ - return (Val_int (XMLTREE(tree)->ParentNode((DocID) Int_val(id)))); +NoAlloc extern "C" value caml_xml_tree_subtree_elements(value tree, value node){ + return (Val_int(XMLTREE(tree)->SubtreeElements(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_is_ancestor(value tree,value id1, value id2) { - CAMLparam3(tree,id1,id2); - CAMLreturn(Val_bool (XMLTREE(tree)->IsAncestor(TREENODEVAL(id1),TREENODEVAL(id2)))); +NoAlloc extern "C" value caml_xml_tree_is_leaf(value tree, value node){ + return (Val_bool(XMLTREE(tree)->IsLeaf(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_last_child(value tree, value id){ - return(Val_int (XMLTREE(tree)->LastChild(TREENODEVAL(id)))); +NoAlloc extern "C" value caml_xml_tree_is_ancestor(value tree, value node1,value node2){ + return (Val_bool(XMLTREE(tree)->IsAncestor(TREENODEVAL(node1),TREENODEVAL(node2)))); } -extern "C" CAMLprim value caml_xml_tree_is_first_child(value tree, value id){ - return Val_bool (XMLTREE(tree)->IsFirstChild(TREENODEVAL(id))); +NoAlloc extern "C" value caml_xml_tree_is_child(value tree, value node1,value node2){ + return (Val_bool(XMLTREE(tree)->IsChild(TREENODEVAL(node1),TREENODEVAL(node2)))); } -extern "C" CAMLprim value caml_xml_tree_first_child(value tree, value id){ - return(Val_int (XMLTREE(tree)->FirstChild(TREENODEVAL(id)))); + +NoAlloc extern "C" value caml_xml_tree_is_first_child(value tree, value node){ + return (Val_bool(XMLTREE(tree)->IsFirstChild(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_closing(value tree, value id){ - return(Val_int (XMLTREE(tree)->Closing(TREENODEVAL(id)))); + +NoAlloc extern "C" value caml_xml_tree_num_children(value tree, value node){ + return (Val_int(XMLTREE(tree)->NumChildren(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_is_open(value tree, value id){ - return(Val_bool (XMLTREE(tree)->IsOpen(TREENODEVAL(id)))); + +NoAlloc extern "C" value caml_xml_tree_child_number(value tree, value node){ + return (Val_int(XMLTREE(tree)->ChildNumber(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_first_element(value tree, value id){ - return(Val_int (XMLTREE(Field(tree,0))->FirstElement(TREENODEVAL(id)))); +NoAlloc extern "C" value caml_xml_tree_depth(value tree, value node){ + return (Val_int(XMLTREE(tree)->Depth(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_tagged_child(value tree, value id, value tag){ - return(Val_int (XMLTREE(tree)->TaggedChild(TREENODEVAL(id),Int_val(tag)))); +NoAlloc extern "C" value caml_xml_tree_preorder(value tree, value node){ + return (Val_int(XMLTREE(tree)->Preorder(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_next_sibling(value tree, value id){ - return(Val_int (XMLTREE(tree)->NextSibling(TREENODEVAL(id)))); +NoAlloc extern "C" value caml_xml_tree_postorder(value tree, value node){ + return (Val_int(XMLTREE(tree)->Postorder(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_next_element(value tree, value id){ - return(Val_int (XMLTREE(Field(tree,0))->NextElement(TREENODEVAL(id)))); +NoAlloc extern "C" value caml_xml_tree_tag(value tree, value node){ + return (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_tagged_sibling(value tree, value id, value tag){ - return(Val_int (XMLTREE(tree)->TaggedFollSibling(TREENODEVAL(id),Int_val(tag)))); +extern "C" value caml_xml_tree_doc_ids(value tree, value node){ + CAMLparam2(tree,node); + CAMLlocal1(tuple); + range ids; + tuple = caml_alloc(2,0); + ids = XMLTREE(tree)->DocIds(Int_val(node)); + Store_field(tuple,0,Val_int(ids.min)); + Store_field(tuple,1,Val_int(ids.max)); + CAMLreturn (tuple); } +NoAlloc extern "C" value caml_xml_tree_parent(value tree, value node){ + return (Val_int(XMLTREE(tree)->Parent(TREENODEVAL(node)))); +} -extern "C" CAMLprim value caml_xml_tree_is_leaf(value tree, value id){ - return(Val_bool (XMLTREE(tree)->IsLeaf(TREENODEVAL(id)))); +NoAlloc extern "C" value caml_xml_tree_child(value tree, value node,value idx){ + return (Val_int(XMLTREE(tree)->Child(TREENODEVAL(node),Int_val(idx)))); } -extern "C" CAMLprim value caml_xml_tree_tagged_desc(value tree, value id, value tag){ - return(Val_int (XMLTREE(tree)->TaggedDesc(TREENODEVAL(id),(TagType) Int_val(tag)))); +NoAlloc extern "C" value caml_xml_tree_first_child(value tree, value node){ + return (Val_int(XMLTREE(tree)->FirstChild(TREENODEVAL(node)))); } +NoAlloc extern "C" value caml_xml_tree_first_element(value tree, value node){ + return (Val_int(XMLTREE(tree)->FirstElement(TREENODEVAL(node)))); +} -extern "C" CAMLprim value caml_xml_tree_tagged_foll(value tree, value id, value tag){ - return(Val_int (XMLTREE(tree)->TaggedFoll(TREENODEVAL(id),(TagType) Int_val(tag)))); +NoAlloc extern "C" value caml_xml_tree_last_child(value tree, value node){ + return (Val_int(XMLTREE(tree)->LastChild(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_tagged_foll_below(value tree, value id, value tag,value root){ - return(Val_int (XMLTREE(tree)->TaggedFollBelow(TREENODEVAL(id),(TagType) Int_val(tag),TREENODEVAL(root)))); + +NoAlloc extern "C" value caml_xml_tree_next_sibling(value tree, value node){ + return (Val_int(XMLTREE(tree)->NextSibling(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_tagged_foll_before(value tree, value id, value tag,value root){ - return(Val_int (XMLTREE(tree)->TaggedFollBefore(TREENODEVAL(id),(TagType) Int_val(tag),TREENODEVAL(root)))); + +NoAlloc extern "C" value caml_xml_tree_next_element(value tree, value node){ + return (Val_int(XMLTREE(tree)->NextElement(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_my_text(value tree, value id){ - return(Val_int((XMLTREE(tree)->MyText(TREENODEVAL(id))))); +NoAlloc extern "C" value caml_xml_tree_prev_sibling(value tree, value node){ + return (Val_int(XMLTREE(tree)->PrevSibling(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_my_text_unsafe(value tree, value id){ - return(Val_int((XMLTREE(tree)->MyTextUnsafe(TREENODEVAL(id))))); +NoAlloc extern "C" value caml_xml_tree_tagged_child(value tree, value node,value tag){ + return (Val_int(XMLTREE(tree)->TaggedChild(TREENODEVAL(node),TAGVAL(tag)))); } -extern "C" CAMLprim value caml_xml_tree_text_xml_id(value tree, value id){ - return(Val_int((XMLTREE(tree)->TextXMLId(TREENODEVAL(id))))); +NoAlloc extern "C" value caml_xml_tree_select_child(value tree, value node,value tags){ + return (Val_int(XMLTREE(tree)->SelectChild(TREENODEVAL(node), HSET(tags)))); } -extern "C" CAMLprim value caml_xml_tree_node_xml_id(value tree, value id){ - return(Val_int((XMLTREE(tree)->NodeXMLId(TREENODEVAL(id))))); + +NoAlloc extern "C" value caml_xml_tree_tagged_following_sibling(value tree, value node,value tag){ + return (Val_int(XMLTREE(tree)->TaggedFollowingSibling(TREENODEVAL(node),TAGVAL(tag)))); } -extern "C" CAMLprim value caml_xml_tree_tag_name(value tree, value tagid){ - CAMLparam2(tree,tagid); - CAMLlocal1(str); - char* tag; - tag = (char*) XMLTREE(tree)->GetTagNameByRef((TagType) (Int_val(tagid))); - str = caml_copy_string((const char*) tag); - CAMLreturn (str); +NoAlloc extern "C" value caml_xml_tree_select_following_sibling(value tree, value node,value tags){ + return (Val_int(XMLTREE(tree)->SelectFollowingSibling(TREENODEVAL(node), HSET(tags)))); } +NoAlloc extern "C" value caml_xml_tree_tagged_descendant(value tree, value node, value tag){ + return (Val_int(XMLTREE(tree)->TaggedDescendant(TREENODEVAL(node), TAGVAL(tag)))); +} -extern "C" CAMLprim value caml_xml_tree_tag_id(value tree,value id){ - return (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(id)))); +NoAlloc extern "C" value caml_xml_tree_select_descendant(value tree, value node, value tags){ + return (Val_int(XMLTREE(tree)->SelectDescendant(TREENODEVAL(node), HSET(tags)))); } -extern "C" CAMLprim value caml_xml_tree_subtree_tags(value tree,value id,value tag){ - return (Val_int(XMLTREE(tree)->SubtreeTags(TREENODEVAL(id),Int_val(tag)))); +NoAlloc extern "C" value caml_xml_tree_tagged_preceding(value tree, value node, value tag){ + return (Val_int(XMLTREE(tree)->TaggedPreceding(TREENODEVAL(node), TAGVAL(tag)))); } -extern "C" CAMLprim value caml_xml_tree_subtree_size(value tree,value id){ - return (Val_int(XMLTREE(tree)->SubtreeSize(TREENODEVAL(id)))); +NoAlloc extern "C" value caml_xml_tree_tagged_following(value tree, value node, value tag){ + return (Val_int(XMLTREE(tree)->TaggedFollowing(TREENODEVAL(node), TAGVAL(tag)))); } -extern "C" CAMLprim value caml_xml_tree_subtree_elements(value tree,value id){ - return (Val_int(XMLTREE(tree)->SubtreeElements(TREENODEVAL(id)))); +NoAlloc extern "C" value caml_xml_tree_tagged_following_below(value tree, value node, value tag, value ancestor){ + return (Val_int(XMLTREE(tree)->TaggedFollowingBelow(TREENODEVAL(node), TAGVAL(tag), TREENODEVAL(ancestor)))); } +NoAlloc extern "C" value caml_xml_tree_select_following_below(value tree, value node, value tags, value ancestor){ + return (Val_int(XMLTREE(tree)->SelectFollowingBelow(TREENODEVAL(node), HSET(tags), TREENODEVAL(ancestor)))); +} -extern "C" CAMLprim value caml_xml_tree_register_tag(value tree,value str){ - CAMLparam2(tree,str); - CAMLlocal1(id); - unsigned char* tag; - tag = (unsigned char*) (String_val(str)); - id = Val_int(XMLTREE(tree)->RegisterTag(tag)); - CAMLreturn (id); +NoAlloc extern "C" value caml_xml_tree_tagged_following_before(value tree, value node, value tag, value closing){ + return (Val_int(XMLTREE(tree)->TaggedFollowingBefore(TREENODEVAL(node), TAGVAL(tag), TREENODEVAL(closing)))); } -extern "C" CAMLprim value caml_xml_tree_nullt(value unit){ - return (NULLT); +NoAlloc extern "C" value caml_xml_tree_select_following_before(value tree, value node, value tags, value closing){ + return (Val_int(XMLTREE(tree)->SelectFollowingBefore(TREENODEVAL(node), HSET(tags), TREENODEVAL(closing)))); } -extern "C" CAMLprim value caml_unordered_set_length(value hset){ - CAMLparam1(hset); - CAMLreturn (Val_int((HSET(hset))->size())); +NoAlloc extern "C" value caml_xml_tree_tagged_ancestor(value tree, value node, value tag){ + return (Val_int(XMLTREE(tree)->TaggedAncestor(TREENODEVAL(node), TAGVAL(tag)))); } -extern "C" CAMLprim value caml_unordered_set_alloc(value len){ - CAMLparam1(len); - CAMLlocal1(hset); - hset = caml_alloc_custom(&set_ops,sizeof(std::unordered_set*),1,2); - std::unordered_set* ht = new std::unordered_set(); - memcpy(Data_custom_val(hset),&ht,sizeof(std::unordered_set*)); - CAMLreturn (hset); +NoAlloc extern "C" value caml_xml_tree_my_text(value tree, value node){ + return (Val_int(XMLTREE(tree)->MyText(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_unordered_set_set(value vec, value v){ - HSET(vec)->insert((int) Int_val(v)); - return (Val_unit); +NoAlloc extern "C" value caml_xml_tree_my_text_unsafe(value tree, value node){ + return (Val_int(XMLTREE(tree)->MyTextUnsafe(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_select_desc(value tree, value node, value tags){ - return (Val_int (XMLTREE(tree)->SelectDesc(TREENODEVAL(node), - HSET(tags)))); +NoAlloc extern "C" value caml_xml_tree_text_xml_id(value tree, value docid){ + return (Val_int(XMLTREE(tree)->TextXMLId(Int_val(docid)))); } -extern "C" CAMLprim value caml_xml_tree_select_child(value tree, value node, value tags){ - return (Val_int (XMLTREE(tree)->SelectChild(TREENODEVAL(node), - HSET(tags)))); + +NoAlloc extern "C" value caml_xml_tree_node_xml_id(value tree, value node){ + return (Val_int(XMLTREE(tree)->NodeXMLId(TREENODEVAL(node)))); } -extern "C" CAMLprim value caml_xml_tree_select_foll_sibling(value tree, value node, value tags){ - return (Val_int (XMLTREE(tree)->SelectFollSibling(TREENODEVAL(node), - HSET(tags)))); + +NoAlloc extern "C" value caml_xml_tree_parent_node(value tree, value docid){ + return (Val_int(XMLTREE(tree)->ParentNode(Int_val(docid)))); } -extern "C" CAMLprim value caml_xml_tree_select_foll_below(value tree, value node, value tags,value ctx){ - return (Val_int (XMLTREE(tree)->SelectFollBelow(TREENODEVAL(node), - HSET(tags), - TREENODEVAL(ctx)))); +/* +NoAlloc extern "C" value caml_xml_tree_prev_node(value tree, value docid){ + return (Val_int(XMLTREE(tree)->PrevNode(Int_val(docid)))); } -extern "C" CAMLprim value caml_xml_tree_select_foll_before(value tree, value node, value tags,value ctx){ - return (Val_int (XMLTREE(tree)->SelectFollBelow(TREENODEVAL(node), - HSET(tags), - TREENODEVAL(ctx)))); +*/ +extern "C" value caml_xml_tree_get_tag_id(value tree, value tagname){ + CAMLparam2(tree,tagname); + CAMLlocal1(res); + unsigned char* ctagname = (unsigned char*) strdup(String_val(tagname)); + res = Val_int(XMLTREE(tree)->GetTagId(ctagname)); + free(ctagname); + CAMLreturn(res); } +extern "C" value caml_xml_tree_get_tag_name(value tree, value tag){ + CAMLparam2(tree,tag); + CAMLlocal1(res); + res = caml_copy_string((const char*) XMLTREE(tree)->GetTagNameByRef(TAGVAL(tag))); + CAMLreturn(res); +} -extern "C" CAMLprim value caml_xml_tree_doc_ids(value tree, value node){ - CAMLparam2(tree,node); - CAMLlocal1(tuple); - tuple = caml_alloc_tuple(2); - range r = (XMLTREE(tree)->DocIds(TREENODEVAL(node))); - caml_initialize(&Field(tuple,0),Val_int(r.min)); - caml_initialize(&Field(tuple,1),Val_int(r.max)); - CAMLreturn (tuple); +extern "C" value caml_xml_tree_register_tag(value tree, value tagname){ + CAMLparam2(tree,tagname); + CAMLlocal1(res); + unsigned char* ctagname = (unsigned char*) strdup(String_val(tagname)); + res = Val_int(XMLTREE(tree)->RegisterTag(ctagname)); + free(ctagname); + CAMLreturn(res); +} + + +NoAlloc extern "C" value caml_xml_tree_get_text_collection(value tree){ + return((value) XMLTREE(tree)->getTextCollection()); +} + +NoAlloc extern "C" value caml_xml_tree_closing(value tree, value node){ + return (Val_int(XMLTREE(tree)->Closing(TREENODEVAL(node)))); +} + +NoAlloc extern "C" value caml_xml_tree_is_open(value tree, value node){ + return (Val_bool(XMLTREE(tree)->IsOpen(TREENODEVAL(node)))); +} + + + +NoAlloc extern "C" value caml_xml_tree_nullt(value unit){ + return (NULLT); } -extern "C" value caml_result_set_create(value size){ +NoAlloc extern "C" value caml_unordered_set_length(value hset){ + return (Val_int((HSET(hset))->size())); +} + +extern "C" value caml_unordered_set_alloc(value unit){ + CAMLparam1(unit); + CAMLlocal1(hset); + hset = caml_alloc_custom(&set_ops,sizeof(TagIdSet*),1,2); + TagIdSet* ht = new TagIdSet(); + memcpy(Data_custom_val(hset),&ht,sizeof(TagIdSet*)); + CAMLreturn (hset); +} + +NoAlloc extern "C" value caml_unordered_set_set(value set, value v){ + HSET(set)->insert((int) Int_val(v)); + return (Val_unit); +} + +NoAlloc extern "C" value caml_result_set_create(value size){ results* res = (results*) malloc(sizeof(results)); results r = createResults (Int_val(size)); res->n = r.n; @@ -464,53 +671,132 @@ extern "C" value caml_result_set_create(value size){ return ((value) (res)); } -extern "C" CAMLprim value caml_result_set_set(value result,value p){ - CAMLparam1(p); +NoAlloc extern "C" value caml_result_set_set(value result,value p){ setResult ( *((results*) result), Int_val(p)); - CAMLreturn (Val_unit); + return (Val_unit); } -extern "C" CAMLprim value caml_result_set_clear(value result,value p1,value p2){ - CAMLparam2(p1,p2); +NoAlloc extern "C" value caml_result_set_clear(value result,value p1,value p2){ clearRange ( *((results*) result), Int_val(p1), Int_val(p2)); - CAMLreturn (Val_unit); + return (Val_unit); } -extern "C" CAMLprim value caml_result_set_next(value result,value p){ - CAMLparam1(p); +NoAlloc extern "C" value caml_result_set_next(value result,value p){ results r; r = *( (results *) result); - CAMLreturn (Val_int(nextResult(r, Int_val(p)))); + return (Val_int(nextResult(r, Int_val(p)))); } -extern "C" CAMLprim value caml_result_set_count(value result){ - CAMLparam0(); +NoAlloc extern "C" value caml_result_set_count(value result){ results r; r = *( (results *) result); - CAMLreturn (Val_int(countResult(r))); + return (Val_int(countResult(r))); } -extern "C" CAMLprim value caml_xml_tree_print(value tree,value node,value fd){ +NoAlloc extern "C" value caml_xml_tree_print(value tree,value node,value fd){ CAMLparam3(tree,node,fd); XMLTREE(tree)->Print(Int_val(fd),TREENODEVAL(node)); CAMLreturn(Val_unit); } -extern "C" CAMLprim value caml_set_tag_bits(value result, value tag, value tree, value node) +NoAlloc extern "C" value caml_set_tag_bits(value result, value tag, value tree, value node) { - CAMLparam3(tag,tree,node); results r; XMLTree *t = XMLTREE(Field(tree,0)); treeNode opening = TREENODEVAL(node); treeNode closing = t->Closing(opening); TagType target_tag = Int_val(tag); - treeNode first = t->TaggedDesc(opening,target_tag); + treeNode first = t->TaggedDescendant(opening,target_tag); r = *( (results *) result); opening = first; while (opening != NULLT){ setResult(r,opening); - opening = t->TaggedFollBefore(opening,target_tag,closing); + opening = t->TaggedFollowingBefore(opening,target_tag,closing); }; - CAMLreturn(Val_int(first)); + return(Val_int(first)); } + +NoAlloc extern "C" value caml_bit_vector_create(value size){ + return (value) (new vector(Int_val(size),false)); +} + +NoAlloc extern "C" value caml_bit_vector_free(value vect){ + delete ((vector*) vect); + return Val_unit; +} + +NoAlloc extern "C" value caml_bit_vector_get(value vect,value idx){ + return Val_bool (((vector*)vect)->at(Int_val(idx))); +} + +NoAlloc extern "C" value caml_bit_vector_set(value vect,value idx,value b){ + (((vector*)vect)->at(Int_val(idx))) = (bool) Bool_val(b); + return Val_unit; +} + +NoAlloc extern "C" value caml_bit_vector_next(value vect,value idx){ + vector* bv = (vector*) vect; + int i = Int_val(idx); + int l = bv->size(); + while (i < l && !((*bv)[i])) + i++; + return Val_int(i); +} +NoAlloc extern "C" value caml_bit_vector_prev(value vect,value idx){ + int i = Int_val(idx); + while (i >= 0 && !((*((vector*) vect))[i])) + i--; + return Val_int(i); +} + +extern "C" value caml_bit_vector_node_array(value vect){ + CAMLparam0(); + CAMLlocal1(res); + vector* bv = (vector*) vect; + vector vr; + int l = bv->size(); + int i = 0; + while (i < l){ + if ((*bv)[i]) vr.push_back(i); + i++; + }; + l = vr.size(); + res = caml_alloc_tuple(l); + for(i=0;iTaggedDescendant(node,tag),tag); + iterjump(tree,tree->TaggedFollowing(node,tag),tag); + return; + }; +} + +extern "C" value caml_benchmark_jump(value tree,value tag){ + iterjump(XMLTREE(tree),0, Int_val(tag)); + return Val_unit; +} + +int iterfsns(XMLTree* tree, treeNode node){ + if (node == NULLT) + return 0; + else { + int x = tree->Tag(node); + x += iterfsns(tree,tree->FirstChild(node)); + x += iterfsns(tree,tree->NextSibling(node)); + return x; + }; +} + +extern "C" value caml_benchmark_fsns(value tree){ + iterfsns(XMLTREE(tree),0); + return Val_unit; + +} diff --git a/ata.ml b/ata.ml index a5d4a3f..77d5008 100644 --- a/ata.ml +++ b/ata.ml @@ -337,8 +337,8 @@ module FTable = Hashtbl.Make(struct let h_f = FTable.create BIG_H_SIZE -type merge_conf = NO | MARK | ONLY1 | ONLY2 | ONLY12 | MARK1 | MARK2 | MARK12 - +type merge_conf = NO | ONLY1 | ONLY2 | ONLY12 | MARK | MARK1 | MARK2 | MARK12 +(* 000 001 010 011 100 101 110 111 *) let eval_formlist tag s1 s2 fl = let rec loop fl = try @@ -447,14 +447,14 @@ let tags_of_state a q = else 0 let merge conf t res1 res2 = match conf with - NO -> 0 + NO -> 0 | MARK -> 1 - | ONLY12 -> res1+res2 - | ONLY1 -> res1 - | ONLY2 -> res2 - | MARK12 -> res1+res2+1 - | MARK1 -> res1+1 - | MARK2 -> res2+1 + | MARK1 -> res1+1 + | ONLY1 -> res1 + | ONLY2 -> res2 + | ONLY12 -> res1+res2 + | MARK2 -> res2+1 + | MARK12 -> res1+res2+1 let mk_quick_tag_loop _ sl ss tree tag = (); fun t ctx -> @@ -797,8 +797,8 @@ END (mk_fun (fun _ -> Tree.nil) "Tree.mk_nil") (mk_fun (Tree.tagged_child tree) "Tree.tagged_child") (mk_fun (Tree.select_child tree) "Tree.select_child") - (mk_fun (Tree.tagged_desc tree) "Tree.tagged_desc") - (mk_fun (Tree.select_desc tree) "Tree.select_desc") + (mk_fun (Tree.tagged_descendant tree) "Tree.tagged_desc") + (mk_fun (Tree.select_descendant tree) "Tree.select_desc") (mk_fun (fun _ _ -> Tree.first_child tree) "[FIRSTCHILD]Tree.select_child_desc") (mk_fun (Tree.first_element tree) "Tree.first_element") (mk_fun (Tree.first_child tree) "Tree.first_child") @@ -806,13 +806,13 @@ END let choose_jump_next tree d = choose_jump d (mk_fun (fun _ _ -> Tree.nil) "Tree.mk_nil2") - (mk_fun (Tree.tagged_sibling_ctx tree) "Tree.tagged_sibling_ctx") - (mk_fun (Tree.select_sibling_ctx tree) "Tree.select_sibling_ctx") - (mk_fun (Tree.tagged_foll_ctx tree) "Tree.tagged_foll_ctx") - (mk_fun (Tree.select_foll_ctx tree) "Tree.select_foll_ctx") - (mk_fun (fun _ _ -> Tree.next_sibling_ctx tree) "[NEXTSIBLING]Tree.select_sibling_foll_ctx") - (mk_fun (Tree.next_element_ctx tree) "Tree.next_element_ctx") - (mk_fun (Tree.next_sibling_ctx tree) "Tree.node_sibling_ctx") + (mk_fun (Tree.tagged_following_sibling_below tree) "Tree.tagged_sibling_ctx") + (mk_fun (Tree.select_following_sibling_below tree) "Tree.select_sibling_ctx") + (mk_fun (Tree.tagged_following_below tree) "Tree.tagged_foll_ctx") + (mk_fun (Tree.select_following_below tree) "Tree.select_foll_ctx") + (mk_fun (fun _ _ -> Tree.next_sibling_below tree) "[NEXTSIBLING]Tree.select_sibling_foll_ctx") + (mk_fun (Tree.next_element_below tree) "Tree.next_element_ctx") + (mk_fun (Tree.next_sibling_below tree) "Tree.node_sibling_ctx") module SListTable = Hashtbl.Make(struct type t = SList.t @@ -878,7 +878,18 @@ END in set tab tag data end - + + module TransCache2 = struct + include Hashtbl.Make (struct + type t = Tag.t*SList.t + let equal (a,b) (c,d) = a==c && b==d + let hash (a,b) = HASHINT2((Obj.magic a), b.SList.Node.id) + end) + + let add h t s d = add h (t,s) d + let find h t s = find h (t,s) + end + let td_trans = TransCache.create 10000 (* should be number of tags *number of states^2 in the document *) @@ -892,35 +903,6 @@ END let hash t = t.Formlistlist.Node.id end) - module Fold2ResOld = - struct - type 'a t = 'a SListTable.t SListTable.t FllTable.t - let create n = Array.init 10000 (fun _ -> FllTable.create n) - - let find h tag fl s1 s2 = - let hf = h.(tag) in - let hs1 = FllTable.find hf fl in - let hs2 = SListTable.find hs1 s1 in - SListTable.find hs2 s2 - - let add h tag fl s1 s2 data = - let hf = h.(tag) in - let hs1 = - try FllTable.find hf fl with - | Not_found -> - let hs1 = SListTable.create SMALL_H_SIZE - in FllTable.add hf fl hs1;hs1 - in - let hs2 = - try SListTable.find hs1 s1 - with - | Not_found -> - let hs2 = SListTable.create SMALL_H_SIZE - in SListTable.add hs1 s1 hs2;hs2 - in - SListTable.add hs2 s2 data - end - module Fold2Res = struct external get : 'a array -> int ->'a = "%array_unsafe_get" external set : 'a array -> int -> 'a -> unit = "%array_unsafe_set" @@ -976,7 +958,20 @@ END in set as2 s2.SList.Node.id data end - + + module Fold2Res2 = struct + include Hashtbl.Make(struct + type t = Tag.t*Formlistlist.t*SList.t*SList.t + let equal (a,b,c,d) (x,y,z,t) = + a == x && b == y && c == z && d == t + let hash (a,b,c,d) = HASHINT4 (a,b.Formlistlist.Node.id, + c.SList.Node.id,d.SList.Node.id) + end) + let add h t f s1 s2 d = + add h (t,f,s1,s2) d + let find h t f s1 s2 = + find h (t,f,s1,s2) + end let h_fold2 = Fold2Res.create 10000 @@ -986,35 +981,37 @@ END (* evaluation starts from the right so we put sl1,res1 at the end *) let eval_fold2_slist fll t tag (sl2,res2) (sl1,res1) = let res = Array.copy rempty in - try - let r,b,btab = Fold2Res.find h_fold2 tag fll sl1 sl2 in - if b then for i=0 to slot_size - 1 do - res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); - done; - r,res - with - Not_found -> - let btab = Array.make slot_size NO in - let rec fold l1 l2 fll i aq ab = - match fll.Formlistlist.Node.node, - l1.SList.Node.node, - l2.SList.Node.node - with - | Formlistlist.Cons(fl,fll), - SList.Cons(s1,ll1), - SList.Cons(s2,ll2) -> - let r',conf = eval_formlist tag s1 s2 fl in - let _ = btab.(i) <- conf + try + let r,b,btab = Fold2Res.find h_fold2 tag fll sl1 sl2 in + if b then for i=0 to slot_size - 1 do + res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); + done; + r,res + with + Not_found -> + begin + let btab = Array.make slot_size NO in + let rec fold l1 l2 fll i aq ab = + match fll.Formlistlist.Node.node, + l1.SList.Node.node, + l2.SList.Node.node + with + | Formlistlist.Cons(fl,fll), + SList.Cons(s1,ll1), + SList.Cons(s2,ll2) -> + let r',conf = eval_formlist tag s1 s2 fl in + let _ = btab.(i) <- conf in - fold ll1 ll2 fll (i+1) (SList.cons r' aq) ((conf!=NO)||ab) - | _ -> aq,ab - in - let r,b = fold sl1 sl2 fll 0 SList.nil false in - Fold2Res.add h_fold2 tag fll sl1 sl2 (r,b,btab); - if b then for i=0 to slot_size - 1 do - res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); - done; - r,res + fold ll1 ll2 fll (i+1) (SList.cons r' aq) ((conf!=NO)||ab) + | _ -> aq,ab + in + let r,b = fold sl1 sl2 fll 0 SList.nil false in + Fold2Res.add h_fold2 tag fll sl1 sl2 (r,b,btab); + if b then for i=0 to slot_size - 1 do + res.(i) <- RS.merge btab.(i) t res1.(i) res2.(i); + done; + r,res; + end in let null_result = (pempty,Array.copy rempty) in @@ -1029,7 +1026,7 @@ END try TransCache.find td_trans tag slist with - | Not_found -> + | Not_found -> let fl_list,llist,rlist,ca,da,sa,fa = SList.fold (fun set (fll_acc,lllacc,rllacc,ca,da,sa,fa) -> (* For each set *) @@ -1067,7 +1064,10 @@ END let d_n = Algebra.decide a tags_siblings tags_after (StateSet.union sa fa) false in let f_kind,first = choose_jump_down tree d_f and n_kind,next = if noright then (`NIL, fun _ _ -> Tree.nil ) - else choose_jump_next tree d_n in + else choose_jump_next tree d_n in + (*let f_kind,first = `ANY, Tree.first_child tree + and n_kind,next = `ANY, Tree.next_sibling_below tree + in *) let empty_res = null_result in let cont = match f_kind,n_kind with @@ -1096,13 +1096,13 @@ END | `NIL,_ -> ( match n_kind with |`TAG(tag') -> - if SList.equal rlist slist && tag == tag' then + (*if SList.equal rlist slist && tag == tag' then let rec loop t ctx = if t == Tree.nil then empty_res else let res2 = loop (next t ctx) ctx in eval_fold2_slist fl_list t tag res2 empty_res in loop - else + else *) (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop_tag tag' (next t ctx) rlist ctx ) empty_res) @@ -1130,7 +1130,7 @@ END (loop (first t) llist t )) | `ANY,`ANY -> - if SList.equal slist rlist && SList.equal slist llist + (*if SList.equal slist rlist && SList.equal slist llist then let rec loop t ctx = if t == Tree.nil then empty_res else @@ -1139,7 +1139,7 @@ END in eval_fold2_slist fl_list t (Tree.tag tree t) r2 r1 in loop - else + else *) (fun t ctx -> eval_fold2_slist fl_list t (Tree.tag tree t) (loop (next t ctx) rlist ctx ) @@ -1157,7 +1157,7 @@ END (a,b) ) ,cont) in - (TransCache.add td_trans tag slist cont ;cont) + ( TransCache.add td_trans tag slist cont ; cont) in cont t ctx in @@ -1371,7 +1371,7 @@ END match k with | `TAG (tag) -> (*Tree.tagged_lowest t tag, fun tree -> Tree.tagged_next tree tag*) - (Tree.tagged_desc tree tag t, let jump = Tree.tagged_foll_ctx tree tag + (Tree.tagged_descendant tree tag t, let jump = Tree.tagged_following_below tree tag in fun n -> jump n t ) | `CONTAINS(_) -> (Tree.text_below tree t,let jump = Tree.text_next tree in fun n -> jump n t) diff --git a/ata.mli b/ata.mli index a5fb3e1..13e8431 100644 --- a/ata.mli +++ b/ata.mli @@ -4,7 +4,7 @@ sig include Sigs.T with type t = int val make : unit -> t end -type merge_conf = NO | MARK | ONLY1 | ONLY2 | ONLY12 | MARK1 | MARK2 | MARK12 +type merge_conf = NO | ONLY1 | ONLY2 | ONLY12 | MARK | MARK1 | MARK2 | MARK12 module StateSet : sig include Ptset.S with type elt = int diff --git a/main.ml b/main.ml index c889833..342bb2f 100644 --- a/main.ml +++ b/main.ml @@ -16,10 +16,10 @@ let disabled_gc = { Gc.get() with let hash x = 131*x/(x-1+1) let test_loop tree tag = - let t' = Tree.tagged_desc tree tag Tree.root in + let t' = Tree.tagged_descendant tree tag Tree.root in let f = Hashtbl.create 4096 in - let jump t _ = Tree.tagged_foll_ctx tree tag t Tree.root in + let jump t _ = Tree.tagged_following_below tree tag t Tree.root in let g t ctx = if t == Tree.nil then 0 else 1+ ((Hashtbl.find f (hash 101)) (jump t ctx) ctx) @@ -41,10 +41,10 @@ let test_full tree = let test_loop2 tree tag = - let t' = Tree.tagged_desc tree tag Tree.root in + let t' = Tree.tagged_descendant tree tag Tree.root in let f = Hashtbl.create 4096 in - let jump t _ = Tree.tagged_foll_ctx tree tag t Tree.root in + let jump t _ = Tree.tagged_following_below tree tag t Tree.root in let rec g t ctx = if t == Tree.nil then 0 else 1+ (match (Hashtbl.find f (hash 101)) with @@ -64,6 +64,12 @@ let main v query_string output = with Ulexer.Loc.Exc_located ((x,y),e) -> Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);exit 1 in + let _ = Printf.eprintf "Number of nodes %i\n%!" (Tree.size v) in + let _ = Printf.eprintf "Timing first_child/next_sibling %!" in + let _ = time (Tree.benchmark_fsns) v in + let _ = Printf.eprintf "Timing jump to a %!" in + let _ = time (Tree.benchmark_jump v) (Tag.tag "a") in + (* let _ = Printf.eprintf "Timing //keyword :" in let r = time (test_loop v) (Tag.tag "keyword") in let _ = Printf.eprintf "Count is %i\n%!" r in diff --git a/results.c b/results.c index 7570e5a..1f1deda 100644 --- a/results.c +++ b/results.c @@ -212,7 +212,7 @@ int nextResult (results R, int p) // returns pos of next(p) or -1 if none // Naively implemented by kim unsigned int countResult(results R) { - unsigned int result = 0; + unsigned int result = -1; int i = 0; while ( i != -1 && i < R.n) { result ++; diff --git a/tag.ml b/tag.ml index ec80df4..139ff91 100644 --- a/tag.ml +++ b/tag.ml @@ -13,7 +13,7 @@ type pool external null_pool : unit -> pool = "caml_xml_tree_nullt" external null_tag : unit -> t = "caml_xml_tree_nullt" external register_tag : pool -> string -> t = "caml_xml_tree_register_tag" -external tag_name : pool -> t -> string = "caml_xml_tree_tag_name" +external tag_name : pool -> t -> string = "caml_xml_tree_get_tag_name" let nullt = null_tag () (* Defined in XMLTree.cpp *) diff --git a/tree.ml b/tree.ml index f21015d..86f7e5b 100644 --- a/tree.ml +++ b/tree.ml @@ -43,84 +43,112 @@ let nil : [`Tree ] node = nodei ~-1 let nulldoc : [`Text ] node = nodei ~-1 let root : [`Tree ] node = nodei 0 -external text_get_tc_text : tree -> [`Text] node -> string = "caml_text_collection_get_text" - +external text_get_text : tree -> [`Text] node -> string = "caml_text_collection_get_text" external text_is_empty : tree -> [`Text ] node -> bool = "caml_text_collection_empty_text" let text_is_empty t n = (equal_node nulldoc n) || text_is_empty t n +external text_is_prefix : tree -> string -> bool = "caml_text_collection_is_prefix" +external text_is_suffix : tree -> string -> bool = "caml_text_collection_is_suffix" +external text_is_equal : tree -> string -> bool = "caml_text_collection_is_equal" external text_is_contains : tree -> string -> bool = "caml_text_collection_is_contains" -external text_count_contains : tree -> string -> int = "caml_text_collection_count_contains" -external text_count : tree -> string -> int = "caml_text_collection_count" -external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains" -external text_startswith : tree -> string -> [`Text ] node array = "caml_text_collection_startswith" -external text_endswith : tree -> string -> [`Text ] node array = "caml_text_collection_endswith" -external text_equals : tree -> string -> [`Text ] node array = "caml_text_collection_equals" -external text_unsorted_contains : tree -> string -> [`Text ] node array = "caml_text_collection_unsorted_contains" -external text_get_cached_text : tree -> [`Text] node -> string = "caml_text_collection_get_cached_text" +external text_is_lessthan : tree -> string -> bool = "caml_text_collection_is_lessthan" + +external text_count : tree -> string -> int = "caml_text_collection_count" +external text_count_prefix : tree -> string -> int = "caml_text_collection_count_prefix" +external text_count_suffix : tree -> string -> int = "caml_text_collection_count_suffix" +external text_count_equal : tree -> string -> int = "caml_text_collection_count_equal" +external text_count_contains : tree -> string -> int = "caml_text_collection_count_contains" +external text_count_lessthan : tree -> string -> int = "caml_text_collection_count_lessthan" + +external text_prefix : tree -> string -> [`Text ] node array = "caml_text_collection_prefix" +external text_suffix : tree -> string -> [`Text ] node array = "caml_text_collection_suffix" +external text_equals : tree -> string -> [`Text ] node array = "caml_text_collection_equals" +external text_contains : tree -> string -> [`Text ] node array = "caml_text_collection_contains" +external text_lessthan : tree -> string -> [`Text ] node array = "caml_text_collection_lessthan" + -external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" -external tree_subtree_size : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_size" -external tree_subtree_elements : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_elements" - +external tree_root : tree -> [`Tree] node = "caml_xml_tree_root" "noalloc" +external tree_size : tree -> int = "caml_xml_tree_size" "noalloc" +external tree_subtree_size : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_size" "noalloc" +external tree_subtree_elements : tree -> [`Tree] node -> int = "caml_xml_tree_subtree_elements" "noalloc" +external tree_subtree_tags : tree -> [`Tree] node -> Tag.t -> int = "caml_xml_tree_subtree_elements" "noalloc" + let tree_is_nil x = equal_node x nil +external tree_is_leaf : tree -> [`Tree ] node -> bool = "caml_xml_tree_is_leaf" "noalloc" +external tree_is_ancestor : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" "noalloc" +external tree_is_child : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_child" "noalloc" +external tree_is_first_child : tree -> [`Tree ] node -> bool = "caml_xml_tree_is_first_child" "noalloc" +external tree_num_children : tree -> [`Tree ] node -> int = "caml_xml_tree_num_children" "noalloc" +external tree_child_number : tree -> [`Tree ] node -> int = "caml_xml_tree_child_number" "noalloc" +external tree_depth : tree -> [`Tree ] node -> int = "caml_xml_tree_depth" "noalloc" +external tree_preorder : tree -> [`Tree ] node -> int = "caml_xml_tree_preorder" "noalloc" +external tree_postorder : tree -> [`Tree ] node -> int = "caml_xml_tree_postorder" "noalloc" +external tree_tag : tree -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag" "noalloc" +external tree_doc_ids : tree -> [`Tree ] node -> [`Text] node*[`Text] node = "caml_xml_tree_doc_ids" external tree_parent : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_parent" "noalloc" -external tree_parent_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_doc" "noalloc" -(*external tree_prev_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_prev_doc" "noalloc" *) +external tree_child : tree -> [`Tree] node -> int -> [`Tree] node = "caml_xml_tree_child" "noalloc" external tree_first_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_child" "noalloc" +external tree_first_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_element" "noalloc" +external tree_last_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_last_child" "noalloc" +external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" "noalloc" +external tree_next_element : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_element" "noalloc" +external tree_prev_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling" "noalloc" +external tree_tagged_child : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_child" "noalloc" + +type unordered_set +external unordered_set_alloc : int -> unordered_set = "caml_unordered_set_alloc" +external unordered_set_length : unordered_set -> int = "caml_unordered_set_length" +external unordered_set_insert : unordered_set -> int -> unit = "caml_unordered_set_set" "noalloc" + +external tree_select_child : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_child" "noalloc" +external tree_tagged_following_sibling : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_following_sibling" "noalloc" +external tree_select_following_sibling : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_following_sibling" "noalloc" +external tree_tagged_descendant : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_descendant" "noalloc" +external tree_select_descendant : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_descendant" "noalloc" +external tree_tagged_following : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_following" "noalloc" +external tree_tagged_following_below : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_following_below" "noalloc" +external tree_select_following_below : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_following_below" "noalloc" + + +external tree_tagged_following_before : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_following_before" "noalloc" +external tree_select_following_below : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_following_before" "noalloc" + +external tree_my_text : tree -> [`Tree ] node -> [`Text] node = "caml_xml_tree_my_text" "noalloc" +external tree_my_text_unsafe : tree -> [`Tree ] node -> [`Text] node = "caml_xml_tree_my_text_unsafe" "noalloc" +external tree_text_xml_id : tree -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" "noalloc" +external tree_node_xml_id : tree -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id" "noalloc" + +external tree_parent_node : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_parent_node" "noalloc" + +(*external tree_prev_doc : tree -> [`Text ] node -> [`Tree ] node = "caml_xml_tree_prev_doc" "noalloc" *) + external tree_closing : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_closing" "noalloc" external tree_is_open : tree -> [`Tree] node -> bool = "caml_xml_tree_is_open" "noalloc" -external tree_first_element : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_first_element" "noalloc" -external tree_tagged_child : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_child" "noalloc" -external tree_next_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_sibling" "noalloc" -external tree_next_element : t -> [`Tree] node -> [`Tree] node = "caml_xml_tree_next_element" "noalloc" -external tree_tagged_sibling : tree -> [`Tree] node -> Tag.t -> [`Tree] node = "caml_xml_tree_tagged_sibling" "noalloc" -external tree_prev_sibling : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_prev_sibling" "noalloc" -external tree_is_leaf : tree -> [`Tree] node -> bool = "caml_xml_tree_is_leaf" "noalloc" -external tree_last_child : tree -> [`Tree] node -> [`Tree] node = "caml_xml_tree_last_child" "noalloc" -external tree_is_first_child : tree -> [`Tree] node -> bool = "caml_xml_tree_is_first_child" "noalloc" +external benchmark_jump : tree -> Tag.t -> unit = "caml_benchmark_jump" "noalloc" -external tree_tag_id : tree -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag_id" "noalloc" - +let benchmark_jump t s = benchmark_jump t.doc s -let tree_is_last t n = equal_node nil (tree_next_sibling t n) +external benchmark_fsns : tree -> unit = "caml_benchmark_fsns" "noalloc" +let benchmark_fsns t = benchmark_fsns t.doc -external tree_my_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text" "noalloc" -external tree_my_text_unsafe : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_my_text_unsafe" "noalloc" -(*external tree_next_text : tree -> [`Tree] node -> [`Text ] node = "caml_xml_tree_next_text" "noalloc" *) -external tree_doc_ids : tree -> [`Tree ] node -> [`Text ] node * [`Text ] node = "caml_xml_tree_doc_ids" -let text_size tree = inode (snd ( tree_doc_ids tree root )) -let text_get_cached_text t (x:[`Text] node) = - if x == nulldoc then "" - else - text_get_cached_text t x -external tree_text_xml_id : tree -> [`Text ] node -> int = "caml_xml_tree_text_xml_id" "noalloc" -external tree_node_xml_id : tree -> [`Tree ] node -> int = "caml_xml_tree_node_xml_id" "noalloc" -external tree_is_ancestor : tree -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" "noalloc" -external tree_tagged_desc : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node = "caml_xml_tree_tagged_desc" "noalloc" -external tree_tagged_foll_below : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_below" "noalloc" -external tree_tagged_foll_before : tree -> [`Tree ] node -> Tag.t -> [`Tree ] node -> [`Tree ] node = "caml_xml_tree_tagged_foll_before" "noalloc" -external tree_subtree_tags : tree -> [`Tree ] node -> Tag.t -> int = "caml_xml_tree_subtree_tags" "noalloc" -type unordered_set -external unordered_set_alloc : int -> unordered_set = "caml_unordered_set_alloc" -external unordered_set_length : unordered_set -> int = "caml_unordered_set_length" -external unordered_set_insert : unordered_set -> int -> unit = "caml_unordered_set_set" "noalloc" +let text_size tree = inode (snd ( tree_doc_ids tree root )) + +let text_get_text t (x:[`Text] node) = + if x == nulldoc then "" + else text_get_text t x + -external tree_select_child : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_child" "noalloc" -external tree_select_foll_sibling : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_foll_sibling" "noalloc" -external tree_select_desc : tree -> [`Tree ] node -> unordered_set -> [`Tree] node = "caml_xml_tree_select_desc" "noalloc" -external tree_select_foll_below : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_foll_below" "noalloc" -external tree_select_foll_before : tree -> [`Tree ] node -> unordered_set -> [`Tree] node -> [`Tree] node = "caml_xml_tree_select_foll_before" "noalloc" module HPtset = Hashtbl.Make(Ptset.Int) @@ -144,24 +172,25 @@ let text_size t = text_size t.doc module MemUnion = Hashtbl.Make (struct type t = Ptset.Int.t*Ptset.Int.t - let equal (x,y) (z,t) = (Ptset.Int.equal x z)&&(Ptset.Int.equal y t) + let equal (x,y) (z,t) = x == z || y == t let equal a b = equal a b || equal b a let hash (x,y) = (* commutative hash *) - let x = Ptset.Int.hash x - and y = Ptset.Int.hash y + let x = Ptset.Int.uid x + and y = Ptset.Int.uid y in - if x < y then HASHINT2(x,y) else HASHINT2(y,x) + if x <= y then HASHINT2(x,y) else HASHINT2(y,x) end) module MemAdd = Hashtbl.Make ( struct type t = Tag.t*Ptset.Int.t - let equal (x,y) (z,t) = (x == z)&&(Ptset.Int.equal y t) - let hash (x,y) = HASHINT2(x,Ptset.Int.hash y) + let equal (x,y) (z,t) = (x == z)&&(y == t) + let hash (x,y) = HASHINT2(x,Ptset.Int.uid y) end) let collect_tags tree = - let h_union = MemUnion.create BIG_H_SIZE in + let _ = Printf.eprintf "Collecting Tags\n%!" in +(* let h_union = MemUnion.create BIG_H_SIZE in let pt_cup s1 s2 = try MemUnion.find h_union (s1,s2) @@ -176,9 +205,11 @@ let collect_tags tree = with | Not_found -> let r = Ptset.Int.add t s in MemAdd.add h_add (t,s) r;r - in + in *) + let pt_cup = Ptset.Int.union in + let pt_add = Ptset.Int.add in let h = Hashtbl.create BIG_H_SIZE in - let update t sc sb ss sa = + let update t sc sb ss sa = let schild,sbelow,ssibling,safter = try Hashtbl.find h t @@ -187,32 +218,22 @@ let collect_tags tree = (Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty,Ptset.Int.empty) in Hashtbl.replace h t - (pt_cup sc schild,pt_cup sbelow sb, pt_cup ssibling ss, pt_cup safter sa) + (pt_cup sc schild,pt_cup sbelow sb, pt_cup ssibling ss, pt_cup safter sa) in - let rec loop_right id acc_after = + let rec loop right id acc_after = if id == nil - then Ptset.Int.empty,Ptset.Int.empty,acc_after - else - let sibling2,desc2,after2 = loop_right (tree_next_sibling tree id) acc_after in - let child1,desc1,after1 = loop_left (tree_first_child tree id) after2 in - let tag = tree_tag_id tree id in + then Ptset.Int.empty,Ptset.Int.empty,acc_after else + let sibling2,desc2,after2 = loop true (tree_next_sibling tree id) acc_after in + let child1,desc1,after1 = loop false (tree_first_child tree id) after2 in + let tag = tree_tag tree id in update tag child1 desc1 sibling2 after2; ( pt_add tag sibling2, pt_add tag (pt_cup desc1 desc2), - pt_cup after1 (pt_cup desc1 desc2) ) - and loop_left id acc_after = - if id == nil - then Ptset.Int.empty,Ptset.Int.empty,acc_after - else - let sibling2,desc2,after2 = loop_right (tree_next_sibling tree id) acc_after in - let child1,desc1,after1 = loop_left (tree_first_child tree id) after2 in - let tag = tree_tag_id tree id in - update tag child1 desc1 sibling2 after2; - (pt_add tag sibling2, - pt_add tag (pt_cup desc1 desc2), - acc_after ) + if right then pt_cup after1 (pt_cup desc1 desc2) else acc_after ) in - let _ = loop_left (tree_root tree) Ptset.Int.empty in h + let _ = loop false (tree_root tree) Ptset.Int.empty in + let _ = Printf.eprintf "Finished\n%!" in + h @@ -228,8 +249,8 @@ let in_array _ i = let init_textfun f t s = let a = match f with | `CONTAINS -> text_contains t.doc s - | `STARTSWITH -> text_startswith t.doc s - | `ENDSWITH -> text_endswith t.doc s + | `STARTSWITH -> text_prefix t.doc s + | `ENDSWITH -> text_suffix t.doc s | `EQUALS -> text_equals t.doc s in (*Array.fast_sort (compare) a; *) @@ -237,7 +258,6 @@ let init_textfun f t s = Array.iter (fun x -> Hashtbl.add contains_index x true) !contains_array let count_contains t s = text_count_contains t.doc s -let unsorted_contains t s = text_unsorted_contains t.doc s let init_naive_contains t s = let i,j = tree_doc_ids t.doc (tree_root t.doc) @@ -252,7 +272,7 @@ let init_naive_contains t s = let rec loop n acc l = if n >= j then acc,l else - let s = text_get_cached_text t.doc n + let s = text_get_text t.doc n in if matching s then loop (nodei ((inode n)+1)) (n::acc) (l+1) @@ -281,7 +301,7 @@ let text_below tree t = let l = Array.length !contains_array in let i,j = tree_doc_ids tree.doc t in let id = if l == 0 then i else (array_find !contains_array i j) in - tree_parent_doc tree.doc id + tree_parent_node tree.doc id let text_next tree t root = let l = Array.length !contains_array in @@ -290,7 +310,7 @@ let text_next tree t root = let id = if l == 0 then if inf > j then nulldoc else inf else array_find !contains_array inf j in - tree_parent_doc tree.doc id + tree_parent_node tree.doc id @@ -339,6 +359,7 @@ let parse f str = let parse_xml_uri str = parse parse_xml_uri str let parse_xml_string str = parse parse_xml_string str +let size t = tree_size t.doc;; external pool : tree -> Tag.pool = "%identity" @@ -452,7 +473,7 @@ let is_binary_ancestor t n1 n2 = let parent t n = tree_parent t.doc n let first_child t = (); fun n -> tree_first_child t.doc n -let first_element t = (); fun n -> tree_first_element t n +let first_element t = (); fun n -> tree_first_element t.doc n (* these function will be called in two times: first partial application on the tag, then application of the tag and the tree, then application of @@ -466,37 +487,37 @@ let select_child t = fun ts -> fun n -> tree_select_child t.doc n v let next_sibling t = (); fun n -> tree_next_sibling t.doc n -let next_element t = (); fun n -> tree_next_element t n +let next_element t = (); fun n -> tree_next_element t.doc n -let tagged_sibling t tag = (); fun n -> tree_tagged_sibling t.doc n tag +let tagged_following_sibling t tag = (); fun n -> tree_tagged_following_sibling t.doc n tag -let select_sibling t = fun ts -> +let select_following_sibling t = fun ts -> let v = (ptset_to_vector ts) in (); - fun n -> tree_select_foll_sibling t.doc n v + fun n -> tree_select_following_sibling t.doc n v -let next_sibling_ctx t = (); fun n _ -> tree_next_sibling t.doc n -let next_element_ctx t = (); fun n _ -> tree_next_element t n -let tagged_sibling_ctx t tag = (); fun n _ -> tree_tagged_sibling t.doc n tag +let next_sibling_below t = (); fun n _ -> tree_next_sibling t.doc n +let next_element_below t = (); fun n _ -> tree_next_element t.doc n +let tagged_following_sibling_below t tag = (); fun n _ -> tree_tagged_following_sibling t.doc n tag -let select_sibling_ctx t = fun ts -> +let select_following_sibling_below t = fun ts -> let v = (ptset_to_vector ts) in (); - fun n _ -> tree_select_foll_sibling t.doc n v + fun n _ -> tree_select_following_sibling t.doc n v let id t n = tree_node_xml_id t.doc n -let tag t n = if n == nil then Tag.nullt else tree_tag_id t.doc n +let tag t n = if n == nil then Tag.nullt else tree_tag t.doc n -let tagged_desc t tag = (); fun n -> tree_tagged_desc t.doc n tag +let tagged_descendant t tag = (); fun n -> tree_tagged_descendant t.doc n tag -let select_desc t = fun ts -> +let select_descendant t = fun ts -> let v = (ptset_to_vector ts) in (); - fun n -> tree_select_desc t.doc n v + fun n -> tree_select_descendant t.doc n v -let tagged_foll_ctx t tag = (); fun n ctx -> tree_tagged_foll_below t.doc n tag ctx +let tagged_following_below t tag = (); fun n ctx -> tree_tagged_following_below t.doc n tag ctx -let select_foll_ctx t = fun ts -> +let select_following_below t = fun ts -> let v = (ptset_to_vector ts) in (); - fun n ctx -> tree_select_foll_below t.doc n v ctx + fun n ctx -> tree_select_following_below t.doc n v ctx let closing t n = tree_closing t.doc n let is_open t n = tree_is_open t.doc n @@ -552,7 +573,7 @@ let array_find a i j = (* opening tag *) if tag == Tag.pcdata then begin - output_string outc (text_get_cached_text tree (tree_my_text_unsafe tree t)); + output_string outc (text_get_text tree (tree_my_text_unsafe tree t)); loop (next2 t) (* skip closing $ *) end else @@ -562,7 +583,7 @@ let array_find a i j = let t' = next t in if tree_is_open tree t' then let _ = push tagstr in - let tag' = tree_tag_id tree t' in + let tag' = tree_tag tree t' in if tag' == Tag.attribute then let t'' = loop_attr (next t') 0 in output_string outc ">"; loop t'' else (output_string outc ">";loop_tag t' tag') else (* closing with no content *) @@ -576,15 +597,15 @@ let array_find a i j = output_char outc '>'; loop (next t); end - and loop t = loop_tag t (tree_tag_id tree t) + and loop t = loop_tag t (tree_tag tree t) and loop_attr t n = if tree_is_open tree t then - let attname = att_str (tree_tag_id tree t) in + let attname = att_str (tree_tag tree t) in output_char outc ' '; output_string outc attname; output_string outc "=\""; let t = next t in (* open $@ *) - output_string outc (text_get_cached_text tree (tree_my_text_unsafe tree t)); + output_string outc (text_get_text tree (tree_my_text_unsafe tree t)); output_char outc '"'; loop_attr (next3 t) (n+1) else @@ -606,12 +627,12 @@ let array_find a i j = let rec loop ?(print_right=true) t = if t != nil then - let tagid = tree_tag_id tree.doc t in + let tagid = tree_tag tree.doc t in if tagid==Tag.pcdata then begin let tid = tree_my_text_unsafe tree.doc t in - output_string outc (text_get_cached_text tree.doc tid); + output_string outc (text_get_text tree.doc tid); if print_right then loop (next_sibling tree t); end @@ -655,7 +676,7 @@ let array_find a i j = output_char outc ' '; output_string outc attname; output_string outc "=\""; - output_string outc (text_get_cached_text tree.doc tid); + output_string outc (text_get_text tree.doc tid); output_char outc '"'; loop_attributes (next_sibling tree a) in @@ -685,7 +706,7 @@ let rec binary_parent t n = if tree_is_first_child t.doc n then tree_parent t.doc n else tree_prev_sibling t.doc n - in if tree_tag_id t.doc r = Tag.pcdata then + in if tree_tag t.doc r = Tag.pcdata then binary_parent t r else r @@ -698,20 +719,20 @@ let subtree_tags t tag = (); let get_text t n = let tid = tree_my_text t.doc n in if tid == nulldoc then "" else - text_get_cached_text t.doc tid + text_get_text t.doc tid let dump_tree fmt tree = let rec loop t n = if t != nil then - let tag = (tree_tag_id tree.doc t ) in + let tag = (tree_tag tree.doc t ) in let tagstr = Tag.to_string tag in let tab = String.make n ' ' in if tag == Tag.pcdata || tag == Tag.attribute_data then Format.fprintf fmt "%s<%s>%s\n" - tab tagstr (text_get_cached_text tree.doc (tree_my_text tree.doc t)) tagstr + tab tagstr (text_get_text tree.doc (tree_my_text tree.doc t)) tagstr else begin Format.fprintf fmt "%s<%s>\n" tab tagstr; loop (tree_first_child tree.doc t) (n+2); @@ -724,3 +745,14 @@ let dump_tree fmt tree = let print_xml_fast3 t = tree_print_xml_fast3 t.doc + + + + + + + + + + + diff --git a/tree.mli b/tree.mli index 8cffd18..59bbcc2 100644 --- a/tree.mli +++ b/tree.mli @@ -20,7 +20,7 @@ val dump_node : 'a node -> string val nil : [ `Tree ] node val root : [ `Tree ] node - +val size : t -> int val is_root : [ `Tree ] node -> bool val is_nil : [ `Tree ] node -> bool @@ -34,25 +34,25 @@ val select_child : t -> Ptset.Int.t -> [ `Tree ] node -> [ `Tree ] node val next_sibling : t -> [ `Tree ] node -> [ `Tree ] node val next_element : t -> [ `Tree ] node -> [ `Tree ] node -val next_sibling_ctx : t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node -val next_element_ctx : t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node +val next_sibling_below : t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node +val next_element_below : t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node -val tagged_sibling : t -> Tag.t -> [ `Tree ] node -> [ `Tree ] node -val tagged_sibling_ctx : t -> Tag.t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node +val tagged_following_sibling : t -> Tag.t -> [ `Tree ] node -> [ `Tree ] node +val tagged_following_sibling_below : t -> Tag.t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node -val select_sibling : t -> Ptset.Int.t -> [ `Tree ] node -> [ `Tree ] node -val select_sibling_ctx : t -> Ptset.Int.t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node +val select_following_sibling : t -> Ptset.Int.t -> [ `Tree ] node -> [ `Tree ] node +val select_following_sibling_below : t -> Ptset.Int.t -> [ `Tree ] node -> [ `Tree ] node -> [ `Tree ] node val tag : t -> [ `Tree ] node -> Tag.t val id : t -> [ `Tree ] node -> int -val tagged_desc : t -> Tag.t -> [ `Tree ] node -> [`Tree] node -val select_desc : t -> Ptset.Int.t -> [ `Tree ] node -> [`Tree] node +val tagged_descendant : t -> Tag.t -> [ `Tree ] node -> [`Tree] node +val select_descendant : t -> Ptset.Int.t -> [ `Tree ] node -> [`Tree] node -val tagged_foll_ctx : t -> Tag.t -> [ `Tree ] node -> [`Tree] node -> [ `Tree ] node -val select_foll_ctx : t -> Ptset.Int.t -> [ `Tree ] node -> [`Tree] node -> [ `Tree ] node +val tagged_following_below : t -> Tag.t -> [ `Tree ] node -> [`Tree] node -> [ `Tree ] node +val select_following_below : t -> Ptset.Int.t -> [ `Tree ] node -> [`Tree] node -> [ `Tree ] node val count : t -> string -> int val print_xml_fast : out_channel -> t -> [ `Tree ] node -> unit @@ -86,3 +86,6 @@ val text_next : t -> [`Tree] node -> [`Tree] node -> [`Tree] node val closing : t -> [`Tree] node -> [`Tree] node val is_open : t -> [`Tree] node -> bool + +val benchmark_jump : t -> Tag.t -> unit +val benchmark_fsns : t -> unit -- 2.17.1