X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=src%2Flexindex_stub.cpp;h=aad2333fd5d8b0b1392bc452bd21df8e92091760;hb=8fc8254d75a8970b7f08f81e4a809da089e567e6;hp=a09216ef0096fee40110a870752928d614a0d273;hpb=852a21ccb090cc11e996d7bae3322882cc694e3e;p=SXSI%2Fxpathcomp.git diff --git a/src/lexindex_stub.cpp b/src/lexindex_stub.cpp index a09216e..aad2333 100644 --- a/src/lexindex_stub.cpp +++ b/src/lexindex_stub.cpp @@ -2,24 +2,60 @@ #include #include #include +#include #include "xml-tree.hpp" #include "utils_stub.hpp" +#include +// #include +using namespace std; -//define a type for the lexicographic index +struct myclass { + xml_tree * tree; + bool operator() (int32_t i,int32_t j) { + return (strcmp((const char*) tree->get_text(i), + (const char*) tree->get_text(j))<0);} +} myobject; +//define a type for the lexicographic index class lex_index { public: - //The tag ID + //The tag IDs xml_tree::tag_t tag; + xml_tree::tag_t tag2; //The text data - std::vector > data; + vector tagVector; + vector::iterator tagVectorIt; + vector tag2Vector; + vector::iterator tag2VectorIt; + void printVector(xml_tree::tag_t t, vector v); + void print(); }; +void lex_index::printVector(xml_tree::tag_t t, vector v){ + vector::iterator i=v.begin(); + if (i!=v.end()) { + printf("%i-vector: [%i", t, *i); + for (++i; i!=v.end(); ++i) + printf(",%i", *i); + printf("]\n"); + } +} +void lex_index::print(){ + printf("Print called\n"); + printVector(tag, tagVector); + printVector(tag2, tag2Vector); +} -using namespace SXSI; +// class prefix_treeNode { +// public: + // std::map Children; +// std::array Children; +// uint32 textID; +//}; +using namespace SXSI; static xml_tree*& XMLTREE(value v) { @@ -41,36 +77,90 @@ static xml_tree::tag_t TAG(value i) return static_cast(Int_val(i)); } +void preorderTraverse(xml_tree * tree, xml_tree::tag_t parent_tag, xml_tree::node_t node, lex_index* lindex){ + if (tree->tag(node)==tree->PCDATA_OPEN_TAG_ID) + if (parent_tag==lindex->tag) lindex->tagVectorIt = + lindex->tagVector.insert(lindex->tagVectorIt, tree->text_id(node)); + else if (parent_tag==lindex->tag2) lindex->tag2VectorIt = + lindex->tag2Vector.insert(lindex->tag2VectorIt, tree->text_id(node)); + if (tree->tag(tree->first_child(node))!=0) preorderTraverse(tree,tree->tag(node),tree->first_child(node),lindex); + if (tree->tag(tree->next_sibling(node))!=0) preorderTraverse(tree,parent_tag,tree->next_sibling(node),lindex); +} +vector mergeJoin(xml_tree * tree, vector v1, vector v2){ + vector v; + vector::iterator i=v.begin(); + vector::iterator i1=v1.begin(); + vector::iterator i2=v2.begin(); + int k; + + while((i1!=v1.end()) && (i2!=v2.end())){ + k = strcmp((const char*) tree->get_text(*i1), + (const char*) tree->get_text(*i2)); + if (k==0) + { + i = v.insert( i, *i1 ); + //advance left + i1++; + //advance right + i2++; + } + else if (k<0) i1++; //advance left + else i2++; //advance right + } + return(v); +} +void printVector(const char * label, vector v){ + vector::iterator i=v.begin(); + if (i!=v.end()) { + printf("%s-vector: [%i", label, *i); + for (++i; i!=v.end(); ++i) + printf(",%i", *i); + printf("]\n"); + } +} -extern "C" value caml_build_lex_index(value vtree, value vtag) +ML_BINDING value caml_build_lex_index(value vtree, value vtag, value vtag2) { CAMLparam2(vtree, vtag); CAMLlocal1(vindex); vindex = sxsi_alloc_custom(); xml_tree * tree = XMLTREE(vtree); - xml_tree::tag_t tag = TAG(vtag); + myobject.tree = tree; - //Uncomment the following and comment the failwith line - //LEXINDEX(vindex) = ... return a lex_index* .... + //allocate a lex index + lex_index* mylindex = new lex_index(); + //take the tag parameter given by ocaml and convert + //it to a C++ tag and store it in the tag field of mylindex + if ((TAG(vtag)==-1) || (TAG(vtag2)==-1)) caml_failwith(""); + mylindex->tag = TAG(vtag); + mylindex->tag2 = TAG(vtag2); - caml_failwith("build_lex_index not implemented"); + //initialize iterators for the two vectors + mylindex->tagVectorIt=mylindex->tagVector.begin(); + mylindex->tag2VectorIt=mylindex->tag2Vector.begin(); + preorderTraverse(tree, 0, tree->first_child(tree->ROOT), mylindex); + sort(mylindex->tagVector.begin(), mylindex->tagVector.end(), myobject); + sort(mylindex->tag2Vector.begin(), mylindex->tag2Vector.end(), myobject); + printVector(tree->get_tag_name_by_ref(mylindex->tag), mylindex->tagVector); + printVector(tree->get_tag_name_by_ref(mylindex->tag2), mylindex->tag2Vector); + printVector("Result" , mergeJoin(tree, mylindex->tagVector, mylindex->tag2Vector)); - CAMLreturn (vindex); + //Uncomment the following and comment the failwith line + //LEXINDEX(vindex) = ... return a lex_index* .... + LEXINDEX(vindex)=mylindex; } -extern "C" value caml_print_lex_index(value vindex) +ML_BINDING value caml_print_lex_index(value vindex) { CAMLparam1(vindex); lex_index* index = LEXINDEX(vindex); //Print the index to the terminal - - caml_failwith("print_lex_index not implemented"); - - + // caml_failwith("print_lex_index not implemented"); + index->print(); CAMLreturn (Val_unit); }