X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=src%2Flexindex_stub.cpp;h=9f88afb6d78c7a34895eee89395c0c19c7b7d420;hb=3fbeef47be08dd39be31af8dedfcdda30ac63f6f;hp=315be7f31380d22e35681ef939983e5844f34312;hpb=5f371ee50291faed9ae0514dfd4ba2aec87faea1;p=SXSI%2Fxpathcomp.git diff --git a/src/lexindex_stub.cpp b/src/lexindex_stub.cpp index 315be7f..9f88afb 100644 --- a/src/lexindex_stub.cpp +++ b/src/lexindex_stub.cpp @@ -10,23 +10,170 @@ // #include using namespace std; -vector myvector; -vector::iterator it=myvector.begin(); -vector myvector2; -vector::iterator it2=myvector2.begin(); -xml_tree * tree; -xml_tree::tag_t tag; -xml_tree::tag_t tag2; +struct myclass { + xml_tree * tree; + bool operator() (int32_t i,int32_t j) { + return (strcmp((const char*) tree->get_text(i), + (const char*) tree->get_text(j))<0);} +} myobject; -//define a type for the lexicographic index +void printVector(vector v){ + vector::iterator i=v.begin(); + if (i!=v.end()) { + printf("[%i", *i); + for (++i; i!=v.end(); ++i) + printf(",%i", *i); + printf("]\n"); + } +} + +void printPairVector(vector> v){ + vector>::iterator i=v.begin(); + if (i!=v.end()) { + printf("[(%i,%i)", (*i).first, (*i).second); + for (++i; i!=v.end(); ++i) + printf(",(%i,%i)", (*i).first, (*i).second); + printf("]\n"); + } +} +//define a type for the lexicographic index class lex_index { public: - //The tag ID xml_tree::tag_t tag; - //The text data - std::vector > data; + xml_tree::tag_t tag2; + vector tagVector; + vector tag2Vector; + void preorderTraverse(xml_tree * tree, xml_tree::tag_t parent_tag, xml_tree::node_t node); + void createIndex(xml_tree * tree); + void print(); +}; + +void lex_index::preorderTraverse(xml_tree * tree, xml_tree::tag_t parent_tag, xml_tree::node_t node){ + if (tree->tag(node)==tree->PCDATA_OPEN_TAG_ID) + if (parent_tag==tag) tagVector.push_back(tree->text_id(node)); + else if (parent_tag==tag2) tag2Vector.push_back(tree->text_id(node)); + if (tree->tag(tree->first_child(node))!=0) preorderTraverse(tree,tree->tag(node),tree->first_child(node)); + if (tree->tag(tree->next_sibling(node))!=0) preorderTraverse(tree,parent_tag,tree->next_sibling(node)); +} + +void lex_index::createIndex(xml_tree * tree){ + preorderTraverse(tree, 0, tree->first_child(tree->ROOT)); + sort(tagVector.begin(), tagVector.end(), myobject); + sort(tag2Vector.begin(), tag2Vector.end(), myobject); +} + +void lex_index::print(){ + printf("%i-vector: ", tag); + printVector(tagVector); + printf("%i-vector: ", tag2); + printVector(tag2Vector); +} + +class lex_index_count : public lex_index { +public: + vector> tcountVector; + vector> tcount2Vector; + void createCounts(xml_tree * tree); + void print(); +}; + +void lex_index_count::createCounts(xml_tree * tree){ + int k; + int i=0; + while((i+1)get_text(tagVector[i]), + (const char*) tree->get_text(tagVector[i+1]))==0){ + k++; + tcountVector.push_back(make_pair(tagVector[i],1)); + i++; + } + if(k>1) tcountVector[i-k+1]=make_pair((tcountVector[i-k+1]).first,k); + tcountVector.push_back(make_pair(tagVector[i],1)); + i++; + } + if(iget_text(tag2Vector[i]), + (const char*) tree->get_text(tag2Vector[i+1]))==0){ + k++; + tcount2Vector.push_back(make_pair(tag2Vector[i],1)); + i++; + } + if(k>1) tcount2Vector[i-k+1]=make_pair((tcount2Vector[i-k+1]).first,k); + tcount2Vector.push_back(make_pair(tag2Vector[i],1)); + i++; + } + if(i resultVector; + void mergeJoin(vector v1, vector v2); +}; + +void VectorMergeJoin::mergeJoin(vector v1, vector v2){ + int i1=0; + int i2=0; + int k; + while((i1!=v1.size()) && (i2!=v2.size())){ + k = strcmp((const char*) tree->get_text(v1[i1]), + (const char*) tree->get_text(v2[i2])); + if (k==0) + { + resultVector.push_back(v1[i1]); + i1++; //advance left + } + else if (k<0) i1++; //advance left + else i2++; //advance right + } +} + +class VectorCountMergeJoin { +public: + xml_tree * tree; + vector> resultVector; + void mergeJoin(vector> v1, vector> v2); }; + +void VectorCountMergeJoin::mergeJoin(vector> v1, vector> v2){ + int comp=0; + int i1=0; + int i2=0; + int k; + while((i1get_text(v1[i1].first), + (const char*) tree->get_text(v2[i2].first)); + comp++; + if (k==0) + { + resultVector.push_back(v1[i1]); + i1++; //advance left + } + // else if (k<0) i1++; //advance left + else if (k<0) i1=i1+v1[i1].second; //advance left + // else i2++; //advance right + else i2=i2+v2[i2].second; //advance right + + } + printf("Number of comparisons during MergeJoin: %i\n",comp); +} // class prefix_treeNode { // public: @@ -37,7 +184,6 @@ public: using namespace SXSI; - static xml_tree*& XMLTREE(value v) { return Obj_val(v); @@ -48,6 +194,11 @@ static lex_index*& LEXINDEX(value v) return Obj_val(v); } +static lex_index_count*& LEXINDEXCOUNT(value v) +{ + return Obj_val(v); +} + static xml_tree::node_t TREENODE(value i) { return static_cast(Int_val(i)); @@ -58,80 +209,61 @@ static xml_tree::tag_t TAG(value i) return static_cast(Int_val(i)); } -void preorderTraverse(xml_tree::tag_t parent_tag, xml_tree::node_t node){ - if (tree->tag(node)==tree->PCDATA_OPEN_TAG_ID) - if (parent_tag==tag) it = myvector.insert(it, tree->text_id(node)); - else if (parent_tag==tag2) it2 = myvector2.insert(it2, tree->text_id(node)); - if (tree->tag(tree->first_child(node))!=0) preorderTraverse(tree->tag(node), tree->first_child(node)); - if (tree->tag(tree->next_sibling(node))!=0) preorderTraverse(parent_tag, tree->next_sibling(node)); -} +ML_BINDING value caml_build_lex_index(value vtree, value vtag, value vtag2) +{ + if ((TAG(vtag)==-1) || (TAG(vtag2)==-1)) caml_failwith(""); + CAMLparam2(vtree, vtag); + CAMLlocal1(vindex); + // Version without count: + // vindex = sxsi_alloc_custom(); + // Version with counts: + vindex = sxsi_alloc_custom(); + xml_tree * tree = XMLTREE(vtree); + myobject.tree = tree; -bool myfunction (int32_t i,int32_t j) { - return (strcmp((const char*) tree->get_text(i), - (const char*) tree->get_text(j))<0); -} + // create the index + // Version without count: + // lex_index* mylindex = new lex_index(); + // Version with count: + lex_index_count* mylindex = new lex_index_count(); + mylindex->tag = TAG(vtag); + mylindex->tag2 = TAG(vtag2); + mylindex->createIndex(tree); -vector mergeJoin(vector v1, vector v2){ - vector v; - vector::iterator i=v.begin(); - vector::iterator i1=v1.begin(); - vector::iterator i2=v2.begin(); - int k; + // printf("%i-vector: ", mylindex->tag); + // printVector(mylindex->tagVector); - while((i1!=v1.end()) && (i2!=v2.end())){ - k = strcmp((const char*) tree->get_text(*i1), - (const char*) tree->get_text(*i2)); - if (k==0) - { - i = v.insert( i, *i1 ); - //advance left - i1++; - //advance right - i2++; - } - else if (k<0) i1++; //advance left - else i2++; //advance right - } - return(v); -} + // Version with count: + mylindex->createCounts(tree); -void printIndex(const char * label, vector v){ - vector::iterator i=v.begin(); - if (i!=v.end()) { - printf("%s-vector: [%i", label, *i); - for (++i; i!=v.end(); ++i) - printf(",%i", *i); - printf("]\n"); - } -} + //do a join + // VectorMergeJoin* myVMJoin = new VectorMergeJoin(); + // myVMJoin->tree = tree; + // myVMJoin->mergeJoin(mylindex->tagVector, mylindex->tag2Vector); + // printf("Result-vector: "); + // printVector(myVMJoin->resultVector); -ML_BINDING value caml_build_lex_index(value vtree, value vtag, value vtag2) -{ - CAMLparam2(vtree, vtag); - CAMLlocal1(vindex); - const char * s; - vindex = sxsi_alloc_custom(); - tree = XMLTREE(vtree); - tag = TAG(vtag); - tag2 = TAG(vtag2); - //Uncomment the following and comment the failwith line - //LEXINDEX(vindex) = ... return a lex_index* .... - - if ((tag==-1) || (tag2==-1)) caml_failwith(""); - preorderTraverse(0, tree->first_child(tree->ROOT)); - sort(myvector.begin(), myvector.end(), myfunction); - sort(myvector2.begin(), myvector2.end(), myfunction); - printIndex(tree->get_tag_name_by_ref(tag), myvector); - printIndex(tree->get_tag_name_by_ref(tag2), myvector2); - printIndex("Result" , mergeJoin(myvector, myvector2)); + VectorCountMergeJoin* myVMJoin = new VectorCountMergeJoin(); + myVMJoin->tree = tree; + myVMJoin->mergeJoin(mylindex->tcountVector, mylindex->tcount2Vector); + printf("Result-vector: "); + printPairVector(myVMJoin->resultVector); + + // LEXINDEX(vindex)=mylindex; + LEXINDEXCOUNT(vindex)=mylindex; } ML_BINDING value caml_print_lex_index(value vindex) { CAMLparam1(vindex); - lex_index* index = LEXINDEX(vindex); + // Version without count: + // lex_index* index = LEXINDEX(vindex); + // Version without count: + lex_index_count* index = LEXINDEXCOUNT(vindex); //Print the index to the terminal - caml_failwith("print_lex_index not implemented"); + // caml_failwith("print_lex_index not implemented"); + + index->print(); CAMLreturn (Val_unit); }