From: Sebastian Maneth Date: Fri, 2 Nov 2012 17:45:04 +0000 (+0100) Subject: put the mergeJoin function into its own class X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;h=d48d9af331bfa911b7a7c6b11144d90b25ae1705;p=SXSI%2Fxpathcomp.git put the mergeJoin function into its own class --- diff --git a/src/lexindex_stub.cpp b/src/lexindex_stub.cpp index aad2333..7e5eba6 100644 --- a/src/lexindex_stub.cpp +++ b/src/lexindex_stub.cpp @@ -17,6 +17,16 @@ struct myclass { (const char*) tree->get_text(j))<0);} } myobject; +void printVector(vector v){ + vector::iterator i=v.begin(); + if (i!=v.end()) { + printf("[%i", *i); + for (++i; i!=v.end(); ++i) + printf(",%i", *i); + printf("]\n"); + } +} + //define a type for the lexicographic index class lex_index { public: @@ -28,24 +38,64 @@ public: vector::iterator tagVectorIt; vector tag2Vector; vector::iterator tag2VectorIt; - void printVector(xml_tree::tag_t t, vector v); + void preorderTraverse(xml_tree * tree, xml_tree::tag_t parent_tag, xml_tree::node_t node); + void createIndex(xml_tree * tree); void print(); }; -void lex_index::printVector(xml_tree::tag_t t, vector v){ - vector::iterator i=v.begin(); - if (i!=v.end()) { - printf("%i-vector: [%i", t, *i); - for (++i; i!=v.end(); ++i) - printf(",%i", *i); - printf("]\n"); - } +void lex_index::preorderTraverse(xml_tree * tree, xml_tree::tag_t parent_tag, xml_tree::node_t node){ + if (tree->tag(node)==tree->PCDATA_OPEN_TAG_ID) + if (parent_tag==tag) tagVectorIt = tagVector.insert(tagVectorIt, tree->text_id(node)); + else if (parent_tag==tag2) tag2VectorIt = tag2Vector.insert(tag2VectorIt, tree->text_id(node)); + if (tree->tag(tree->first_child(node))!=0) preorderTraverse(tree,tree->tag(node),tree->first_child(node)); + if (tree->tag(tree->next_sibling(node))!=0) preorderTraverse(tree,parent_tag,tree->next_sibling(node)); +} + +void lex_index::createIndex(xml_tree * tree){ + tagVectorIt=tagVector.begin(); + tag2VectorIt=tag2Vector.begin(); + preorderTraverse(tree, 0, tree->first_child(tree->ROOT)); + sort(tagVector.begin(), tagVector.end(), myobject); + sort(tag2Vector.begin(), tag2Vector.end(), myobject); } void lex_index::print(){ - printf("Print called\n"); - printVector(tag, tagVector); - printVector(tag2, tag2Vector); + printf("%i-vector: ", tag); + printVector(tagVector); + printf("%i-vector: ", tag2); + printVector(tag2Vector); +} + +class VectorMergeJoin { +public: + xml_tree * tree; + //The tag IDs to be semi-joined + const char * tagname1; + const char * tagname2; + vector resultVector; + void mergeJoin(vector v1, vector v2); +}; + +void VectorMergeJoin::mergeJoin(vector v1, vector v2){ + vector::iterator i=resultVector.begin(); + vector::iterator i1=v1.begin(); + vector::iterator i2=v2.begin(); + int k; + + while((i1!=v1.end()) && (i2!=v2.end())){ + k = strcmp((const char*) tree->get_text(*i1), + (const char*) tree->get_text(*i2)); + if (k==0) + { + i = resultVector.insert( i, *i1 ); + //advance left + i1++; + //advance right + i2++; + } + else if (k<0) i1++; //advance left + else i2++; //advance right + } } // class prefix_treeNode { @@ -77,80 +127,28 @@ static xml_tree::tag_t TAG(value i) return static_cast(Int_val(i)); } -void preorderTraverse(xml_tree * tree, xml_tree::tag_t parent_tag, xml_tree::node_t node, lex_index* lindex){ - if (tree->tag(node)==tree->PCDATA_OPEN_TAG_ID) - if (parent_tag==lindex->tag) lindex->tagVectorIt = - lindex->tagVector.insert(lindex->tagVectorIt, tree->text_id(node)); - else if (parent_tag==lindex->tag2) lindex->tag2VectorIt = - lindex->tag2Vector.insert(lindex->tag2VectorIt, tree->text_id(node)); - if (tree->tag(tree->first_child(node))!=0) preorderTraverse(tree,tree->tag(node),tree->first_child(node),lindex); - if (tree->tag(tree->next_sibling(node))!=0) preorderTraverse(tree,parent_tag,tree->next_sibling(node),lindex); -} - -vector mergeJoin(xml_tree * tree, vector v1, vector v2){ - vector v; - vector::iterator i=v.begin(); - vector::iterator i1=v1.begin(); - vector::iterator i2=v2.begin(); - int k; - - while((i1!=v1.end()) && (i2!=v2.end())){ - k = strcmp((const char*) tree->get_text(*i1), - (const char*) tree->get_text(*i2)); - if (k==0) - { - i = v.insert( i, *i1 ); - //advance left - i1++; - //advance right - i2++; - } - else if (k<0) i1++; //advance left - else i2++; //advance right - } - return(v); -} - -void printVector(const char * label, vector v){ - vector::iterator i=v.begin(); - if (i!=v.end()) { - printf("%s-vector: [%i", label, *i); - for (++i; i!=v.end(); ++i) - printf(",%i", *i); - printf("]\n"); - } -} - ML_BINDING value caml_build_lex_index(value vtree, value vtag, value vtag2) { + if ((TAG(vtag)==-1) || (TAG(vtag2)==-1)) caml_failwith(""); CAMLparam2(vtree, vtag); CAMLlocal1(vindex); vindex = sxsi_alloc_custom(); xml_tree * tree = XMLTREE(vtree); myobject.tree = tree; - //allocate a lex index + //create the index lex_index* mylindex = new lex_index(); - - //take the tag parameter given by ocaml and convert - //it to a C++ tag and store it in the tag field of mylindex - if ((TAG(vtag)==-1) || (TAG(vtag2)==-1)) caml_failwith(""); mylindex->tag = TAG(vtag); mylindex->tag2 = TAG(vtag2); + mylindex->createIndex(tree); - //initialize iterators for the two vectors - mylindex->tagVectorIt=mylindex->tagVector.begin(); - mylindex->tag2VectorIt=mylindex->tag2Vector.begin(); - - preorderTraverse(tree, 0, tree->first_child(tree->ROOT), mylindex); - sort(mylindex->tagVector.begin(), mylindex->tagVector.end(), myobject); - sort(mylindex->tag2Vector.begin(), mylindex->tag2Vector.end(), myobject); - printVector(tree->get_tag_name_by_ref(mylindex->tag), mylindex->tagVector); - printVector(tree->get_tag_name_by_ref(mylindex->tag2), mylindex->tag2Vector); - printVector("Result" , mergeJoin(tree, mylindex->tagVector, mylindex->tag2Vector)); + //do a join + VectorMergeJoin* myVMJoin = new VectorMergeJoin(); + myVMJoin->tree = tree; + myVMJoin->mergeJoin(mylindex->tagVector, mylindex->tag2Vector); + printf("Result-vector: "); + printVector(myVMJoin->resultVector); - //Uncomment the following and comment the failwith line - //LEXINDEX(vindex) = ... return a lex_index* .... LEXINDEX(vindex)=mylindex; }