(const char*) tree->get_text(j))<0);}
} myobject;
+void printVector(vector<int32_t> v){
+ vector<int32_t>::iterator i=v.begin();
+ if (i!=v.end()) {
+ printf("[%i", *i);
+ for (++i; i!=v.end(); ++i)
+ printf(",%i", *i);
+ printf("]\n");
+ }
+}
+
//define a type for the lexicographic index
class lex_index {
public:
vector<int32_t>::iterator tagVectorIt;
vector<int32_t> tag2Vector;
vector<int32_t>::iterator tag2VectorIt;
+ void preorderTraverse(xml_tree * tree, xml_tree::tag_t parent_tag, xml_tree::node_t node);
+ void createIndex(xml_tree * tree);
void print();
};
-void lex_index::print(){printf("Print called\n");}
+void lex_index::preorderTraverse(xml_tree * tree, xml_tree::tag_t parent_tag, xml_tree::node_t node){
+ if (tree->tag(node)==tree->PCDATA_OPEN_TAG_ID)
+ if (parent_tag==tag) tagVectorIt = tagVector.insert(tagVectorIt, tree->text_id(node));
+ else if (parent_tag==tag2) tag2VectorIt = tag2Vector.insert(tag2VectorIt, tree->text_id(node));
+ if (tree->tag(tree->first_child(node))!=0) preorderTraverse(tree,tree->tag(node),tree->first_child(node));
+ if (tree->tag(tree->next_sibling(node))!=0) preorderTraverse(tree,parent_tag,tree->next_sibling(node));
+}
+
+void lex_index::createIndex(xml_tree * tree){
+ tagVectorIt=tagVector.begin();
+ tag2VectorIt=tag2Vector.begin();
+ preorderTraverse(tree, 0, tree->first_child(tree->ROOT));
+ sort(tagVector.begin(), tagVector.end(), myobject);
+ sort(tag2Vector.begin(), tag2Vector.end(), myobject);
+}
+
+void lex_index::print(){
+ printf("%i-vector: ", tag);
+ printVector(tagVector);
+ printf("%i-vector: ", tag2);
+ printVector(tag2Vector);
+}
+
+class VectorMergeJoin {
+public:
+ xml_tree * tree;
+ //The tag IDs to be semi-joined
+ const char * tagname1;
+ const char * tagname2;
+ vector<int32_t> resultVector;
+ void mergeJoin(vector<int32_t> v1, vector<int32_t> v2);
+};
+
+void VectorMergeJoin::mergeJoin(vector<int32_t> v1, vector<int32_t> v2){
+ vector<int32_t>::iterator i=resultVector.begin();
+ vector<int32_t>::iterator i1=v1.begin();
+ vector<int32_t>::iterator i2=v2.begin();
+ int k;
+
+ while((i1!=v1.end()) && (i2!=v2.end())){
+ k = strcmp((const char*) tree->get_text(*i1),
+ (const char*) tree->get_text(*i2));
+ if (k==0)
+ {
+ i = resultVector.insert( i, *i1 );
+ //advance left
+ i1++;
+ //advance right
+ i2++;
+ }
+ else if (k<0) i1++; //advance left
+ else i2++; //advance right
+ }
+}
// class prefix_treeNode {
// public:
return static_cast<xml_tree::tag_t>(Int_val(i));
}
-void preorderTraverse(xml_tree * tree, xml_tree::tag_t parent_tag, xml_tree::node_t node, lex_index* lindex){
- if (tree->tag(node)==tree->PCDATA_OPEN_TAG_ID)
- if (parent_tag==lindex->tag) lindex->tagVectorIt =
- lindex->tagVector.insert(lindex->tagVectorIt, tree->text_id(node));
- else if (parent_tag==lindex->tag2) lindex->tag2VectorIt =
- lindex->tag2Vector.insert(lindex->tag2VectorIt, tree->text_id(node));
- if (tree->tag(tree->first_child(node))!=0) preorderTraverse(tree,tree->tag(node),tree->first_child(node),lindex);
- if (tree->tag(tree->next_sibling(node))!=0) preorderTraverse(tree,parent_tag,tree->next_sibling(node),lindex);
-}
-
-vector<int32_t> mergeJoin(xml_tree * tree, vector<int32_t> v1, vector<int32_t> v2){
- vector<int32_t> v;
- vector<int32_t>::iterator i=v.begin();
- vector<int32_t>::iterator i1=v1.begin();
- vector<int32_t>::iterator i2=v2.begin();
- int k;
-
- while((i1!=v1.end()) && (i2!=v2.end())){
- k = strcmp((const char*) tree->get_text(*i1),
- (const char*) tree->get_text(*i2));
- if (k==0)
- {
- i = v.insert( i, *i1 );
- //advance left
- i1++;
- //advance right
- i2++;
- }
- else if (k<0) i1++; //advance left
- else i2++; //advance right
- }
- return(v);
-}
-
-void printVector(const char * label, vector<int32_t> v){
- vector<int32_t>::iterator i=v.begin();
- if (i!=v.end()) {
- printf("%s-vector: [%i", label, *i);
- for (++i; i!=v.end(); ++i)
- printf(",%i", *i);
- printf("]\n");
- }
-}
-
ML_BINDING value caml_build_lex_index(value vtree, value vtag, value vtag2)
{
+ if ((TAG(vtag)==-1) || (TAG(vtag2)==-1)) caml_failwith("<INVALID TAG>");
CAMLparam2(vtree, vtag);
CAMLlocal1(vindex);
vindex = sxsi_alloc_custom<lex_index*>();
xml_tree * tree = XMLTREE(vtree);
myobject.tree = tree;
- //allocate a lex index
+ //create the index
lex_index* mylindex = new lex_index();
-
- //take the tag parameter given by ocaml and convert
- //it to a C++ tag and store it in the tag field of mylindex
- if ((TAG(vtag)==-1) || (TAG(vtag2)==-1)) caml_failwith("<INVALID TAG>");
mylindex->tag = TAG(vtag);
mylindex->tag2 = TAG(vtag2);
+ mylindex->createIndex(tree);
- //initialize iterators for the two vectors
- mylindex->tagVectorIt=mylindex->tagVector.begin();
- mylindex->tag2VectorIt=mylindex->tag2Vector.begin();
-
- preorderTraverse(tree, 0, tree->first_child(tree->ROOT), mylindex);
- sort(mylindex->tagVector.begin(), mylindex->tagVector.end(), myobject);
- sort(mylindex->tag2Vector.begin(), mylindex->tag2Vector.end(), myobject);
- printVector(tree->get_tag_name_by_ref(mylindex->tag), mylindex->tagVector);
- printVector(tree->get_tag_name_by_ref(mylindex->tag2), mylindex->tag2Vector);
- printVector("Result" , mergeJoin(tree, mylindex->tagVector, mylindex->tag2Vector));
+ //do a join
+ VectorMergeJoin* myVMJoin = new VectorMergeJoin();
+ myVMJoin->tree = tree;
+ myVMJoin->mergeJoin(mylindex->tagVector, mylindex->tag2Vector);
+ printf("Result-vector: ");
+ printVector(myVMJoin->resultVector);
- //Uncomment the following and comment the failwith line
- //LEXINDEX(vindex) = ... return a lex_index* ....
LEXINDEX(vindex)=mylindex;
}