6 #include "xml-tree.hpp"
7 #include "utils_stub.hpp"
10 // #include <algorithm>
15 bool operator() (int32_t i,int32_t j) {
16 return (strcmp((const char*) tree->get_text(i),
17 (const char*) tree->get_text(j))<0);}
20 void printVector(vector<int32_t> v){
21 vector<int32_t>::iterator i=v.begin();
24 for (++i; i!=v.end(); ++i)
30 void printPairVector(vector<pair<int32_t,int32_t>> v){
31 vector<pair<int32_t,int32_t>>::iterator i=v.begin();
33 printf("[(%i,%i)", (*i).first, (*i).second);
34 for (++i; i!=v.end(); ++i)
35 printf(",(%i,%i)", (*i).first, (*i).second);
40 //define a type for the lexicographic index
45 vector<int32_t> tagVector;
46 vector<int32_t> tag2Vector;
47 void preorderTraverse(xml_tree * tree, xml_tree::tag_t parent_tag, xml_tree::node_t node);
48 void createIndex(xml_tree * tree);
52 void lex_index::preorderTraverse(xml_tree * tree, xml_tree::tag_t parent_tag, xml_tree::node_t node){
53 if (tree->tag(node)==tree->PCDATA_OPEN_TAG_ID)
54 if (parent_tag==tag) tagVector.push_back(tree->text_id(node));
55 else if (parent_tag==tag2) tag2Vector.push_back(tree->text_id(node));
56 if (tree->tag(tree->first_child(node))!=0) preorderTraverse(tree,tree->tag(node),tree->first_child(node));
57 if (tree->tag(tree->next_sibling(node))!=0) preorderTraverse(tree,parent_tag,tree->next_sibling(node));
60 void lex_index::createIndex(xml_tree * tree){
61 preorderTraverse(tree, 0, tree->first_child(tree->ROOT));
62 sort(tagVector.begin(), tagVector.end(), myobject);
63 sort(tag2Vector.begin(), tag2Vector.end(), myobject);
66 void lex_index::print(){
67 printf("%i-vector: ", tag);
68 printVector(tagVector);
69 printf("%i-vector: ", tag2);
70 printVector(tag2Vector);
73 class lex_index_count : public lex_index {
75 vector<pair<int32_t,int32_t>> tcountVector;
76 vector<pair<int32_t,int32_t>> tcount2Vector;
77 void createCounts(xml_tree * tree);
81 void lex_index_count::createCounts(xml_tree * tree){
84 while((i+1)<tagVector.size())
87 while(strcmp((const char*) tree->get_text(tagVector[i]),
88 (const char*) tree->get_text(tagVector[i+1]))==0){
90 tcountVector.push_back(make_pair(tagVector[i],1));
93 if(k>1) tcountVector[i-k+1]=make_pair((tcountVector[i-k+1]).first,k);
94 tcountVector.push_back(make_pair(tagVector[i],1));
97 if(i<tagVector.size())
98 tcountVector.push_back(make_pair(tagVector[i],1));
100 while((i+1)<tag2Vector.size())
103 while(strcmp((const char*) tree->get_text(tag2Vector[i]),
104 (const char*) tree->get_text(tag2Vector[i+1]))==0){
106 tcount2Vector.push_back(make_pair(tag2Vector[i],1));
109 if(k>1) tcount2Vector[i-k+1]=make_pair((tcount2Vector[i-k+1]).first,k);
110 tcount2Vector.push_back(make_pair(tag2Vector[i],1));
113 if(i<tag2Vector.size())
114 tcount2Vector.push_back(make_pair(tag2Vector[i],1));
117 void lex_index_count::print(){
118 printf("%i-vector: ", tag);
119 printPairVector(tcountVector);
120 printf("%i-vector: ", tag2);
121 printPairVector(tcount2Vector);
124 class VectorMergeJoin {
127 vector<int32_t> resultVector;
128 void mergeJoin(vector<int32_t> v1, vector<int32_t> v2);
131 void VectorMergeJoin::mergeJoin(vector<int32_t> v1, vector<int32_t> v2){
135 while((i1!=v1.size()) && (i2!=v2.size())){
136 k = strcmp((const char*) tree->get_text(v1[i1]),
137 (const char*) tree->get_text(v2[i2]));
140 resultVector.push_back(v1[i1]);
143 else if (k<0) i1++; //advance left
144 else i2++; //advance right
148 class VectorCountMergeJoin {
151 vector<pair<int32_t,int32_t>> resultVector;
152 void mergeJoin(vector<pair<int32_t,int32_t>> v1, vector<pair<int32_t,int32_t>> v2);
155 void VectorCountMergeJoin::mergeJoin(vector<pair<int32_t,int32_t>> v1, vector<pair<int32_t,int32_t>> v2){
160 while((i1<v1.size()) && (i2<v2.size())){
161 k = strcmp((const char*) tree->get_text(v1[i1].first),
162 (const char*) tree->get_text(v2[i2].first));
166 for(j=0; j<v1[i1].second ; j++)
167 resultVector.push_back(v1[i1+j]);
169 // i1++; //advance left
171 // else if (k<0) i1++; //advance left
172 else if (k<0) i1=i1+v1[i1].second; //advance left
173 // else i2++; //advance right
174 else i2=i2+v2[i2].second; //advance right
177 printf("Number of comparisons during MergeJoin: %i\n",comp);
180 // class prefix_treeNode {
182 // std::map<char, prefix_treeNode> Children;
183 // std::array<prefix_treeNode, 256> Children;
187 using namespace SXSI;
189 static xml_tree*& XMLTREE(value v)
191 return Obj_val<xml_tree*>(v);
194 static lex_index*& LEXINDEX(value v)
196 return Obj_val<lex_index*>(v);
199 static lex_index_count*& LEXINDEXCOUNT(value v)
201 return Obj_val<lex_index_count*>(v);
204 static xml_tree::node_t TREENODE(value i)
206 return static_cast<xml_tree::node_t>(Int_val(i));
209 static xml_tree::tag_t TAG(value i)
211 return static_cast<xml_tree::tag_t>(Int_val(i));
214 ML_BINDING value caml_build_lex_index(value vtree, value vtag, value vtag2)
216 if ((TAG(vtag)==-1) || (TAG(vtag2)==-1)) caml_failwith("<INVALID TAG>");
217 CAMLparam2(vtree, vtag);
219 // Version without count:
220 // vindex = sxsi_alloc_custom<lex_index*>();
221 // Version with counts:
222 vindex = sxsi_alloc_custom<lex_index_count*>();
223 xml_tree * tree = XMLTREE(vtree);
224 myobject.tree = tree;
227 // Version without count:
228 // lex_index* mylindex = new lex_index();
229 // Version with count:
230 lex_index_count* mylindex = new lex_index_count();
231 mylindex->tag = TAG(vtag);
232 mylindex->tag2 = TAG(vtag2);
233 mylindex->createIndex(tree);
235 // printf("%i-vector: ", mylindex->tag);
236 // printVector(mylindex->tagVector);
238 // Version with count:
239 mylindex->createCounts(tree);
242 // VectorMergeJoin* myVMJoin = new VectorMergeJoin();
243 // myVMJoin->tree = tree;
244 // myVMJoin->mergeJoin(mylindex->tagVector, mylindex->tag2Vector);
245 // printf("Result-vector: ");
246 // printVector(myVMJoin->resultVector);
248 VectorCountMergeJoin* myVMJoin = new VectorCountMergeJoin();
249 myVMJoin->tree = tree;
250 myVMJoin->mergeJoin(mylindex->tcountVector, mylindex->tcount2Vector);
251 printf("Result-vector: ");
252 printPairVector(myVMJoin->resultVector);
254 // LEXINDEX(vindex)=mylindex;
255 LEXINDEXCOUNT(vindex)=mylindex;
258 ML_BINDING value caml_print_lex_index(value vindex)
261 // Version without count:
262 // lex_index* index = LEXINDEX(vindex);
263 // Version without count:
264 lex_index_count* index = LEXINDEXCOUNT(vindex);
266 //Print the index to the terminal
267 // caml_failwith("print_lex_index not implemented");
270 CAMLreturn (Val_unit);