6 #include "xml-tree.hpp"
7 #include "utils_stub.hpp"
10 // #include <algorithm>
15 bool operator() (int32_t i,int32_t j) {
16 return (strcmp((const char*) tree->get_text(i),
17 (const char*) tree->get_text(j))<0);}
20 void printVector(vector<int32_t> v){
21 vector<int32_t>::iterator i=v.begin();
24 for (++i; i!=v.end(); ++i)
30 //define a type for the lexicographic index
37 vector<int32_t> tagVector;
38 vector<int32_t>::iterator tagVectorIt;
39 vector<int32_t> tag2Vector;
40 vector<int32_t>::iterator tag2VectorIt;
41 void preorderTraverse(xml_tree * tree, xml_tree::tag_t parent_tag, xml_tree::node_t node);
42 void createIndex(xml_tree * tree);
46 void lex_index::preorderTraverse(xml_tree * tree, xml_tree::tag_t parent_tag, xml_tree::node_t node){
47 if (tree->tag(node)==tree->PCDATA_OPEN_TAG_ID)
48 if (parent_tag==tag) tagVectorIt = tagVector.insert(tagVectorIt, tree->text_id(node));
49 else if (parent_tag==tag2) tag2VectorIt = tag2Vector.insert(tag2VectorIt, tree->text_id(node));
50 if (tree->tag(tree->first_child(node))!=0) preorderTraverse(tree,tree->tag(node),tree->first_child(node));
51 if (tree->tag(tree->next_sibling(node))!=0) preorderTraverse(tree,parent_tag,tree->next_sibling(node));
54 void lex_index::createIndex(xml_tree * tree){
55 tagVectorIt=tagVector.begin();
56 tag2VectorIt=tag2Vector.begin();
57 preorderTraverse(tree, 0, tree->first_child(tree->ROOT));
58 sort(tagVector.begin(), tagVector.end(), myobject);
59 sort(tag2Vector.begin(), tag2Vector.end(), myobject);
62 void lex_index::print(){
63 printf("%i-vector: ", tag);
64 printVector(tagVector);
65 printf("%i-vector: ", tag2);
66 printVector(tag2Vector);
69 class VectorMergeJoin {
72 //The tag IDs to be semi-joined
73 const char * tagname1;
74 const char * tagname2;
75 vector<int32_t> resultVector;
76 void mergeJoin(vector<int32_t> v1, vector<int32_t> v2);
79 void VectorMergeJoin::mergeJoin(vector<int32_t> v1, vector<int32_t> v2){
80 vector<int32_t>::iterator i=resultVector.begin();
81 vector<int32_t>::iterator i1=v1.begin();
82 vector<int32_t>::iterator i2=v2.begin();
85 while((i1!=v1.end()) && (i2!=v2.end())){
86 k = strcmp((const char*) tree->get_text(*i1),
87 (const char*) tree->get_text(*i2));
90 i = resultVector.insert( i, *i1 );
96 else if (k<0) i1++; //advance left
97 else i2++; //advance right
101 // class prefix_treeNode {
103 // std::map<char, prefix_treeNode> Children;
104 // std::array<prefix_treeNode, 256> Children;
108 using namespace SXSI;
110 static xml_tree*& XMLTREE(value v)
112 return Obj_val<xml_tree*>(v);
115 static lex_index*& LEXINDEX(value v)
117 return Obj_val<lex_index*>(v);
120 static xml_tree::node_t TREENODE(value i)
122 return static_cast<xml_tree::node_t>(Int_val(i));
125 static xml_tree::tag_t TAG(value i)
127 return static_cast<xml_tree::tag_t>(Int_val(i));
130 ML_BINDING value caml_build_lex_index(value vtree, value vtag, value vtag2)
132 if ((TAG(vtag)==-1) || (TAG(vtag2)==-1)) caml_failwith("<INVALID TAG>");
133 CAMLparam2(vtree, vtag);
135 vindex = sxsi_alloc_custom<lex_index*>();
136 xml_tree * tree = XMLTREE(vtree);
137 myobject.tree = tree;
140 lex_index* mylindex = new lex_index();
141 mylindex->tag = TAG(vtag);
142 mylindex->tag2 = TAG(vtag2);
143 mylindex->createIndex(tree);
146 VectorMergeJoin* myVMJoin = new VectorMergeJoin();
147 myVMJoin->tree = tree;
148 myVMJoin->mergeJoin(mylindex->tagVector, mylindex->tag2Vector);
149 printf("Result-vector: ");
150 printVector(myVMJoin->resultVector);
152 LEXINDEX(vindex)=mylindex;
155 ML_BINDING value caml_print_lex_index(value vindex)
158 lex_index* index = LEXINDEX(vindex);
160 //Print the index to the terminal
161 // caml_failwith("print_lex_index not implemented");
163 CAMLreturn (Val_unit);