From 8f5b2f0c7e95245c1b7f4cc18aa137ba8e784166 Mon Sep 17 00:00:00 2001 From: Sebastian Maneth Date: Sun, 14 Oct 2012 18:43:23 +0200 Subject: [PATCH] i --- src/lexindex_stub.cpp | 83 ++++++++++++++++++++++++++++++++++++++----- src/lextest.ml | 11 +++--- 2 files changed, 80 insertions(+), 14 deletions(-) diff --git a/src/lexindex_stub.cpp b/src/lexindex_stub.cpp index 5e6dcdc..bf038b2 100644 --- a/src/lexindex_stub.cpp +++ b/src/lexindex_stub.cpp @@ -2,9 +2,21 @@ #include #include #include +#include #include "xml-tree.hpp" #include "utils_stub.hpp" +#include +// #include +using namespace std; + +vector myvector; +vector::iterator it=myvector.begin(); +vector myvector2; +vector::iterator it2=myvector2.begin(); +xml_tree * tree; +xml_tree::tag_t tag; +xml_tree::tag_t tag2; //define a type for the lexicographic index @@ -16,7 +28,12 @@ public: std::vector > data; }; - +// class prefix_treeNode { +// public: + // std::map Children; +// std::array Children; +// uint32 textID; +//}; using namespace SXSI; @@ -41,25 +58,74 @@ static xml_tree::tag_t TAG(value i) return static_cast(Int_val(i)); } +void preorderTraverse(xml_tree::tag_t parent_tag, xml_tree::node_t node){ + if (tree->tag(node)==tree->PCDATA_OPEN_TAG_ID) + if (parent_tag==tag) it = myvector.insert(it, tree->text_id(node)); + else if (parent_tag==tag2) it2 = myvector2.insert(it2, tree->text_id(node)); + if (tree->tag(tree->first_child(node))!=0) preorderTraverse(tree->tag(node), tree->first_child(node)); + if (tree->tag(tree->next_sibling(node))!=0) preorderTraverse(parent_tag, tree->next_sibling(node)); +} + +bool myfunction (int32_t i,int32_t j) { + return (strcmp((const char*) tree->get_text(i), + (const char*) tree->get_text(j))<0); +} +vector mergeJoin(vector v1, vector v2){ + vector v; + vector::iterator i=v.begin(); + vector::iterator i1=v1.begin(); + vector::iterator i2=v2.begin(); + int k; + + while((i1!=v1.end()) && (i2!=v2.end())){ + k = strcmp((const char*) tree->get_text(*i1), + (const char*) tree->get_text(*i2)); + if (k==0) + { + i = v.insert( i, *i1 ); + //advance left + i1++; + //advance right + i2++; + } + else if (k<0) i1++; //advance left + else i2++; //advance right + } + return(v); +} +void printIndex(const char * label, vector v){ + vector::iterator i=v.begin(); + if (i!=v.end()) { + printf("%s-vector: [%i", label, *i); + for (++i; i!=v.end(); ++i) + printf(",%i", *i); + printf("]\n"); + } +} -extern "C" value caml_build_lex_index(value vtree, value vtag) +extern "C" value caml_build_lex_index(value vtree, value vtag, value vtag2) { CAMLparam2(vtree, vtag); CAMLlocal1(vindex); const char * s; vindex = sxsi_alloc_custom(); - xml_tree * tree = XMLTREE(vtree); - xml_tree::tag_t tag = TAG(vtag); + tree = XMLTREE(vtree); + tag = TAG(vtag); + tag2 = TAG(vtag2); + //Uncomment the following and comment the failwith line //LEXINDEX(vindex) = ... return a lex_index* .... + if ((tag==-1) || (tag2==-1)) caml_failwith(""); - caml_failwith("build_lex_index not implemented"); - - - CAMLreturn (vindex); + preorderTraverse(0, tree->first_child(tree->ROOT)); + sort(myvector.begin(), myvector.end(), myfunction); + sort(myvector2.begin(), myvector2.end(), myfunction); + printIndex(tree->get_tag_name_by_ref(tag), myvector); + printIndex(tree->get_tag_name_by_ref(tag2), myvector2); + printIndex("Result" , mergeJoin(myvector, myvector2)); } extern "C" value caml_print_lex_index(value vindex) @@ -71,6 +137,5 @@ extern "C" value caml_print_lex_index(value vindex) caml_failwith("print_lex_index not implemented"); - CAMLreturn (Val_unit); } diff --git a/src/lextest.ml b/src/lextest.ml index 77701cb..337b792 100644 --- a/src/lextest.ml +++ b/src/lextest.ml @@ -2,13 +2,13 @@ type index -external build_lex_index : Tree.tree_pointer -> Tag.t -> index = "caml_build_lex_index" +external build_lex_index : Tree.tree_pointer -> Tag.t -> Tag.t -> index = "caml_build_lex_index" external print_lex_index : index -> unit = "caml_print_lex_index" let main () = - if Array.length Sys.argv != 3 then + if Array.length Sys.argv != 4 then let () = Printf.eprintf "Error: invalid argument" in exit 1 else let file = Sys.argv.(1) in @@ -22,10 +22,11 @@ let main () = let () = Printf.eprintf "Error: unrecognized extension" in exit 2 in Tag.init (Tree.tag_operations document); - Printf.printf "Building lex index for tag %s\n%!" (Tag.to_string (Tag.tag Sys.argv.(2))); - let index = build_lex_index (Tree.get_tree_pointer document) (Tag.tag Sys.argv.(2)) in + Printf.printf "Building lex index for tags %s, %s\n%!" (Tag.to_string (Tag.tag Sys.argv.(2))) + (Tag.to_string (Tag.tag Sys.argv.(3))); + let index = build_lex_index (Tree.get_tree_pointer document) (Tag.tag Sys.argv.(2)) (Tag.tag Sys.argv.(3)) in Printf.printf "Printing lex index\n%!"; - print_lex_index index; + print_lex_index index; exit 0 -- 2.17.1