From 8a31bd5c0847ca9c45627cb808a57d9c07c7b0dc Mon Sep 17 00:00:00 2001
From: =?utf8?q?Kim=20Nguy=E1=BB=85n?=
Date: Wed, 14 Mar 2012 13:46:30 +0100
Subject: [PATCH 1/1] Sort and remove duplicates from text query results (needed for the word-based text index).

---
Review notes (text between the "---" marker and the diff is not part of the
commit message):
 * std::vector element type spelled out as DocID in sort_results and BV_QUERY.
 * An empty result set now yields Atom(0) instead of caml_alloc_shr(0, 0).
 * The query result is handed to sort_results directly (no extra copy) and
   the C copy of the pattern is freed before any OCaml allocation.
 * The "index" line below still carries the blob ids of the original patch.

 src/OCamlDriver.cpp | 76 +++++++++++++++++++++++++++++++++------------
 1 file changed, 56 insertions(+), 20 deletions(-)

diff --git a/src/OCamlDriver.cpp b/src/OCamlDriver.cpp
index 6b4e773..b93286c 100644
--- a/src/OCamlDriver.cpp
+++ b/src/OCamlDriver.cpp
@@ -871,28 +871,64 @@ extern "C" value caml_text_collection_lessthan(value tree,value str){
 /** Full reporting into a bit vector
  */
 
+/**
+ * Sorts the document identifiers in v and returns them with every run of
+ * equal values reduced to a single element.  The running "previous value"
+ * starts out as NULLT, so NULLT entries that sort to the very front are not
+ * reported.  v is taken by value: the caller's vector is left untouched.
+ */
+static std::vector<DocID> sort_results(std::vector<DocID> v)
+{
+  std::vector<DocID> res;
+  std::sort(v.begin(), v.end());
+  DocID prev = NULLT;
+  for (auto i = v.begin(); i != v.end(); ++i) {
+    if (*i != prev) {
+      prev = *i;
+      res.push_back(prev);
+    }
+  }
+  return res;
+}
+
+/**
+ * BV_QUERY(pref, Pref) defines the OCaml primitive
+ * caml_text_collection_<pref>_bv(tree, str, dobvv).  It runs the text query
+ * Pref on the string str, sorts and de-duplicates the answers, and returns a
+ * pair (bit vector, array):
+ *  - the array holds ParentNode(d) for each answer d, in sorted answer order;
+ *  - when dobvv is true, the bit vector (an OCaml string) has bit j set for
+ *    each such parent j; when it is false the string is empty.
+ */
 #define BV_QUERY(pref, Pref) \
-  extern "C" value caml_text_collection_## pref ##_bv(value tree, value str){ \
-    CAMLparam2(tree, str); \
-    CAMLlocal3(res, res_bv, res_array); \
-    int j; \
-    uchar * cstr = (uchar *) strdup(String_val(str)); \
-    std::vector<DocID> results = XMLTREE(tree)->Pref(cstr); \
-    res_bv = caml_alloc_string((XMLTREE(tree)->Size() / 4) + 2); \
-    unsigned long slen = caml_string_length(res_bv); \
-    memset(&(Byte(res_bv,0)), 0, slen); \
-    res_array = caml_alloc_shr(results.size(), 0); \
-    for (unsigned int i = 0; i < results.size(); ++i) { \
-      j = XMLTREE(tree)->ParentNode(results[i]); \
-      Byte(res_bv, j >> 3) |= (1 << (j & 7)); \
-      caml_initialize(&Field(res_array, i), Val_int(j)); \
-    }; \
-    free(cstr); \
-    res = caml_alloc(2, 0); \
-    Store_field(res, 0, res_bv); \
-    Store_field(res, 1, res_array); \
-    CAMLreturn(res); \
+  extern "C" value caml_text_collection_## pref ##_bv(value tree, value str, value dobvv){ \
+    CAMLparam3(tree, str, dobvv); \
+    CAMLlocal3(res, res_bv, res_array); \
+    int j; \
+    /* Private C copy of the pattern, handed to the query and freed below. */ \
+    uchar * cstr = (uchar *) strdup(String_val(str)); \
+    std::vector<DocID> results = sort_results(XMLTREE(tree)->Pref(cstr)); \
+    /* Release the copy before the OCaml allocations, which may raise. */ \
+    free(cstr); \
+    bool dobv = Bool_val(dobvv); \
+    res_bv = caml_alloc_string(dobv ? ((XMLTREE(tree)->Size() / 4) + 2) : 0); \
+    unsigned long slen = caml_string_length(res_bv); \
+    if (dobv) \
+      memset(&(Byte(res_bv,0)), 0, slen); \
+    /* A zero-sized block must be Atom(0): caml_alloc_shr cannot build one. */ \
+    res_array = results.empty() ? Atom(0) : caml_alloc_shr(results.size(), 0); \
+    for (size_t i = 0; i < results.size(); ++i) { \
+      j = XMLTREE(tree)->ParentNode(results[i]); \
+      if (dobv) { \
+        Byte(res_bv, j >> 3) |= (1 << (j & 7)); \
+      } \
+      caml_initialize(&Field(res_array, i), Val_int(j)); \
+    } \
+    res = caml_alloc(2, 0); \
+    Store_field(res, 0, res_bv); \
+    Store_field(res, 1, res_array); \
+    CAMLreturn(res); \
 } \
 
 
 
-- 
2.17.1