Sort and remove duplicates from text query results (needed for the
author Kim Nguyễn <kn@lri.fr>
Wed, 14 Mar 2012 12:46:30 +0000 (13:46 +0100)
committer Kim Nguyễn <kn@lri.fr>
Wed, 14 Mar 2012 12:51:18 +0000 (13:51 +0100)
word-based text index).

src/OCamlDriver.cpp

index 6b4e773..b93286c 100644 (file)
@@ -871,28 +871,48 @@ extern "C"  value caml_text_collection_lessthan(value tree,value str){
 
 /** Full reporting into a bit vector
  */
+static std::vector<DocID> sort_results(std::vector<DocID> v) // Returns the values of v in std::sort's default order, each distinct value emitted once.
+{
+  std::vector<DocID> res; // output vector, filled in sorted order
+  std::sort(v.begin(), v.end()); // v is a by-value copy, so the caller's vector is not reordered
+  DocID prev = NULLT; // last value emitted; seeding with NULLT means entries equal to NULLT at the front of the sorted order are skipped as well
+  for(auto i = v.begin(); i != v.end(); ++i){
+    while (prev == *i){ // skip the run of entries equal to the last emitted value
+      ++i;
+      if (i == v.end()) return res; // run reached the end: return here so neither *i nor the loop's ++i goes past end()
+    };
+    prev = *i; // first entry of a new run of equal values
+    res.push_back(prev);
+  };
+  return res;
+}
 
 #define BV_QUERY(pref, Pref) \
-  extern "C" value caml_text_collection_## pref ##_bv(value tree, value str){ \
-  CAMLparam2(tree, str);                                               \
-  CAMLlocal3(res, res_bv, res_array);                                  \
-  int j;                                                               \
-  uchar * cstr = (uchar *) strdup(String_val(str));                    \
-  std::vector<DocID> results = XMLTREE(tree)->Pref(cstr);              \
-  res_bv = caml_alloc_string((XMLTREE(tree)->Size() / 4) + 2);         \
-  unsigned long slen = caml_string_length(res_bv);                     \
-  memset(&(Byte(res_bv,0)), 0, slen);                                  \
-  res_array = caml_alloc_shr(results.size(), 0);                       \
-  for (unsigned int i = 0; i < results.size(); ++i) {                  \
-    j = XMLTREE(tree)->ParentNode(results[i]);                         \
-    Byte(res_bv, j >> 3) |=   (1 << (j & 7));                          \
-    caml_initialize(&Field(res_array, i), Val_int(j));                 \
-  };                                                                   \
-  free(cstr);                                                          \
-  res = caml_alloc(2, 0);                                              \
-  Store_field(res, 0, res_bv);                                         \
-  Store_field(res, 1, res_array);                                      \
-  CAMLreturn(res);                                                     \
+  extern "C" value caml_text_collection_## pref ##_bv(value tree, value str, value dobvv){ \
+    CAMLparam3(tree, str, dobvv);                                              \
+    CAMLlocal3(res, res_bv, res_array);                                        \
+    int j;                                                             \
+    uchar * cstr = (uchar *) strdup(String_val(str));                  \
+    std::vector<DocID> uresults = XMLTREE(tree)->Pref(cstr);           \
+    std::vector<DocID> results = sort_results(uresults);                \
+    bool dobv = Bool_val(dobvv);                                       \
+    res_bv = caml_alloc_string(dobv ? ((XMLTREE(tree)->Size() / 4) + 2) : 0); \
+    unsigned long slen = caml_string_length(res_bv);                   \
+    if (dobv)                                                          \
+      memset(&(Byte(res_bv,0)), 0, slen);                              \
+    res_array = caml_alloc_shr(results.size(), 0);                     \
+    for (unsigned int i = 0; i < results.size(); ++i) {                        \
+      j = XMLTREE(tree)->ParentNode(results[i]);                       \
+      if (dobv)        {                                                       \
+       Byte(res_bv, j >> 3) |=   (1 << (j & 7));                       \
+      };                                                               \
+      caml_initialize(&Field(res_array, i), Val_int(j));               \
+    };                                                                 \
+    free(cstr);                                                                \
+    res = caml_alloc(2, 0);                                            \
+    Store_field(res, 0, res_bv);                                       \
+    Store_field(res, 1, res_array);                                    \
+    CAMLreturn(res);                                                   \
   }                                                                    \