Added a timeTextCollection test program
author kim <kim@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Fri, 6 Mar 2009 04:27:04 +0000 (04:27 +0000)
committer kim <kim@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Fri, 6 Mar 2009 04:27:04 +0000 (04:27 +0000)
git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/TextCollection@207 3cdefd35-fc62-479d-8e8d-bae585ffb9ca

makefile
timeTextCollection.cpp [new file with mode: 0644]

index 526643b..04c5f64 100644 (file)
--- a/makefile
+++ b/makefile
@@ -5,11 +5,16 @@ LIBCDSA = $(LIBCDSPATH)/lib/libcds.a
 
 testTextCollection_obs = testTextCollection.o TextCollection.o CSA.o Tools.o BitRank.o bittree.o rbtree.o dynFMI.o RLWaveletTree.o GapEncode.o BSGAP.o ${LIBCDSA}
 
+timeTextCollection_obs = timeTextCollection.o TextCollection.o CSA.o Tools.o BitRank.o bittree.o rbtree.o dynFMI.o RLWaveletTree.o GapEncode.o BSGAP.o ${LIBCDSA}
+
+
 all: $(testTextCollection_obs)
 
 testTextCollection: $(testTextCollection_obs) HeapProfiler.o
        $(CC) -o testTextCollection $(testTextCollection_obs) HeapProfiler.o
 
+timeTextCollection: $(timeTextCollection_obs)
+       $(CC) -o timeTextCollection $(timeTextCollection_obs)
 clean:
        rm -f core *.o *~ testTextCollection
 
diff --git a/timeTextCollection.cpp b/timeTextCollection.cpp
new file mode 100644 (file)
index 0000000..dbe1664
--- /dev/null
@@ -0,0 +1,106 @@
+// Timing driver for TextCollection: builds a collection from stdin and times pattern queries
+#include <iostream>
+#include <cstdlib>
+#include <string>
+using std::cout;
+using std::endl;
+using std::cin;
+using std::string;
+#include <sys/time.h>
+#include <time.h>
+
+
+#include "TextCollection.h"
+using SXSI::TextCollection;
+
+// Milliseconds elapsed from `start` to `stop`, as a double.
+static double elapsed_ms(const struct timeval &start, const struct timeval &stop)
+{
+  return ((stop.tv_sec - start.tv_sec) * 1000000.0
+          + (stop.tv_usec - start.tv_usec)) / 1000.0;
+}
+
+// Reads texts from stdin, inserts them into a TextCollection, calls
+// MakeStatic() and then times four queries for each sample pattern.
+// Input: the lines of one text are concatenated (without the newlines); a
+// line consisting of exactly "----------" ends the text and inserts it.
+// Lines after the last separator are never inserted.  At most 100000 texts
+// are read.  Progress and timings go to stderr; each pattern yields one line:
+//   IsContains result, ms, Count result, ms, CountContains result, ms, ms
+// where the final figure is the time of Contains(), whose result is not printed.
+int main(int argc, char**argv)
+{
+  const unsigned int max_texts = 100000;    // cap on the number of texts read
+  const unsigned int report_every = 10000;  // progress line every N texts
+
+  string words[] =  { "abcd", "abc", "mirrors", "attires", "mature",
+                     "rescue", "such", "embrace", "shipping", "ae",
+                     "preventions", "ab", "fe", "w" };
+  // Derived from the array so that editing the list cannot desynchronise it.
+  const unsigned int num_words = sizeof(words) / sizeof(words[0]);
+
+  string str;
+  string buffer;
+  string::size_type max_str = 0;  // buffer.size() is a size_type, not an int
+  unsigned int num_str = 0;
+  struct timeval t1;
+  struct timeval t2;
+
+  TextCollection *csa = TextCollection::InitTextCollection(64);
+
+  gettimeofday(&t1,NULL);
+  std::cerr << "Filling collection\n";
+  // Test getline() itself: looping on !eof() never terminates if the stream
+  // fails without reaching end-of-file.
+  while (num_str < max_texts && getline(cin,str)){
+    if (str.compare("----------") == 0){
+      csa->InsertText((unsigned char*) buffer.c_str());
+      num_str++;
+      // Count first, so the report shows how many texts are inserted so far;
+      // the time covers the batch since t1 was last restarted.
+      if (num_str % report_every == 0){
+        gettimeofday(&t2,NULL);
+        std::cerr << "Added " << num_str << " strings in "
+                  << elapsed_ms(t1, t2) << " ms\n";
+        gettimeofday(&t1,NULL);
+      }
+      if (max_str < buffer.size())
+        max_str = buffer.size();
+      buffer.clear();
+    }
+    else
+      buffer.append(str);
+  }
+
+  std::cerr << "Calling MakeStatic()\n";
+  csa->MakeStatic();
+  std::cerr << "Statistics: " << num_str << " strings, " << max_str << " = max length\n";
+
+  int count;
+  bool is;
+  TextCollection::document_result res;
+  for (unsigned int i = 0; i < num_words; i++){
+    unsigned char *pattern = (unsigned char*) words[i].c_str();
+    gettimeofday(&t1,NULL);
+    is = csa->IsContains(pattern);
+    gettimeofday(&t2,NULL);
+    std::cerr << is << ", " << elapsed_ms(t1, t2) << ", ";
+
+    gettimeofday(&t1,NULL);
+    count = csa->Count(pattern);
+    gettimeofday(&t2,NULL);
+    std::cerr << count << ", " << elapsed_ms(t1, t2) << ", ";
+
+    gettimeofday(&t1,NULL);
+    count = csa->CountContains(pattern);
+    gettimeofday(&t2,NULL);
+    std::cerr << count << ", " << elapsed_ms(t1, t2) << ", ";
+
+    gettimeofday(&t1,NULL);
+    res = csa->Contains(pattern);
+    gettimeofday(&t2,NULL);
+    std::cerr << elapsed_ms(t1, t2) << "\n";
+  }
+
+  return 0;
+}