From 95fded64d90aca79f7179f147a5184e9ba3176af Mon Sep 17 00:00:00 2001
From: kim
Date: Fri, 6 Mar 2009 04:27:04 +0000
Subject: [PATCH] Added a timeTextCollection test program

git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/TextCollection@207 3cdefd35-fc62-479d-8e8d-bae585ffb9ca
---
 makefile               |   5 ++
 timeTextCollection.cpp | 110 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 115 insertions(+)
 create mode 100644 timeTextCollection.cpp

diff --git a/makefile b/makefile
index 526643b..04c5f64 100644
--- a/makefile
+++ b/makefile
@@ -5,11 +5,16 @@ LIBCDSA = $(LIBCDSPATH)/lib/libcds.a
 
 testTextCollection_obs = testTextCollection.o TextCollection.o CSA.o Tools.o BitRank.o bittree.o rbtree.o dynFMI.o RLWaveletTree.o GapEncode.o BSGAP.o ${LIBCDSA}
 
+timeTextCollection_obs = timeTextCollection.o TextCollection.o CSA.o Tools.o BitRank.o bittree.o rbtree.o dynFMI.o RLWaveletTree.o GapEncode.o BSGAP.o ${LIBCDSA}
+
+
 all: $(testTextCollection_obs)
 
 testTextCollection: $(testTextCollection_obs) HeapProfiler.o
 	$(CC) -o testTextCollection $(testTextCollection_obs) HeapProfiler.o
 
+timeTextCollection: $(timeTextCollection_obs)
+	$(CC) -o timeTextCollection $(timeTextCollection_obs)
 
 clean:
 	rm -f core *.o *~ testTextCollection
diff --git a/timeTextCollection.cpp b/timeTextCollection.cpp
new file mode 100644
index 0000000..dbe1664
--- /dev/null
+++ b/timeTextCollection.cpp
@@ -0,0 +1,110 @@
+// Test driver for text collection
+#include <iostream>
+#include <string>
+using std::cout;
+using std::endl;
+using std::cin;
+using std::string;
+#include <sys/time.h>
+
+
+#include "TextCollection.h"
+using SXSI::TextCollection;
+
+// Returns the time from `start` to `stop` in milliseconds.
+static double elapsedMs(const struct timeval &start, const struct timeval &stop)
+{
+    return ((stop.tv_sec - start.tv_sec) * 1000000.0
+            + (stop.tv_usec - start.tv_usec)) / 1000.0;
+}
+
+// Reads texts from stdin (lines joined without newlines, each text terminated
+// by a "----------" separator line), inserts them into a TextCollection,
+// then times IsContains/Count/CountContains/Contains for a fixed word list.
+// All progress and timing output goes to stderr.
+int main(int argc, char**argv)
+{
+    // The driver takes no command-line arguments.
+    (void) argc;
+    (void) argv;
+
+    const unsigned int max_texts = 100000;       // stop reading after this many texts
+    const unsigned int report_interval = 10000;  // progress line every N texts
+
+    string str;
+    string buffer;
+    string::size_type max_str = 0;
+    unsigned int num_str = 0;
+    struct timeval t1;
+    struct timeval t2;
+
+    string words[] = { "abcd", "abc", "mirrors", "attires", "mature",
+                       "rescue", "such", "embrace", "shipping", "ae",
+                       "preventions", "ab", "fe", "w" };
+    // Derived from the array so the query loop cannot drift out of sync.
+    const unsigned int num_words = sizeof(words) / sizeof(words[0]);
+
+    TextCollection *csa = TextCollection::InitTextCollection(64);
+    // NOTE(review): csa is never released before exit; confirm that
+    // TextCollection has a public destructor before adding a delete.
+
+    gettimeofday(&t1,NULL);
+    std::cerr << "Filling collection\n";
+    // Loop on the result of getline() rather than on cin.eof(): eof() is only
+    // set once a read has hit end of input, so checking it before getline()
+    // lets a failed read reprocess the stale previous line.
+    while (num_str < max_texts && getline(cin, str)){
+        if (str.compare("----------") == 0){
+            csa->InsertText((unsigned char*) buffer.c_str());
+            num_str++;
+
+            // Report after counting, so the number printed includes the text
+            // just inserted; the time shown covers the last interval only.
+            if (num_str % report_interval == 0){
+                gettimeofday(&t2,NULL);
+                std::cerr << "Added " << num_str << " strings in "
+                          << elapsedMs(t1, t2) << " ms\n";
+                gettimeofday(&t1,NULL);
+            }
+
+            if (max_str < buffer.size())
+                max_str = buffer.size();
+            buffer.clear();
+        }
+        else
+            buffer.append(str);
+    }
+
+    std::cerr << "Calling MakeStatic()\n";
+    csa->MakeStatic();
+    std::cerr << "Statistics: " << num_str << " strings, " << max_str << " = max length\n";
+
+    int count;
+    bool is;
+    TextCollection::document_result res;
+    // One stderr line per word: IsContains result, ms, Count, ms,
+    // CountContains, ms, and finally the ms taken by Contains.
+    for (unsigned int i = 0; i < num_words; i++){
+        unsigned char *pattern = (unsigned char*) words[i].c_str();
+
+        gettimeofday(&t1,NULL);
+        is = csa->IsContains(pattern);
+        gettimeofday(&t2,NULL);
+        std::cerr << is << ", " << elapsedMs(t1, t2) << ", ";
+
+        gettimeofday(&t1,NULL);
+        count = csa->Count(pattern);
+        gettimeofday(&t2,NULL);
+        std::cerr << count << ", " << elapsedMs(t1, t2) << ", ";
+
+        gettimeofday(&t1,NULL);
+        count = csa->CountContains(pattern);
+        gettimeofday(&t2,NULL);
+        std::cerr << count << ", " << elapsedMs(t1, t2) << ", ";
+
+        gettimeofday(&t1,NULL);
+        res = csa->Contains(pattern);
+        gettimeofday(&t2,NULL);
+        std::cerr << elapsedMs(t1, t2) << "\n";
+    }
+
+    return 0;
+}
-- 
2.17.1