X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=timeTextCollection.cpp;h=47d19760ce7119da70ebd5e91dbde12abf6c6cf4;hb=ee8a3e526fe7f39cdc075263824faf6c17389297;hp=dbe16646e810026c0e455595928053a143f28cc7;hpb=95fded64d90aca79f7179f147a5184e9ba3176af;p=SXSI%2FTextCollection.git diff --git a/timeTextCollection.cpp b/timeTextCollection.cpp index dbe1664..47d1976 100644 --- a/timeTextCollection.cpp +++ b/timeTextCollection.cpp @@ -9,9 +9,18 @@ using std::string; #include #include +static struct timeval t1; +static struct timeval t2; -#include "TextCollection.h" +#define STARTTIMER() (gettimeofday(&t1,NULL)) +#define STOPTIMER() (gettimeofday(&t2,NULL)) +#define GETTIME() (((t2.tv_sec - t1.tv_sec) * 1000000.0 + (t2.tv_usec - t1.tv_usec))/1000.0) + + + +#include "TextCollectionBuilder.h" using SXSI::TextCollection; +using SXSI::TextCollectionBuilder; int main(int argc, char**argv) { @@ -19,8 +28,6 @@ int main(int argc, char**argv) string buffer; unsigned int max_str = 0; unsigned int num_str = 0; - struct timeval t1; - struct timeval t2; double time; string words[] = { "abcd", "abc", "mirrors", "attires", "mature", @@ -29,23 +36,23 @@ int main(int argc, char**argv) - TextCollection *csa = TextCollection::InitTextCollection(64); + TextCollectionBuilder *tcb = new TextCollectionBuilder(64); - gettimeofday(&t1,NULL); + STARTTIMER(); std::cerr << "Filling collection\n"; + // read only 100000 strings while (not(cin.eof()) && num_str < 100000 ){ getline(cin,str); // Read line by line. if (str.compare("----------") == 0){ - csa->InsertText((unsigned char*) buffer.c_str()); + tcb->InsertText((unsigned char*) buffer.c_str()); if (num_str % 10000 == 0){ - gettimeofday(&t2,NULL); - time = ((t2.tv_sec - t1.tv_sec) * 1000000.0 - + (t2.tv_usec - t1.tv_usec))/1000.0; - std::cerr << "Added " << num_str << " strings in " - << time << " ms\n"; - gettimeofday(&t1,NULL); + STOPTIMER(); + time = GETTIME(); + std::cerr << "Added " << num_str << " strings in " + << time << " ms\n"; + STARTTIMER(); }; num_str++; @@ -58,47 +65,46 @@ int main(int argc, char**argv) buffer.append(str); }; std::cerr << "Calling MakeStatic()\n"; - csa->MakeStatic(); + + TextCollection *tc = tcb->InitTextCollection(); + std::cerr << "Statistics: " << num_str << " strings, " << max_str << " = max length\n"; int count; bool is; TextCollection::document_result res; - for (int i = 0; i < 14; i++){ - gettimeofday(&t1,NULL); - is = csa->IsContains((unsigned char*) words[i].c_str()); - gettimeofday(&t2,NULL); - time = ((t2.tv_sec - t1.tv_sec) * 1000000.0 - + (t2.tv_usec - t1.tv_usec))/1000.0; + for (unsigned int i = 0; i < (sizeof(words)/sizeof(char*)) ; i++){ + + STARTTIMER(); + is = tc->IsContains((unsigned char*) words[i].c_str()); + STOPTIMER(); + time = GETTIME(); std::cerr << is << ", " << time << ", "; - gettimeofday(&t1,NULL); - count = csa->Count((unsigned char*) words[i].c_str()); - gettimeofday(&t2,NULL); - time = ((t2.tv_sec - t1.tv_sec) * 1000000.0 - + (t2.tv_usec - t1.tv_usec))/1000.0; + STARTTIMER(); + count = tc->Count((unsigned char*) words[i].c_str()); + STOPTIMER(); + time = GETTIME(); std::cerr << count << ", " << time << ", "; - gettimeofday(&t1,NULL); - count = csa->CountContains((unsigned char*) words[i].c_str()); - gettimeofday(&t2,NULL); - time = ((t2.tv_sec - t1.tv_sec) * 1000000.0 - + (t2.tv_usec - t1.tv_usec))/1000.0; + + STARTTIMER(); + count = tc->CountContains((unsigned char*) words[i].c_str()); + STOPTIMER(); + time = GETTIME(); std::cerr << count << ", " << time << ", "; - gettimeofday(&t1,NULL); - res = csa->Contains((unsigned char*) words[i].c_str()); - gettimeofday(&t2,NULL); - time = ((t2.tv_sec - t1.tv_sec) * 1000000.0 - + (t2.tv_usec - t1.tv_usec))/1000.0; + STARTTIMER(); + res = tc->Contains((unsigned char*) words[i].c_str()); + STOPTIMER(); + time = GETTIME(); std::cerr << time << "\n"; - };