From 3ed1b1733c658a81543314fcb8cd1e99a27e3585 Mon Sep 17 00:00:00 2001 From: kim Date: Thu, 16 Apr 2009 09:32:47 +0000 Subject: [PATCH] Fixed typo git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/TextCollection@315 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- timeTextCollection.cpp | 222 +++++++++++++++++++++++++---------------- 1 file changed, 138 insertions(+), 84 deletions(-) diff --git a/timeTextCollection.cpp b/timeTextCollection.cpp index 40abfac..74c9615 100644 --- a/timeTextCollection.cpp +++ b/timeTextCollection.cpp @@ -9,151 +9,205 @@ using std::string; #include #include #include +#include static struct timeval t1; static struct timeval t2; + #define STARTTIMER() (gettimeofday(&t1,NULL)) #define STOPTIMER() (gettimeofday(&t2,NULL)) #define GETTIME() (((t2.tv_sec - t1.tv_sec) * 1000000.0 + (t2.tv_usec - t1.tv_usec))/1000.0) - +void * last_brk = NULL; + +long int get_mem(){ + void * current_brk = sbrk(0); + long int mem = ((long int) current_brk ) - ((long int) last_brk); + //last_brk = current_brk; + return (mem/1024/1024); +} #include "TextCollectionBuilder.h" using SXSI::TextCollection; using SXSI::TextCollectionBuilder; +string words[] = { "Bakst", + "ruminants", "morphine", "AUSTRALIA", + "molecule" ,"brain", "human", "blood","from", + "with", " in", "the", "of", + "a", + "\n" }; + +unsigned int NWORDS = 15; + + +void time_tc(TextCollection *tc){ + double time; + int count; + bool is; + TextCollection::document_result res; + for (unsigned int i = 0; i < NWORDS; i++){ + std::cerr << "\"" << words[i] << "\": "; + STARTTIMER(); + is = tc->IsContains((unsigned char*) words[i].c_str()); + STOPTIMER(); + time = GETTIME(); + + std::cerr << is << ", " << time << ", "; + + + STARTTIMER(); + count = tc->Count((unsigned char*) words[i].c_str()); + STOPTIMER(); + time = GETTIME(); + + std::cerr << count << ", " << time << ", "; + + + STARTTIMER(); + count = tc->CountContains((unsigned char*) words[i].c_str()); + STOPTIMER(); + time = GETTIME(); + + std::cerr << count << ", " << time << ", "; + + + STARTTIMER(); + res = tc->Contains((unsigned char*) words[i].c_str()); + STOPTIMER(); + time = GETTIME(); + + std::cerr << time << ", max_mem = " << get_mem() << "\n" ; + }; +} int main(int argc, char**argv) { - string str; - string buffer; + string * str = new string("Foo"); + string * buffer = new string("Foo"); unsigned int text_size = 0; unsigned int max_str = 0; unsigned int num_str = 0; double time; - - string words[] = { "Bakst", - "ruminants", "morphine", "AUSTRALIA","molecule" ,"brain", "human", "blood","from", "with", " in", "the", "of", - "a", - "\n" }; - unsigned int NWORDS = 15; + FILE* file; + TextCollection * tc; + TextCollectionBuilder *tcb64 = new TextCollectionBuilder(64); TextCollectionBuilder *tcb2 = new TextCollectionBuilder(5); STARTTIMER(); - std::cerr << "Filling collection\n"; + last_brk= sbrk(0); + std::cerr << "Filling collection\n"; while (not(cin.eof()) ){ - getline(cin,str); // Read line by line. - str.append("\n"); + std::getline(cin, *str ); // Read line by line. + str->append("\n"); - if (str.compare("----------\n") == 0 ){ - tcb64->InsertText((unsigned char*) buffer.c_str()); - tcb2->InsertText((unsigned char*) buffer.c_str()); + if (str->compare("----------\n") == 0 ){ + tcb64->InsertText((unsigned char*) buffer->c_str()); + tcb2->InsertText((unsigned char*) buffer->c_str()); if (num_str % 10000 == 0){ STOPTIMER(); time = GETTIME(); std::cerr << "Added " << num_str << " strings in " - << time << " ms\n"; + << time << " ms, max_mem=" << get_mem() << "\n"; std::cerr.flush(); //STARTTIMER(); }; num_str++; - if (max_str < buffer.size()) - max_str = buffer.size(); - text_size += buffer.size(); - buffer.clear(); + if (max_str < buffer->size()) + max_str = buffer->size(); + text_size += buffer->size(); + buffer->clear(); } else - buffer.append(str); + buffer->append(*str); }; + delete str; + delete buffer; + buffer = NULL; + str = NULL; + + std::cerr << "Freeing text buffers : max_mem = " << get_mem() << "\n"; + std::cerr << "Number of bytes inserted : " << text_size << "\n"; + std::cerr << "Calling InitTextCollection() for sf=64: "; STARTTIMER(); - TextCollection *tc64 = tcb64->InitTextCollection(); - STOPTIMER(); - time = GETTIME(); - std::cerr << time << "ms\n"; - std::cerr << "Calling InitTextCollection() for sf=5: "; - STARTTIMER(); - TextCollection *tc2 = tcb2->InitTextCollection(); + tc = tcb64->InitTextCollection(); STOPTIMER(); time = GETTIME(); - std::cerr << time << "ms\n"; - FILE* file; + std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ; + delete tcb64; + tcb64 = NULL; file = fopen("index_64.tc","w+"); std::cerr << "Saving to index_64.tc "; STARTTIMER(); - tc64->Save(file); + tc->Save(file); STOPTIMER(); time = GETTIME(); - std::cerr << time << "ms\n"; + std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ; fclose(file); + delete tc; + tc = NULL; + std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n"; + + std::cerr << "Calling InitTextCollection() for sf=5: "; + STARTTIMER(); + tc = tcb2->InitTextCollection(); + STOPTIMER(); + time = GETTIME(); + std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ; + free(tcb2); + tcb2=NULL; + file = fopen("index_05.tc","w+"); std::cerr << "Saving to index_05.tc "; STARTTIMER(); - tc2->Save(file); + tc->Save(file); STOPTIMER(); time = GETTIME(); - std::cerr << time << "ms\n"; + std::cerr << time << "ms, max_mem = " << get_mem() << "\n"; fclose(file); - - + delete tc; + tc = NULL; + std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n"; std::cerr << "Statistics: " << num_str << " strings, " << max_str << " = max length\n"; - int count; - bool is; - TextCollection::document_result res; - TextCollection *tc; - tc = tc64; - std::cerr << "Sampling rate 64\n"; - for (unsigned int num = 0; num < 2; num ++){ - - for (unsigned int i = 0; i < NWORDS ; i++){ - - std::cerr << "\"" << words[i] << ": "; - STARTTIMER(); - is = tc->IsContains((unsigned char*) words[i].c_str()); - STOPTIMER(); - time = GETTIME(); - - std::cerr << is << ", " << time << ", "; - - - STARTTIMER(); - count = tc->Count((unsigned char*) words[i].c_str()); - STOPTIMER(); - time = GETTIME(); - - std::cerr << count << ", " << time << ", "; - - - STARTTIMER(); - count = tc->CountContains((unsigned char*) words[i].c_str()); - STOPTIMER(); - time = GETTIME(); - - std::cerr << count << ", " << time << ", "; - - - STARTTIMER(); - res = tc->Contains((unsigned char*) words[i].c_str()); - STOPTIMER(); - time = GETTIME(); - - std::cerr << time << "\n"; - - - }; - tc = tc2; - std::cerr << "---------------------------\n"; - std::cerr << "Sampling rate 5\n"; - }; + + + std::cerr << "Loading sf=5 TextCollection "; + STARTTIMER(); + file = fopen("index_05.tc","r"); + tc = TextCollection::Load(file,5); // sample rate is not used. + STOPTIMER(); + time = GETTIME(); + std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ; + fclose(file); + std::cerr << "-----------------\nSampling rate 5\n"; + time_tc(tc); + delete tc; + tc = NULL; + std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n"; + + std::cerr << "Loading sf=64 TextCollection "; + STARTTIMER(); + file = fopen("index_64.tc","r"); + tc = TextCollection::Load(file,64); + STOPTIMER(); + time = GETTIME(); + std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ; + fclose(file); + std::cerr << "-----------------\nSampling rate 64\n"; + time_tc(tc); + delete tc; + tc = NULL; + std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n"; return 0; } -- 2.17.1