#include <sys/time.h>
#include <time.h>
#include <stdio.h>
+#include <unistd.h>
static struct timeval t1;
static struct timeval t2;
+
#define STARTTIMER() (gettimeofday(&t1,NULL))
#define STOPTIMER() (gettimeofday(&t2,NULL))
#define GETTIME() (((t2.tv_sec - t1.tv_sec) * 1000000.0 + (t2.tv_usec - t1.tv_usec))/1000.0)
-
+void * last_brk = NULL; // Baseline program break; get_mem() reports the distance from it (main sets it via sbrk(0)).
+
+// Returns how far the current program break (sbrk(0)) lies beyond the
+// baseline stored in last_brk, in whole mebibytes (integer division).
+long int get_mem(){
+ const long int baseline = (long int) last_brk;
+ const long int now = (long int) sbrk(0);
+ // The baseline is deliberately NOT advanced here, so successive calls all
+ // measure against the same starting point.
+ return (now - baseline) / 1024 / 1024;
+}
#include "TextCollectionBuilder.h"
using SXSI::TextCollection;
using SXSI::TextCollectionBuilder;
+// Query patterns exercised by time_tc(); each one is timed against every
+// TextCollection query method.
+string words[] = { "Bakst",
+ "ruminants", "morphine", "AUSTRALIA",
+ "molecule" ,"brain", "human", "blood","from",
+ "with", " in", "the", "of",
+ "a",
+ "\n" };
+
+// Number of entries in words[]. Derived from the array itself so the count
+// cannot drift out of sync when patterns are added or removed.
+unsigned int NWORDS = sizeof(words) / sizeof(words[0]);
+
+
+// For every entry of words[], runs IsContains, Count, CountContains and
+// Contains on tc and writes one line to stderr with each result, the elapsed
+// time of each call in milliseconds (GETTIME), and the get_mem() reading.
+void time_tc(TextCollection *tc){
+ double elapsed;
+ int hits;
+ bool found;
+ TextCollection::document_result docs;
+ for (unsigned int w = 0; w != NWORDS; ++w){
+ // The query methods take the pattern as an unsigned char buffer.
+ unsigned char *pattern = (unsigned char*) words[w].c_str();
+ std::cerr << "\"" << words[w] << "\": ";
+
+ // IsContains: result, time.
+ STARTTIMER();
+ found = tc->IsContains(pattern);
+ STOPTIMER();
+ elapsed = GETTIME();
+ std::cerr << found << ", " << elapsed << ", ";
+
+ // Count: result, time.
+ STARTTIMER();
+ hits = tc->Count(pattern);
+ STOPTIMER();
+ elapsed = GETTIME();
+ std::cerr << hits << ", " << elapsed << ", ";
+
+ // CountContains: result, time.
+ STARTTIMER();
+ hits = tc->CountContains(pattern);
+ STOPTIMER();
+ elapsed = GETTIME();
+ std::cerr << hits << ", " << elapsed << ", ";
+
+ // Contains: only the time is reported; the result set is discarded.
+ STARTTIMER();
+ docs = tc->Contains(pattern);
+ STOPTIMER();
+ elapsed = GETTIME();
+ std::cerr << elapsed << ", max_mem = " << get_mem() << "\n" ;
+ }
+}
+// Benchmark driver. Reads documents from stdin (separated by lines consisting
+// of "----------"), inserts each one into two TextCollectionBuilders
+// (constructed with 64 and 5), then for each builder: builds the collection,
+// saves it to index_64.tc / index_05.tc and frees it. Finally both indexes are
+// loaded back from disk and queried through time_tc(). Timings and get_mem()
+// readings are written to stderr after every phase.
int main(int argc, char**argv)
{
- string str;
- string buffer;
+ // Both buffers live on the heap so they can be released explicitly once
+ // reading is done (see the deletes after the loop). str's initial contents
+ // are overwritten by the first getline; buffer must start out empty,
+ // otherwise the first inserted document would carry a spurious prefix and
+ // text_size would be overstated.
+ string * str = new string("Foo");
+ string * buffer = new string();
unsigned int text_size = 0;
unsigned int max_str = 0;
unsigned int num_str = 0;
double time;
-
- string words[] = { "Bakst",
- "ruminants", "morphine", "AUSTRALIA","molecule" ,"brain", "human", "blood","from", "with", " in", "the", "of",
- "a",
- "\n" };
- unsigned int NWORDS = 15;
+ FILE* file;
+ TextCollection * tc;
+
TextCollectionBuilder *tcb64 = new TextCollectionBuilder(64);
TextCollectionBuilder *tcb2 = new TextCollectionBuilder(5);
STARTTIMER();
- std::cerr << "Filling collection\n";
+ // Record the baseline against which get_mem() measures from now on.
+ last_brk= sbrk(0);
+ std::cerr << "Filling collection\n";
while (not(cin.eof()) ){
- getline(cin,str); // Read line by line.
- str.append("\n");
+ std::getline(cin, *str ); // Read line by line.
+ str->append("\n");
- if (str.compare("----------\n") == 0 ){
- tcb64->InsertText((unsigned char*) buffer.c_str());
- tcb2->InsertText((unsigned char*) buffer.c_str());
+ if (str->compare("----------\n") == 0 ){
+ tcb64->InsertText((unsigned char*) buffer->c_str());
+ tcb2->InsertText((unsigned char*) buffer->c_str());
if (num_str % 10000 == 0){
STOPTIMER();
time = GETTIME();
std::cerr << "Added " << num_str << " strings in "
- << time << " ms\n";
+ << time << " ms, max_mem=" << get_mem() << "\n";
std::cerr.flush();
//STARTTIMER();
};
num_str++;
- if (max_str < buffer.size())
- max_str = buffer.size();
- text_size += buffer.size();
- buffer.clear();
+ if (max_str < buffer->size())
+ max_str = buffer->size();
+ text_size += buffer->size();
+ buffer->clear();
}
else
- buffer.append(str);
+ buffer->append(*str);
};
+ // Release the input buffers before any index is built.
+ delete str;
+ delete buffer;
+ buffer = NULL;
+ str = NULL;
+
+ std::cerr << "Freeing text buffers : max_mem = " << get_mem() << "\n";
+
std::cerr << "Number of bytes inserted : " << text_size << "\n";
+
std::cerr << "Calling InitTextCollection() for sf=64: ";
STARTTIMER();
- TextCollection *tc64 = tcb64->InitTextCollection();
- STOPTIMER();
- time = GETTIME();
- std::cerr << time << "ms\n";
- std::cerr << "Calling InitTextCollection() for sf=5: ";
- STARTTIMER();
- TextCollection *tc2 = tcb2->InitTextCollection();
+ tc = tcb64->InitTextCollection();
STOPTIMER();
time = GETTIME();
- std::cerr << time << "ms\n";
- FILE* file;
+ std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ;
+ delete tcb64;
+ tcb64 = NULL;
file = fopen("index_64.tc","w+");
std::cerr << "Saving to index_64.tc ";
STARTTIMER();
- tc64->Save(file);
+ tc->Save(file);
STOPTIMER();
time = GETTIME();
- std::cerr << time << "ms\n";
+ std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ;
fclose(file);
+ delete tc;
+ tc = NULL;
+ std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n";
+
+ std::cerr << "Calling InitTextCollection() for sf=5: ";
+ STARTTIMER();
+ tc = tcb2->InitTextCollection();
+ STOPTIMER();
+ time = GETTIME();
+ std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ;
+ // tcb2 was created with new, so it has to be released with delete;
+ // free() would skip the destructor and is undefined behaviour here.
+ delete tcb2;
+ tcb2=NULL;
+
file = fopen("index_05.tc","w+");
std::cerr << "Saving to index_05.tc ";
STARTTIMER();
- tc2->Save(file);
+ tc->Save(file);
STOPTIMER();
time = GETTIME();
- std::cerr << time << "ms\n";
+ std::cerr << time << "ms, max_mem = " << get_mem() << "\n";
fclose(file);
-
-
+ delete tc;
+ tc = NULL;
+ std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n";
std::cerr << "Statistics: " << num_str << " strings, " << max_str << " = max length\n";
- int count;
- bool is;
- TextCollection::document_result res;
- TextCollection *tc;
- tc = tc64;
- std::cerr << "Sampling rate 64\n";
- for (unsigned int num = 0; num < 2; num ++){
-
- for (unsigned int i = 0; i < NWORDS ; i++){
-
- std::cerr << "\"" << words[i] << ": ";
- STARTTIMER();
- is = tc->IsContains((unsigned char*) words[i].c_str());
- STOPTIMER();
- time = GETTIME();
-
- std::cerr << is << ", " << time << ", ";
-
-
- STARTTIMER();
- count = tc->Count((unsigned char*) words[i].c_str());
- STOPTIMER();
- time = GETTIME();
-
- std::cerr << count << ", " << time << ", ";
-
-
- STARTTIMER();
- count = tc->CountContains((unsigned char*) words[i].c_str());
- STOPTIMER();
- time = GETTIME();
-
- std::cerr << count << ", " << time << ", ";
-
-
- STARTTIMER();
- res = tc->Contains((unsigned char*) words[i].c_str());
- STOPTIMER();
- time = GETTIME();
-
- std::cerr << time << "\n";
-
-
- };
- tc = tc2;
- std::cerr << "---------------------------\n";
- std::cerr << "Sampling rate 5\n";
- };
+
+
+ // Query phase: each saved index is loaded back from disk on its own, so
+ // only one collection is held in memory while it is being timed.
+ std::cerr << "Loading sf=5 TextCollection ";
+ STARTTIMER();
+ file = fopen("index_05.tc","r");
+ tc = TextCollection::Load(file,5); // sample rate is not used.
+ STOPTIMER();
+ time = GETTIME();
+ std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ;
+ fclose(file);
+ std::cerr << "-----------------\nSampling rate 5\n";
+ time_tc(tc);
+ delete tc;
+ tc = NULL;
+ std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n";
+
+ std::cerr << "Loading sf=64 TextCollection ";
+ STARTTIMER();
+ file = fopen("index_64.tc","r");
+ tc = TextCollection::Load(file,64);
+ STOPTIMER();
+ time = GETTIME();
+ std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ;
+ fclose(file);
+ std::cerr << "-----------------\nSampling rate 64\n";
+ time_tc(tc);
+ delete tc;
+ tc = NULL;
+ std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n";
return 0;
}