using std::string;
#include <sys/time.h>
#include <time.h>
+#include <stdio.h>
+#include <unistd.h>
static struct timeval t1;
static struct timeval t2;
+// Stopwatch helpers: STARTTIMER() stores the current time in t1, STOPTIMER() stores it in t2, and GETTIME() evaluates to t2 - t1 in milliseconds (as a double).
#define STARTTIMER() (gettimeofday(&t1,NULL))
#define STOPTIMER() (gettimeofday(&t2,NULL))
#define GETTIME() (((t2.tv_sec - t1.tv_sec) * 1000000.0 + (t2.tv_usec - t1.tv_usec))/1000.0)
-
+void * last_brk = NULL; // Baseline program break: get_mem() reports sbrk(0) relative to this value; main() sets it with sbrk(0) before reading input.
+
+// Reports how far the program break (sbrk(0)) currently lies beyond last_brk,
+// in whole mebibytes (integer division, so a partial MiB is dropped).
+// last_brk is only read here, never updated, so every call measures from the
+// same baseline.
+long int get_mem(){
+  const long int break_now = (long int) sbrk(0);
+  const long int break_base = (long int) last_brk;
+  const long int grown_bytes = break_now - break_base;
+  const long int grown_kib = grown_bytes / 1024;
+  return grown_kib / 1024;
+}
#include "TextCollectionBuilder.h"
using SXSI::TextCollection;
using SXSI::TextCollectionBuilder;
+string words[] = { "Bakst", // Search patterns that time_tc() runs against a collection, one query round per entry.
+ "ruminants", "morphine", "AUSTRALIA",
+ "molecule" ,"brain", "human", "blood","from",
+ "with", " in", "the", "of",
+ "a",
+ "\n" };
+
+// Number of entries in words[]; time_tc() uses it as its loop bound, so it must be kept in sync with the initializer above.
+unsigned int NWORDS = 15;
+
+
+// Times the four query entry points of a TextCollection -- IsContains, Count,
+// CountContains and Contains -- once for every entry of words[] (NWORDS of
+// them) and logs one line per pattern to std::cerr:
+//   "<pattern>": <found>, <ms>, <count>, <ms>, <count>, <ms>, <ms>, max_mem = <get_mem()>
+// Each <ms> is the GETTIME() reading for the call logged just before it; the
+// value returned by Contains is stored but not printed.
+void time_tc(TextCollection *tc){
+  TextCollection::document_result matches;
+  for (unsigned int w = 0; w < NWORDS; ++w){
+    // Every query below receives the same unsigned char* view of the pattern.
+    unsigned char *pattern = (unsigned char*) words[w].c_str();
+    std::cerr << "\"" << words[w] << "\": ";
+
+    // IsContains: result and elapsed time.
+    STARTTIMER();
+    const bool found = tc->IsContains(pattern);
+    STOPTIMER();
+    const double ms_is_contains = GETTIME();
+    std::cerr << found << ", " << ms_is_contains << ", ";
+
+    // Count: result and elapsed time.
+    STARTTIMER();
+    const int n_count = tc->Count(pattern);
+    STOPTIMER();
+    const double ms_count = GETTIME();
+    std::cerr << n_count << ", " << ms_count << ", ";
+
+    // CountContains: result and elapsed time.
+    STARTTIMER();
+    const int n_count_contains = tc->CountContains(pattern);
+    STOPTIMER();
+    const double ms_count_contains = GETTIME();
+    std::cerr << n_count_contains << ", " << ms_count_contains << ", ";
+
+    // Contains: only the elapsed time is reported, followed by the memory reading.
+    STARTTIMER();
+    matches = tc->Contains(pattern);
+    STOPTIMER();
+    const double ms_contains = GETTIME();
+    std::cerr << ms_contains << ", max_mem = " << get_mem() << "\n" ;
+  }
+}
int main(int argc, char**argv)
{
- string str;
- string buffer;
+ string * str = new string("Foo");
+ string * buffer = new string("Foo");
+ unsigned int text_size = 0;
unsigned int max_str = 0;
unsigned int num_str = 0;
double time;
+ FILE* file;
+ TextCollection * tc;
+
- string words[] = { "abcd", "abc", "mirrors", "attires", "mature",
- "rescue", "such", "embrace", "shipping", "ae",
- "preventions", "ab", "fe", "w" };
-
-
-
- TextCollectionBuilder *tcb = new TextCollectionBuilder(64);
-
+ TextCollectionBuilder *tcb64 = new TextCollectionBuilder(64);
+ TextCollectionBuilder *tcb2 = new TextCollectionBuilder(5);
STARTTIMER();
- std::cerr << "Filling collection\n";
- // read only 100000 strings
- while (not(cin.eof()) && num_str < 100000 ){
- getline(cin,str); // Read line by line.
- if (str.compare("----------") == 0){
- tcb->InsertText((unsigned char*) buffer.c_str());
-
- if (num_str % 10000 == 0){
- STOPTIMER();
- time = GETTIME();
- std::cerr << "Added " << num_str << " strings in "
- << time << " ms\n";
- STARTTIMER();
- };
-
- num_str++;
- if (max_str < buffer.size())
- max_str = buffer.size();
- buffer.clear();
-
- }
- else
- buffer.append(str);
- };
- std::cerr << "Calling MakeStatic()\n";
-
- TextCollection *tc = tcb->InitTextCollection();
-
- std::cerr << "Statistics: " << num_str << " strings, " << max_str << " = max length\n";
- int count;
- bool is;
- TextCollection::document_result res;
- for (unsigned int i = 0; i < (sizeof(words)/sizeof(char*)) ; i++){
-
- STARTTIMER();
- is = tc->IsContains((unsigned char*) words[i].c_str());
- STOPTIMER();
- time = GETTIME();
+ last_brk= sbrk(0);
- std::cerr << is << ", " << time << ", ";
+ std::cerr << "Filling collection\n";
+ while (not(cin.eof()) ){
+ std::getline(cin, *str ); // Read line by line.
+ str->append("\n");
+
+ if (str->compare("----------\n") == 0 ){
+ tcb64->InsertText((unsigned char*) buffer->c_str());
+ tcb2->InsertText((unsigned char*) buffer->c_str());
+
+ if (num_str % 10000 == 0){
+ STOPTIMER();
+ time = GETTIME();
+ std::cerr << "Added " << num_str << " strings in "
+ << time << " ms, max_mem=" << get_mem() << "\n";
+ std::cerr.flush();
+ //STARTTIMER();
+ };
+
+ num_str++;
+ if (max_str < buffer->size())
+ max_str = buffer->size();
+ text_size += buffer->size();
+ buffer->clear();
+ }
+ else
+ buffer->append(*str);
+ };
+ delete str;
+ delete buffer;
+ buffer = NULL;
+ str = NULL;
- STARTTIMER();
- count = tc->Count((unsigned char*) words[i].c_str());
- STOPTIMER();
- time = GETTIME();
+ std::cerr << "Freeing text buffers : max_mem = " << get_mem() << "\n";
- std::cerr << count << ", " << time << ", ";
+ std::cerr << "Number of bytes inserted : " << text_size << "\n";
+ std::cerr << "Calling InitTextCollection() for sf=64: ";
+ STARTTIMER();
+ tc = tcb64->InitTextCollection();
+ STOPTIMER();
+ time = GETTIME();
+ std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ;
+ delete tcb64;
+ tcb64 = NULL;
+
+ file = fopen("index_64.tc","w+");
+ std::cerr << "Saving to index_64.tc ";
+ STARTTIMER();
+ tc->Save(file);
+ STOPTIMER();
+ time = GETTIME();
+ std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ;
+ fclose(file);
+ delete tc;
+ tc = NULL;
+ std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n";
+
+ std::cerr << "Calling InitTextCollection() for sf=5: ";
+ STARTTIMER();
+ tc = tcb2->InitTextCollection();
+ STOPTIMER();
+ time = GETTIME();
+ std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ;
+ free(tcb2);
+ tcb2=NULL;
- STARTTIMER();
- count = tc->CountContains((unsigned char*) words[i].c_str());
- STOPTIMER();
- time = GETTIME();
- std::cerr << count << ", " << time << ", ";
-
+ file = fopen("index_05.tc","w+");
+ std::cerr << "Saving to index_05.tc ";
+ STARTTIMER();
+ tc->Save(file);
+ STOPTIMER();
+ time = GETTIME();
+ std::cerr << time << "ms, max_mem = " << get_mem() << "\n";
+ fclose(file);
+ delete tc;
+ tc = NULL;
+ std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n";
+ std::cerr << "Statistics: " << num_str << " strings, " << max_str << " = max length\n";
- STARTTIMER();
- res = tc->Contains((unsigned char*) words[i].c_str());
- STOPTIMER();
- time = GETTIME();
-
- std::cerr << time << "\n";
-
-
- };
+ std::cerr << "Loading sf=5 TextCollection ";
+ STARTTIMER();
+ file = fopen("index_05.tc","r");
+ tc = TextCollection::Load(file,5); // sample rate is not used.
+ STOPTIMER();
+ time = GETTIME();
+ std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ;
+ fclose(file);
+ std::cerr << "-----------------\nSampling rate 5\n";
+ time_tc(tc);
+ delete tc;
+ tc = NULL;
+ std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n";
+
+ std::cerr << "Loading sf=64 TextCollection ";
+ STARTTIMER();
+ file = fopen("index_64.tc","r");
+ tc = TextCollection::Load(file,64);
+ STOPTIMER();
+ time = GETTIME();
+ std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ;
+ fclose(file);
+ std::cerr << "-----------------\nSampling rate 64\n";
+ time_tc(tc);
+ delete tc;
+ tc = NULL;
+ std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n";
return 0;
}