1 // Test driver for text collection
// Timestamps filled in by STARTTIMER() (t1) and STOPTIMER() (t2).
14 static struct timeval t1;
15 static struct timeval t2;
// Record the start / stop instant with gettimeofday().
18 #define STARTTIMER() (gettimeofday(&t1,NULL))
19 #define STOPTIMER() (gettimeofday(&t2,NULL))
// Elapsed time from t1 to t2 in milliseconds: the difference is formed in
// microseconds (seconds * 1000000.0 + microseconds) and then divided by 1000.0.
20 #define GETTIME() (((t2.tv_sec - t1.tv_sec) * 1000000.0 + (t2.tv_usec - t1.tv_usec))/1000.0)
// Reference break address subtracted in the computation below. It is
// initialised to NULL and the only visible update of it is commented out.
22 void * last_brk = NULL;
// NOTE(review): the enclosing function header is not visible in this listing;
// presumably the lines below are the body of get_mem(), which is called
// elsewhere in this file -- confirm.
// sbrk(0) returns the current program break.
25 void * current_brk = sbrk(0);
// Distance in bytes between the current break and last_brk.
26 long int mem = ((long int) current_brk ) - ((long int) last_brk);
27 //last_brk = current_brk;
// Convert bytes to mebibytes using integer division.
28 return (mem/1024/1024);
31 #include "TextCollectionBuilder.h"
32 using SXSI::TextCollection;
33 using SXSI::TextCollectionBuilder;
// Query patterns that time_tc() passes to the TextCollection search methods.
// NOTE(review): the end of this initializer is not visible in this listing.
34 string words[] = { "Bakst",
35 "ruminants", "morphine", "AUSTRALIA",
36 "molecule" ,"brain", "human", "blood","from",
37 "with", " in", "the", "of",
// Upper bound of the loop over words[] in time_tc().
// NOTE(review): must not exceed the number of entries in words[] -- confirm
// against the full initializer.
42 unsigned int NWORDS = 15;
// Runs IsContains, Count, CountContains and Contains on `tc` for each of the
// first NWORDS entries of words[] and writes one line per word to std::cerr.
// NOTE(review): the timer calls and the assignments to `time`, `is` and
// `count` are not visible in this listing; `time` is presumably the GETTIME()
// value of the preceding query -- confirm.
45 void time_tc(TextCollection *tc){
// Holds the value returned by Contains() below.
49 TextCollection::document_result res;
50 for (unsigned int i = 0; i < NWORDS; i++){
// Start the output line with the quoted pattern.
51 std::cerr << "\"" << words[i] << "\": ";
// Query 1: IsContains; its result and `time` are printed.
53 is = tc->IsContains((unsigned char*) words[i].c_str());
57 std::cerr << is << ", " << time << ", ";
// Query 2: Count; its result and `time` are printed.
61 count = tc->Count((unsigned char*) words[i].c_str());
65 std::cerr << count << ", " << time << ", ";
// Query 3: CountContains; its result and `time` are printed.
69 count = tc->CountContains((unsigned char*) words[i].c_str());
73 std::cerr << count << ", " << time << ", ";
// Query 4: Contains; only `time` and the get_mem() value are printed, the
// result itself is not written out in the visible lines.
77 res = tc->Contains((unsigned char*) words[i].c_str());
81 std::cerr << time << ", max_mem = " << get_mem() << "\n" ;
// Entry point of the test driver. Reads lines from standard input, inserts the
// accumulated text into two TextCollectionBuilder instances (constructed with
// 64 and 5), calls InitTextCollection() on each, opens index files for writing
// and later for reading, reloads the collections with TextCollection::Load()
// and reports progress, `time` and get_mem() values on std::cerr.
// NOTE(review): many lines of this function (declarations of `time`, `tc` and
// `file`, timer calls, save/free calls, braces) are not visible in this
// listing; the comments below describe only the visible statements.
85 int main(int argc, char**argv)
// `str` receives the current input line; `buffer` accumulates lines via
// append() until it is inserted into the builders.
87 string * str = new string("Foo");
88 string * buffer = new string("Foo");
// Running statistics: total bytes inserted, longest buffer seen, and the
// string counter used in the progress and statistics messages.
// NOTE(review): text_size is an unsigned int byte counter -- confirm it cannot
// overflow for the intended input sizes.
89 unsigned int text_size = 0;
90 unsigned int max_str = 0;
91 unsigned int num_str = 0;
// Two builders; the messages below refer to the constructor arguments as
// "sf=64" and "sf=5" (note that the second variable is named tcb2).
97 TextCollectionBuilder *tcb64 = new TextCollectionBuilder(64);
98 TextCollectionBuilder *tcb2 = new TextCollectionBuilder(5);
103 std::cerr << "Filling collection\n";
// NOTE(review): eof() is tested before the read, so one iteration may run
// after a getline that extracted nothing -- confirm this is intended.
105 while (not(cin.eof()) ){
106 std::getline(cin, *str ); // Read line by line.
// A line equal to the separator triggers insertion of the buffer.
// NOTE(review): std::getline does not store the newline, so this comparison
// presumably relies on a "\n" being appended to *str in a line not visible
// here -- confirm.
109 if (str->compare("----------\n") == 0 ){
// Insert the accumulated buffer into both builders.
110 tcb64->InsertText((unsigned char*) buffer->c_str());
111 tcb2->InsertText((unsigned char*) buffer->c_str());
// Progress report every 10000 strings.
113 if (num_str % 10000 == 0){
116 std::cerr << "Added " << num_str << " strings in "
117 << time << " ms, max_mem=" << get_mem() << "\n";
// Track the longest buffer and the total number of bytes.
123 if (max_str < buffer->size())
124 max_str = buffer->size();
125 text_size += buffer->size();
// Append the current line to the buffer.
129 buffer->append(*str);
136 std::cerr << "Freeing text buffers : max_mem = " << get_mem() << "\n";
138 std::cerr << "Number of bytes inserted : " << text_size << "\n";
// Build the collection from the builder constructed with 64.
140 std::cerr << "Calling InitTextCollection() for sf=64: ";
142 tc = tcb64->InitTextCollection();
145 std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ;
// Open the output file for the sf=64 index; the call that writes the
// collection is not visible in this listing.
149 file = fopen("index_64.tc","w+");
150 std::cerr << "Saving to index_64.tc ";
155 std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ;
159 std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n";
// Build the collection from the builder constructed with 5.
161 std::cerr << "Calling InitTextCollection() for sf=5: ";
163 tc = tcb2->InitTextCollection();
166 std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ;
// Open the output file for the sf=5 index; the call that writes the
// collection is not visible in this listing.
171 file = fopen("index_05.tc","w+");
172 std::cerr << "Saving to index_05.tc ";
177 std::cerr << time << "ms, max_mem = " << get_mem() << "\n";
181 std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n";
182 std::cerr << "Statistics: " << num_str << " strings, " << max_str << " = max length\n";
// Reload the sf=5 index from disk.
185 std::cerr << "Loading sf=5 TextCollection ";
187 file = fopen("index_05.tc","r");
188 tc = TextCollection::Load(file,5); // sample rate is not used.
191 std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ;
// Header for the sf=5 measurements; presumably followed by a time_tc(tc) call
// in a line not visible here -- confirm.
193 std::cerr << "-----------------\nSampling rate 5\n";
197 std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n";
// Reload the sf=64 index from disk.
199 std::cerr << "Loading sf=64 TextCollection ";
201 file = fopen("index_64.tc","r");
202 tc = TextCollection::Load(file,64);
205 std::cerr << time << "ms, max_mem = " << get_mem() << "\n" ;
// Header for the sf=64 measurements; presumably followed by a time_tc(tc) call
// in a line not visible here -- confirm.
207 std::cerr << "-----------------\nSampling rate 64\n";
211 std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n";