1 // Test driver for text collection
// File-local timestamps used by the timing macros below:
// t1 is written by STARTTIMER(), t2 is written by STOPTIMER().
13 static struct timeval t1;
14 static struct timeval t2;
// STARTTIMER(): store the current time of day in t1 (start of the measured interval).
16 #define STARTTIMER() (gettimeofday(&t1,NULL))
// STOPTIMER(): store the current time of day in t2 (end of the measured interval).
17 #define STOPTIMER() (gettimeofday(&t2,NULL))
// GETTIME(): interval t2 - t1 as a floating-point value in milliseconds.
// The seconds difference is scaled to microseconds, the microseconds
// difference is added, and the sum is divided by 1000.0.
// Only meaningful after STARTTIMER() and then STOPTIMER() have both run.
18 #define GETTIME() (((t2.tv_sec - t1.tv_sec) * 1000000.0 + (t2.tv_usec - t1.tv_usec))/1000.0)
22 #include "TextCollectionBuilder.h"
23 using SXSI::TextCollection;
24 using SXSI::TextCollectionBuilder;
27 int main(int argc, char**argv)
31 unsigned int text_size = 0;
32 unsigned int max_str = 0;
33 unsigned int num_str = 0;
36 string words[] = { "Bakst",
37 "ruminants", "morphine", "AUSTRALIA","molecule" ,"brain", "human", "blood","from", "with", " in", "the", "of",
40 unsigned int NWORDS = 15;
42 TextCollectionBuilder *tcb64 = new TextCollectionBuilder(64);
43 TextCollectionBuilder *tcb2 = new TextCollectionBuilder(5);
46 std::cerr << "Filling collection\n";
49 while (not(cin.eof()) ){
50 getline(cin,str); // Read line by line.
53 if (str.compare("----------\n") == 0 ){
54 tcb64->InsertText((unsigned char*) buffer.c_str());
55 tcb2->InsertText((unsigned char*) buffer.c_str());
57 if (num_str % 10000 == 0){
60 std::cerr << "Added " << num_str << " strings in "
67 if (max_str < buffer.size())
68 max_str = buffer.size();
69 text_size += buffer.size();
75 std::cerr << "Number of bytes inserted : " << text_size << "\n";
76 std::cerr << "Calling InitTextCollection() for sf=64: ";
78 TextCollection *tc64 = tcb64->InitTextCollection();
81 std::cerr << time << "ms\n";
82 std::cerr << "Calling InitTextCollection() for sf=5: ";
84 TextCollection *tc2 = tcb2->InitTextCollection();
87 std::cerr << time << "ms\n";
90 file = fopen("index_64.tc","w+");
91 std::cerr << "Saving to index_64.tc ";
96 std::cerr << time << "ms\n";
99 file = fopen("index_05.tc","w+");
100 std::cerr << "Saving to index_05.tc ";
105 std::cerr << time << "ms\n";
109 std::cerr << "Statistics: " << num_str << " strings, " << max_str << " = max length\n";
112 TextCollection::document_result res;
115 std::cerr << "Sampling rate 64\n";
116 for (unsigned int num = 0; num < 2; num ++){
118 for (unsigned int i = 0; i < NWORDS ; i++){
120 std::cerr << "\"" << words[i] << ": ";
122 is = tc->IsContains((unsigned char*) words[i].c_str());
126 std::cerr << is << ", " << time << ", ";
130 count = tc->Count((unsigned char*) words[i].c_str());
134 std::cerr << count << ", " << time << ", ";
138 count = tc->CountContains((unsigned char*) words[i].c_str());
142 std::cerr << count << ", " << time << ", ";
146 res = tc->Contains((unsigned char*) words[i].c_str());
150 std::cerr << time << "\n";
155 std::cerr << "---------------------------\n";
156 std::cerr << "Sampling rate 5\n";