TextCollection_obs = TextCollection.o TextCollectionBuilder.o FMIndexBuilder.o RLCSABuilder.o FMIndex.o Tools.o \
TextStorage.o Query.o EditDistance.o ResultSet.o
-TCDebug_obs = bittree.o rbtree.o dynFMI.o
+TCDebug_obs = bittree.o rbtree.o dynFMI.o
TEXTCOLLECTION_A=libTextCollection.a
timeTextCollection: timeTextCollection.o $(TextCollection_obs) $(LIBS) $(TCDebug_obs)
$(CC) -o timeTextCollection timeTextCollection.o $(TextCollection_obs) $(TCDebug_obs)
+# Link the createIndex driver from its object file, the text collection
+# objects and $(LIBS). $+ (not $^) keeps the prerequisite list exactly as
+# written, including any repeated entries, so the link line is unchanged.
+createIndex: createIndex.o $(TextCollection_obs) $(LIBS)
+	$(CC) -o $@ $+
+
test2dRange: test2dRange.o ${LIBCDSA}
$(CC) -o test2dRange test2dRange.o ${LIBCDSA}
--- /dev/null
+// Test driver for text collection
+#include <iostream>
+#include <cstdlib>
+#include <string>
+using std::cout;
+using std::endl;
+using std::cin;
+using std::string;
+#include <sys/time.h>
+#include <time.h>
+#include <stdio.h>
+#include <unistd.h>
+
+// Timestamps written by STARTTIMER() and STOPTIMER() respectively.
+static struct timeval timer_begin;
+static struct timeval timer_end;
+
+// Capture the current time of day as the start of a timed section.
+#define STARTTIMER() (gettimeofday(&timer_begin, NULL))
+// Capture the current time of day as the end of a timed section.
+#define STOPTIMER() (gettimeofday(&timer_end, NULL))
+// Milliseconds between the last STARTTIMER() and the last STOPTIMER():
+// the difference is formed in microseconds and then divided by 1000.
+#define GETTIME() (((timer_end.tv_sec - timer_begin.tv_sec) * 1000000.0 + (timer_end.tv_usec - timer_begin.tv_usec))/1000.0)
+
+void * last_brk = NULL;
+
+// Returns the distance, in whole MiB, between the current program break
+// (as reported by sbrk(0)) and the address stored in last_brk.
+long int get_mem(){
+    const long int bytes_per_mib = 1024L * 1024L;
+    const long int break_now = reinterpret_cast<long int>(sbrk(0));
+    const long int break_base = reinterpret_cast<long int>(last_brk);
+    return (break_now - break_base) / bytes_per_mib;
+}
+
+#include "TextCollectionBuilder.h"
+using SXSI::TextCollection;
+using SXSI::TextCollectionBuilder;
+
+
+
+/**
+ * Test driver: reads texts from standard input (one per line), builds a
+ * TextCollection from them, saves it to "index_64.tc" and loads it back.
+ * Timings and break-based memory figures are reported on stderr.
+ *
+ * Lines equal to "------" are skipped entirely. Empty lines are counted
+ * in the progress figure but are not inserted into the collection.
+ *
+ * Returns 0 on success, EXIT_FAILURE if the index file cannot be opened
+ * or the collection could not be built.
+ */
+int main(int argc, char**argv)
+{
+    (void) argc;   // command-line arguments are not used
+    (void) argv;
+
+    const char *index_path = "index_64.tc";
+    string str;
+    unsigned long text_size = 0;   // wide counter: inputs may exceed 4 GiB
+    unsigned int num_str = 0;
+    double elapsed_ms;             // named so that ::time() is not shadowed
+    FILE* file;
+    TextCollection * tc;
+
+    TextCollectionBuilder *tcb =
+        TextCollectionBuilder::create(64, TextCollectionBuilder::index_type_swcsa);
+
+    STARTTIMER();
+    last_brk= sbrk(0);
+
+    std::cerr << "Filling collection\n";
+
+    // Loop on the result of getline itself: testing eof() before reading
+    // would process one phantom empty line after the last real one, and
+    // would never terminate if the stream failed without reaching EOF.
+    while (std::getline(cin, str)) {
+        if (str.compare("------") == 0)
+            continue;   // separator line, ignored
+
+        if (!str.empty())
+            tcb->InsertText((unsigned char*) str.c_str());
+
+        if (num_str % 10000 == 0) {
+            // The timer is deliberately not restarted here, so the figure
+            // is cumulative since the start of the fill phase.
+            STOPTIMER();
+            elapsed_ms = GETTIME();
+            std::cerr << "Added " << num_str << " strings in "
+                      << elapsed_ms << " ms, max_mem=" << get_mem() << "\n";
+            std::cerr.flush();
+        }
+
+        num_str++;
+        text_size += str.size();
+    }
+
+    std::cerr << "Number of bytes inserted : " << text_size << "\n";
+
+    std::cerr << "Calling InitTextCollection() for sf=64: ";
+    STARTTIMER();
+    tc = tcb->InitTextCollection();
+    STOPTIMER();
+    elapsed_ms = GETTIME();
+    std::cerr << elapsed_ms << "ms, max_mem = " << get_mem() << "\n" ;
+    delete tcb;
+    tcb = NULL;
+
+    if (tc == NULL) {
+        std::cerr << "InitTextCollection() returned no collection\n";
+        return EXIT_FAILURE;
+    }
+
+    file = fopen(index_path,"w+");
+    if (file == NULL) {
+        // Do not hand a null stream to Save().
+        perror(index_path);
+        delete tc;
+        return EXIT_FAILURE;
+    }
+    std::cerr << "Saving to index_64.tc ";
+    STARTTIMER();
+    tc->Save(file,index_path);
+    STOPTIMER();
+    elapsed_ms = GETTIME();
+    std::cerr << elapsed_ms << "ms, max_mem = " << get_mem() << "\n" ;
+    fclose(file);
+    delete tc;
+    tc = NULL;
+    std::cerr << "Freeing memory : max_mem = " << get_mem() << "\n";
+
+    std::cerr << "Loading TextCollection from saved file ... ";
+    file = fopen(index_path,"r");
+    if (file == NULL) {
+        // Do not hand a null stream to Load().
+        perror(index_path);
+        return EXIT_FAILURE;
+    }
+    tc = TextCollection::Load(file, index_path, TextCollection::index_mode_default);
+    fclose(file);   // the handle opened for reading was previously leaked
+    std::cerr << "ok\n";
+    delete tc;
+
+    return 0;
+}