// Test driver for text collection
#include <iostream>
+#include <cstdlib>
+#include <string>
+#include "HeapProfiler.h"
using std::cout;
using std::endl;
+using std::cin;
+using std::string;
#include "TextCollection.h"
using SXSI::TextCollection;
// Memory-profiling driver: reads strings from standard input one line at a
// time, inserts each of them into a TextCollection, and prints HeapProfiler
// heap figures on stderr during insertion, around MakeStatic(), and after the
// collection has been deleted. Always returns 0.
int main()
{
- uchar *text = (uchar*) "acabab";
- TextCollection *csa = TextCollection::InitTextCollection(1);
- csa->InsertText(text);
- text = (uchar*) "abaca";
- csa->InsertText(text);
- text = (uchar*) "abacb";
- csa->InsertText(text);
+ std::string kbd; // scratch target for the "[press enter]" reads further down
+ string str; // current input line
+ uchar* data; // pointer to str's character buffer, as handed to InsertText
+ int i = 0 ,j = 0; // i: number of strings inserted so far; j: sum of their sizes in bytes
+ int heap_base = HeapProfiler::GetHeapConsumption (5); // Avoid small samplerates ;) (presumably 5 is the profiler sample rate -- confirm in HeapProfiler.h)
+ std::cerr << "Initial heap usage : " << heap_base << "\n";
+ TextCollection *csa = TextCollection::InitTextCollection();
+ heap_base = HeapProfiler::GetHeapConsumption (); // re-sample: this becomes the baseline subtracted in the progress reports below
+ std::cerr << "Heap usage after InitTextCollection : " << heap_base << "\n";
- csa->MakeStatic();
-
- text = csa->GetText(0);
- cout << "Text 0: \"" << text << "\"" << endl;
- delete [] text;
- text = csa->GetText(1);
- cout << "Text 1: \"" << text << "\"" << endl;
- delete [] text;
- text = csa->GetText(2);
- cout << "Text 2: \"" << text << "\"" << endl;
- delete [] text;
-
- text = csa->GetText(2, 2, 4);
- cout << "Substring of Text 3: \"" << text << "\"" << endl;
- delete [] text;
-
- printf("n:o contains: %u\n", csa->CountContains((uchar *)"ac"));
- printf("n:o suffix: %u\n", csa->CountSuffix((uchar *)"b"));
- printf("n:o equal: %u\n", csa->CountEqual((uchar *)"acabab"));
- printf("is equal: %u\n", csa->IsEqual((uchar *)"abacb"));
+
+ while (not(cin.eof())){ // NOTE(review): eof() is tested before the read, so the iteration in which getline() finds no more input still runs the whole body
+ getline(cin,str); // Read line by line. NOTE(review): result is unchecked; on empty input or input ending in a newline, one extra empty string gets inserted and counted
+// cin >> str; // Read word by word.
+ data = (uchar *) str.c_str(); // C-style cast drops the const of c_str(); assumes InsertText does not modify the buffer -- TODO confirm
+ csa->InsertText(data); // presumably copies the text, since str is cleared right below -- verify against TextCollection
+ i++;
+ j+= str.size(); // NOTE(review): j is an int, so a very large input could overflow this byte counter
+ str.clear();
+ if ( i % 1000 == 0) { // progress report on stderr after every 1000 inserted strings
+ std::cerr << "Inserted : " << i << " strings\n";
+ std::cerr << "Number of bytes inserted : " << j << "b \n";
+ std::cerr << "Heap usage used for strings: " << HeapProfiler::GetHeapConsumption() - heap_base // heap growth relative to the post-InitTextCollection baseline
+ << "bytes\n";
+ std::cerr << "Ratio is : " << (float) (HeapProfiler::GetHeapConsumption() - heap_base) / ((float) j) <<"\n"; // heap growth divided by the number of bytes inserted
+
+ };
- TextCollection::document_result dr;
- dr = csa->Contains((uchar*)"ab");
- printDocumentResult(dr);
+ };
- TextCollection::full_result fr;
- fr = csa->FullContains((uchar *)"ab");
- printFullResult(fr);
+/* the whole file as one string:
+ uchar *temp = Tools::GetFileContents("data.txt", 0);
+ csa->InsertText(temp);
+ delete [] temp;*/
- delete csa;
+ std::cerr << "Creating new text collection with " << i << " strings (total " << j/1024 << " kb)\n"; // j/1024 is an integer division, so the kb figure is truncated
+ std::cerr << "Before MakeStatic() [press enter]\n";
+ std::cin >> kbd; // NOTE(review): the loop above only exits once cin is at EOF, so this extraction fails immediately and does not actually wait for a key press
+ // This will print the maximum mem usage during construction time:
+ std::cerr << "max heap usage: " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
+ csa->MakeStatic();
+ std::cerr << "After MakeStatic() [press enter]\n";
+ // This will print the maximum mem usage during MakeStatic():
+ std::cerr << "max heap usage: " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
+ std::cin >> kbd; // NOTE(review): same as above -- cin is already at EOF, so this does not pause
+ std::cerr << "heap usage: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
+ delete csa; // destroy the collection before taking the final heap reading
+ std::cerr << "After Delete [press enter]\n";
+ std::cerr << "heap usage: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
+ std::cin >> kbd; // NOTE(review): same as above -- cin is already at EOF, so this does not pause
+ return 0;
}