#include <iostream>
#include <cstdlib>
#include <string>
-#include "HeapProfiler.h"
using std::cout;
using std::endl;
using std::cin;
using std::string;
-#include "TextCollection.h"
+#include "TextCollectionBuilder.h"
+#include "HeapProfiler.h"
+using SXSI::TextCollectionBuilder;
using SXSI::TextCollection;
void printDocumentResult(TextCollection::document_result dr)
int i = 0 ,j = 0;
int heap_base = HeapProfiler::GetHeapConsumption();
std::cerr << "Initial heap usage : " << heap_base << "\n";
- TextCollection *csa = TextCollection::InitTextCollection(5); // Avoid small samplerates ;)
+ TextCollectionBuilder *tcb = new TextCollectionBuilder(32);
heap_base = HeapProfiler::GetHeapConsumption ();
std::cerr << "Heap usage after InitTextCollection : " << heap_base << "\n";
-
+ Tools::StartTimer();
while (not(cin.eof())){
getline(cin,str); // Read line by line.
// cin >> str; // Read word by word.
data = (uchar *) str.c_str();
- csa->InsertText(data);
+ if (str.size() == 0)
+ continue;
+
+ tcb->InsertText(data);
i++;
j+= str.size();
str.clear();
- if ( i % 1000 == 0) {
+ if ( i % 100000 == 0) {
std::cerr << "Inserted : " << i << " strings\n";
std::cerr << "Number of bytes inserted : " << j << "b \n";
std::cerr << "Heap usage used for strings: " << HeapProfiler::GetHeapConsumption() - heap_base
};
};
+/**/
+ // the first 5 pieces of the file (n/40 bytes each) as separate strings:
+ /* uchar *temp = Tools::GetFileContents("data/english.100MB", 0);
+ ulong n = strlen((char *)temp);
+ std::cout << "n = " << n << std::endl;
+ ulong offset = n/40;
+ uchar *it = temp;
+ for (i = 0; i < 5; ++i)
+ {
+ it[offset] = '\0';
+ tcb->InsertText(it);
+ std::cout << "inserted " << strlen((char *)it) << " bytes." << std::endl;
+ it += offset +1;
+ }
+ it -= offset+1;
-/* the whole file as one string:
- uchar *temp = Tools::GetFileContents("data.txt", 0);
- csa->InsertText(temp);
+ if (it > temp + n)
+ std::cout << "over bounds" << std::endl;
delete [] temp;*/
+
+ HeapProfiler::ResetMaxHeapConsumption();
std::cerr << "Creating new text collection with " << i << " strings (total " << j/1024 << " kb)\n";
std::cerr << "Before MakeStatic() [press enter]\n";
- std::cin >> kbd;
+ //std::cin >> kbd;
// This will print the maximum mem usage during construction time:
std::cerr << "max heap usage: " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
- csa->MakeStatic();
+ TextCollection* tc = tcb->InitTextCollection();
+ delete tcb; tcb = 0;
std::cerr << "After MakeStatic() [press enter]\n";
// This will print the maximum mem usage during MakeStatic():
std::cerr << "max heap usage: " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
- std::cin >> kbd;
+ //std::cin >> kbd;
std::cerr << "heap usage: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
- delete csa;
+ delete tc;
std::cerr << "After Delete [press enter]\n";
- std::cerr << "heap usage: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " Mbytes" << std::endl;
- std::cin >> kbd;
+ std::cerr << "heap usage: " << HeapProfiler::GetHeapConsumption() << " bytes" << std::endl;
+ //std::cin >> kbd;
return 0;
}