X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=testTextCollection.cpp;h=59b6b43c11f2d64ef305605e0ecdf67c2b3fb912;hb=54b24bcc08f601be0c264fefc65af50bfb3837e5;hp=d89a3285409064d2e5aaf2464020208e72131810;hpb=ccc18959e93d8986c0ce81209865a0a2b5f42be6;p=SXSI%2FTextCollection.git diff --git a/testTextCollection.cpp b/testTextCollection.cpp index d89a328..59b6b43 100644 --- a/testTextCollection.cpp +++ b/testTextCollection.cpp @@ -1,9 +1,14 @@ // Test driver for text collection #include +#include +#include using std::cout; using std::endl; +using std::cin; +using std::string; #include "TextCollection.h" +#include "HeapProfiler.h" using SXSI::TextCollection; void printDocumentResult(TextCollection::document_result dr) @@ -28,47 +33,55 @@ void printFullResult(TextCollection::full_result fr) int main() { - uchar *text = (uchar*) "acabab"; - TextCollection *csa = TextCollection::InitTextCollection(1); - csa->InsertText(text); - text = (uchar*) "abaca"; - csa->InsertText(text); - text = (uchar*) "abacb"; - csa->InsertText(text); + std::string kbd; + string str; + uchar* data; + int i = 0 ,j = 0; + int heap_base = HeapProfiler::GetHeapConsumption(); + std::cerr << "Initial heap usage : " << heap_base << "\n"; + TextCollection *csa = TextCollection::InitTextCollection(5); // Avoid small samplerates ;) + heap_base = HeapProfiler::GetHeapConsumption (); + std::cerr << "Heap usage after InitTextCollection : " << heap_base << "\n"; - csa->MakeStatic(); -// FILE *pFile = fopen ( "mysave.txt" , "rb" ); -// csa->Load(pFile); - - text = csa->GetText(0); - cout << "Text 0: \"" << text << "\"" << endl; - delete [] text; - text = csa->GetText(1); - cout << "Text 1: \"" << text << "\"" << endl; - delete [] text; - text = csa->GetText(2); - cout << "Text 2: \"" << text << "\"" << endl; - delete [] text; - - text = csa->GetText(2, 2, 4); - cout << "Substring of Text 3: \"" << text << "\"" << endl; - delete [] text; - - printf("n:o contains: %u\n", csa->CountContains((uchar *)"ac")); - printf("n:o suffix: %u\n", csa->CountSuffix((uchar *)"b")); - printf("n:o equal: %u\n", csa->CountEqual((uchar *)"acabab")); - printf("is equal: %u\n", csa->IsEqual((uchar *)"abacb")); - - TextCollection::document_result dr; - dr = csa->Contains((uchar*)"ab"); - printDocumentResult(dr); - TextCollection::full_result fr; - fr = csa->FullContains((uchar *)"ab"); - printFullResult(fr); + while (not(cin.eof())){ + getline(cin,str); // Read line by line. +// cin >> str; // Read word by word. + data = (uchar *) str.c_str(); + csa->InsertText(data); + i++; + j+= str.size(); + str.clear(); + if ( i % 1000 == 0) { + std::cerr << "Inserted : " << i << " strings\n"; + std::cerr << "Number of bytes inserted : " << j << "b \n"; + std::cerr << "Heap usage used for strings: " << HeapProfiler::GetHeapConsumption() - heap_base + << "bytes\n"; + std::cerr << "Ratio is : " << (float) (HeapProfiler::GetHeapConsumption() - heap_base) / ((float) j) <<"\n"; + + }; + + }; -// FILE *pFile2 = fopen ( "mysave.txt" , "wb" ); -// csa->Save(pFile2); +/* the whole file as one string: + uchar *temp = Tools::GetFileContents("data.txt", 0); + csa->InsertText(temp); + delete [] temp;*/ - delete csa; + std::cerr << "Creating new text collection with " << i << " strings (total " << j/1024 << " kb)\n"; + std::cerr << "Before MakeStatic() [press enter]\n"; + std::cin >> kbd; + // This will print the maximum mem usage during construction time: + std::cerr << "max heap usage: " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes" << std::endl; + csa->MakeStatic(); + std::cerr << "After MakeStatic() [press enter]\n"; + // This will print the maximum mem usage during MakeStatic(): + std::cerr << "max heap usage: " << HeapProfiler::GetMaxHeapConsumption()/(1024*1024) << " Mbytes" << std::endl; + std::cin >> kbd; + std::cerr << "heap usage: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " Mbytes" << std::endl; + delete csa; + std::cerr << "After Delete [press enter]\n"; + std::cerr << "heap usage: " << HeapProfiler::GetHeapConsumption()/(1024*1024) << " Mbytes" << std::endl; + std::cin >> kbd; + return 0; }