+ // Driver section: reads standard input line by line, inserts every non-empty
+ // line into a TextCollectionBuilder, and reports heap consumption relative to
+ // the number of bytes inserted after every 100000 insertions.
+ string str;
+ // Zero-initialised so the pointer has a defined value even if no line is read.
+ uchar* data = 0;
+ // i counts inserted strings; j accumulates the number of inserted bytes.
+ // NOTE(review): j is a plain int and will overflow once more than INT_MAX
+ // bytes have been inserted; the type is left unchanged here because code
+ // after this section may depend on it -- consider widening it.
+ int i = 0;
+ int j = 0;
+ int heap_base = HeapProfiler::GetHeapConsumption();
+ std::cerr << "Initial heap usage : " << heap_base << "\n";
+ // NOTE(review): the meaning of the argument 5 is not visible from this
+ // section -- confirm against TextCollectionBuilder::create.
+ TextCollectionBuilder *tcb = TextCollectionBuilder::create(5);
+ // Re-baseline after the builder is created so the deltas printed below are
+ // measured from this point.
+ heap_base = HeapProfiler::GetHeapConsumption();
+ std::cerr << "Heap usage after InitTextCollection : " << heap_base << "\n";
+ Tools::StartTimer();
+
+ // Loop on the result of getline itself instead of on cin.eof(): eof() only
+ // reports the EOF bit, so a read that fails for any other reason (badbit or
+ // failbit without EOF) would otherwise make this loop spin forever.
+ while (getline(cin, str)) { // Read line by line.
+ // cin >> str; // Read word by word.
+     data = (uchar *) str.c_str();
+     // Empty lines are skipped and are not counted.
+     if (str.empty())
+         continue;
+
+     tcb->InsertText(data);
+     i++;
+     j += str.size();
+     str.clear();
+     // Progress report every 100000 inserted strings.
+     if (i % 100000 == 0) {
+         std::cerr << "Inserted : " << i << " strings\n";
+         std::cerr << "Number of bytes inserted : " << j << "b \n";
+         std::cerr << "Heap usage used for strings: " << HeapProfiler::GetHeapConsumption() - heap_base
+                   << "bytes\n";
+         std::cerr << "Ratio is : "
+                   << static_cast<float>(HeapProfiler::GetHeapConsumption() - heap_base) / static_cast<float>(j)
+                   << "\n";
+
+     }
+
+ }
+/**/
+ //the whole file as 20 strings:
+ /* uchar *temp = Tools::GetFileContents("data/english.100MB", 0);
+ ulong n = strlen((char *)temp);
+ std::cout << "n = " << n << std::endl;
+ ulong offset = n/40;
+ uchar *it = temp;
+ for (i = 0; i < 5; ++i)
+ {
+ it[offset] = '\0';
+ tcb->InsertText(it);
+ std::cout << "inserted " << strlen((char *)it) << " bytes." << std::endl;
+ it += offset +1;
+ }
+ it -= offset+1;