12 This program writes the BWT of the collection into a file and reports the number of runs.
// Scans `length` bytes of `buffer` and accumulates a run counter.
// `prev` is supplied by the caller (main passes the last symbol of the
// previous chunk, or CHARS for the first call).
// NOTE(review): this excerpt omits several lines of the body, so how `prev`
// is compared/updated and what is returned are not visible here — confirm
// against the full file before relying on this description.
17 countRuns(usint prev, uchar* buffer, usint length)
21 for(usint i = 0; i < length; i++)
// Only nonzero bytes bump the counter on this line; presumably the enclosing
// (not shown) condition checks for a change from `prev` — TODO confirm.
26 if(buffer[i] != 0) { runs++; }
// Entry point. Takes a base name and an optional buffer size, creates
// "<base_name>.bwt", loads the RLCSA for base_name, writes its BWT to the
// file, then prints throughput and two run counts (one computed while
// writing, one via rlcsa.countRuns()).
// NOTE(review): branch conditions, early exits and any buffer cleanup fall on
// lines that are not part of this excerpt.
35 main(int argc, char** argv)
37 std::cout << "RLCSA to BWT converter" << std::endl;
40 std::cout << "Usage: read_bwt base_name [buffer_size]" << std::endl;
44 std::string base_name = argv[1];
45 std::string bwt_name = base_name + ".bwt";
46 std::cout << "BWT: " << bwt_name << std::endl;
// Output is opened in binary mode so the BWT bytes are written unmodified.
47 std::ofstream bwt_file(bwt_name.c_str(), std::ios_base::binary);
50 std::cerr << "Error creating BWT file!" << std::endl;
53 std::cout << std::endl;
55 RLCSA rlcsa(base_name);
// Timing starts after the index has been constructed.
56 clock_t start = clock();
// buffer_size stays 0 unless a second argument is given.
// NOTE(review): atoi reports no parse errors; a non-numeric argument yields 0.
57 usint buffer_size = 0;
58 if(argc > 2) { buffer_size = atoi(argv[2]); }
// Number of BWT positions to read: collection size plus one per sequence
// (presumably the per-sequence end markers — TODO confirm).
59 usint n = rlcsa.getSize() + rlcsa.getNumberOfSequences();
// prev starts at CHARS, presumably a value no actual byte can equal so the
// first symbol is treated as a change — TODO confirm.
61 usint runs = 0, prev = CHARS;
// Chunked path: read and write the BWT buffer_size positions at a time.
// Presumably taken only when buffer_size is nonzero (the guard is not shown).
64 for(usint i = 0; i < n; i += buffer_size)
// Closed range [i, i + buffer_size - 1], clipped to the last position n - 1.
66 pair_type range(i, std::min(i + buffer_size - 1, n - 1));
67 uchar* bwt = rlcsa.readBWT(range);
70 runs += countRuns(prev, bwt, length(range));
// Carry the last symbol of this chunk into the next countRuns call.
71 prev = bwt[length(range) - 1];
72 bwt_file.write((char*)bwt, length(range));
// Unbuffered path: read the whole BWT in one call and write it at once.
79 uchar* bwt = rlcsa.readBWT();
82 runs = countRuns(prev, bwt, n);
83 bwt_file.write((char*)bwt, n);
// Report elapsed clock() time and throughput in MEGABYTE units.
88 clock_t stop = clock();
89 double time = ((stop - start) / (double)CLOCKS_PER_SEC);
90 double megabytes = n / (double)MEGABYTE;
91 std::cout << megabytes << " megabytes in " << time << " seconds (" << (megabytes / time) << " MB/s)" << std::endl;
92 std::cout << std::endl;
94 // Testing direct reporting of the number of runs.
95 // This is as expensive as reading the BWT.
// Print the count gathered while writing, then overwrite `runs` with the
// value reported directly by the index so the two can be compared by eye.
96 std::cout << "Number of runs: " << runs << std::endl;
97 runs = rlcsa.countRuns();
98 std::cout << "Number of runs (direct count): " << runs << std::endl;
99 std::cout << std::endl;