6 #include "rlcsa_builder.h"
7 #include "misc/utils.h"
// Builds an RLCSA from the file named by base_name, feeding the builder one
// input line at a time, writes the resulting index under base_name and prints
// timing statistics to stdout.
//
//   base_name   - path of the input file; also the base for the parameters
//                 file (base_name + PARAMETERS_EXTENSION) and for writeTo().
//   block_size  - stored as RLCSA_BLOCK_SIZE and passed on to the builder.
//   sample_rate - stored as SAMPLE_RATE and passed on to the builder.
//   buffer_size - builder buffer size in megabytes (multiplied by MEGABYTE).
//
// NOTE(review): this excerpt omits several original lines (return type,
// braces, conditions, loop header, return statements), so the control flow
// described below is partly inferred -- confirm against the full file.
15 lineByLineRLCSA(std::string base_name, usint block_size, usint sample_rate, usint buffer_size)
// Collect the construction parameters so they can be written next to the index.
17 Parameters parameters;
18 parameters.set(RLCSA_BLOCK_SIZE.first, block_size);
19 parameters.set(SAMPLE_RATE.first, sample_rate);
// SUPPORT_LOCATE / SUPPORT_DISPLAY are set to 1 in one branch and to 0 in the
// other; the condition choosing between the two pairs is not visible here
// (presumably it depends on sample_rate -- TODO confirm).
23 parameters.set(SUPPORT_LOCATE.first, 1);
24 parameters.set(SUPPORT_DISPLAY.first, 1);
28 parameters.set(SUPPORT_LOCATE.first, 0);
29 parameters.set(SUPPORT_DISPLAY.first, 0);
// Persist the parameters to base_name + PARAMETERS_EXTENSION.
32 std::string parameters_name = base_name + PARAMETERS_EXTENSION;
34 parameters.write(parameters_name);
// Open the input in binary mode; the error message below is presumably guarded
// by a stream-state check that is not visible in this excerpt.
36 std::cout << "Input: " << base_name << std::endl;
37 std::ifstream input_file(base_name.c_str(), std::ios_base::binary);
40 std::cerr << "Error opening input file!" << std::endl;
43 std::cout << "Buffer size: " << buffer_size << " MB" << std::endl;
44 std::cout << std::endl;
// Start the wall-clock timer before the builder is constructed, so `time`
// below covers builder construction as well as insertion and writing.
46 double start = readTimer();
47 RLCSABuilder builder(parameters.get(RLCSA_BLOCK_SIZE), parameters.get(SAMPLE_RATE), buffer_size * MEGABYTE);
// lines counts getline() calls; total accumulates the characters they extracted.
49 usint lines = 0, total = 0;
// Read one line into a fixed 16384-byte buffer (the enclosing loop header is
// not visible in this excerpt).
// NOTE(review): std::istream::getline sets failbit when the buffer fills up
// before a newline is found, so a line of 16383+ characters leaves the stream
// in a failed state -- see the FIXME below.
52 char buffer[16384]; // FIXME What if lines are longer? Probably fails.
53 input_file.getline(buffer, 16384);
// gcount() includes the newline when one was extracted.
54 usint chars = input_file.gcount();
55 lines++; total += chars;
// Report reads that reached the buffer capacity.
56 if(chars >= 16383) { std::cout << lines << ": " << chars << " chars read!" << std::endl; }
// Skip reads of at most one character (e.g. an empty line, where only the
// newline was extracted); chars - 1 drops the counted newline.
// NOTE(review): if the last line has no trailing newline, gcount() does not
// include a delimiter, so chars - 1 would drop its final character -- confirm.
57 if(chars > 1) { builder.insertSequence(buffer, chars - 1, false); }
// Fetch the finished index from the builder and write it under base_name.
// The declaration of `rlcsa` and the check guarding the failure message are
// not visible in this excerpt.
63 rlcsa = builder.getRLCSA();
64 rlcsa->writeTo(base_name);
68 std::cerr << "Error: RLCSA construction failed!" << std::endl;
// Total elapsed time plus the per-phase timings reported by the builder.
72 double time = readTimer() - start;
73 double build_time = builder.getBuildTime();
74 double search_time = builder.getSearchTime();
75 double sort_time = builder.getSortTime();
76 double merge_time = builder.getMergeTime();
// Throughput is computed from rlcsa->getSize() converted to megabytes.
78 double megabytes = rlcsa->getSize() / (double)MEGABYTE;
79 usint sequences = rlcsa->getNumberOfSequences();
80 std::cout << sequences << " sequences" << std::endl;
81 std::cout << megabytes << " megabytes in " << time << " seconds (" << (megabytes / time) << " MB/s)" << std::endl;
82 std::cout << "(build " << build_time << " s, search " << search_time << "s, sort " << sort_time << " s, merge " << merge_time << " s)" << std::endl;
83 std::cout << std::endl;
// Entry point: prints a banner, reads the command-line arguments and forwards
// them to lineByLineRLCSA(), whose result becomes the process exit status.
//
// Arguments (by position): base_name, buffer_size, optional block_size,
// optional sample_rate.
//
// NOTE(review): the lines between the banner and the argument parsing are only
// partly visible; the usage message is presumably guarded by an argc check --
// confirm against the full file.
91 main(int argc, char** argv)
93 std::cout << "Line-by-line RLCSA builder" << std::endl;
// NOTE(review): the usage text names the program "build_rlcsa", while the
// banner above calls it the line-by-line builder -- verify which is intended.
96 std::cout << "Usage: build_rlcsa base_name buffer_size [block_size [sample_rate]]" << std::endl;
99 std::cout << std::endl;
// Positions of the arguments within argv.
101 int name_arg = 1, buffer_arg = 2, block_arg = 3, sample_arg = 4;
// Optional arguments fall back to the .second member of the corresponding
// parameter constant when they are not given on the command line.
102 usint block_size = (argc > block_arg ? atoi(argv[block_arg]) : RLCSA_BLOCK_SIZE.second);
103 usint sample_rate = (argc > sample_arg ? atoi(argv[sample_arg]) : SAMPLE_RATE.second);
// argv[name_arg] and argv[buffer_arg] are read without a visible argc check on
// this line; atoi() performs no error reporting for non-numeric input.
104 return lineByLineRLCSA(argv[name_arg], block_size, sample_rate, atoi(argv[buffer_arg]));