5 #include "../misc/definitions.h"
// Characters that mark a gap in an alignment column. concatenateStrain() looks
// each column character up in this set and keeps only those not found in it.
11 std::string gapChars = " =-_";
// Appends the non-gap characters of one strain to `text`.
//
//   text   - output string; kept characters are pushed onto its end. The lines
//            shown here never clear it, so existing contents are preserved.
//   file   - alignment stream; it is repositioned to the beginning first.
//   strain - strain index; the character at offset strain * 2 of each row is
//            the one examined (presumably every strain occupies two columns,
//            a symbol plus a separator -- TODO confirm against the file format).
//
// NOTE(review): this listing is gapped (embedded numbering jumps 14 -> 17 -> 23),
// so the return type, the declaration of `line` and the read loop that the
// `continue` below belongs to are not visible here.
14 concatenateStrain(std::string &text, std::ifstream &file, usint strain)
// Reposition to the start of the file before scanning.
// NOTE(review): seekg() cannot succeed while failbit is set (e.g. after an
// earlier read ran into end-of-file); confirm the stream state is cleared on a
// line not shown here.
17 file.seekg(0, std::ios::beg);
// Ignore blank rows and rows starting with '>' (presumably header rows).
23 if(line.empty() || line[0] == '>') { continue; }
// Upper-case the character in this strain's column.
// NOTE(review): no length check on `line` is visible before indexing at
// strain * 2, and toupper() is handed a plain char; a negative char value is
// outside the range toupper() is defined for -- consider an unsigned char cast.
24 char t = toupper(line[strain * 2]);
// Keep the character only if it is not one of the gap markers in gapChars.
25 if(gapChars.find(t) == std::string::npos) { text.push_back(t); }
// Command-line entry point: reads an alignment file (argv[1]), concatenates
// the gap-free text of strain 0 followed by strains 1..strains, and writes the
// result plus one trailing byte to the output file (argv[2]).
//
// NOTE(review): this listing is gapped; braces, the conditions guarding the
// usage/error messages, the declarations of `line` and `end`, and any return
// statements are on lines that are not visible here.
31 main(int argc, char** argv)
33 std::cout << "Extracting concatenated text from alignment" << std::endl;
// Usage text; presumably printed when too few arguments are given (the guard
// is not visible).
36 std::cout << "Usage: extract_text alignment_file output_file" << std::endl;
// Open the input in binary mode.
40 std::cout << "Alignment file: " << argv[1] << std::endl;
41 std::ifstream alignment_file(argv[1], std::ios_base::binary);
44 std::cerr << "Error opening alignment file!" << std::endl;
// Open the output in binary mode.
48 std::cout << "Output file: " << argv[2] << std::endl;
49 std::ofstream output_file(argv[2], std::ios_base::binary);
52 std::cerr << "Error creating output file!" << std::endl;
55 std::cout << std::endl;
58 std::getline(alignment_file, line); // Skipping header line.
// The second line of the file is used to derive the number of strains from
// its width.
59 std::getline(alignment_file, line);
// Half the row width minus one; index 0 is treated as the reference below, so
// this counts the remaining strains.
// NOTE(review): if usint is unsigned (as the name suggests) and the row has
// fewer than two characters, this wraps around -- confirm input is validated.
60 usint strains = line.size() / 2 - 1;
61 std::cout << "Number of strains: " << strains << std::endl;
// Strain 0 is the reference sequence; its characters are appended to `line`.
// NOTE(review): `line` still holds the row read above unless it is cleared on
// a line not shown here; otherwise that row would prefix the output and be
// counted in the lengths reported below.
64 concatenateStrain(line, alignment_file, 0);
65 usint n = line.size();
66 std::cout << "Reference sequence length: " << n << std::endl;
// Append the remaining strains, 1 through `strains` inclusive, to the same
// string.
68 for(usint i = 1; i <= strains; i++)
70 concatenateStrain(line, alignment_file, i);
// The reported total is the concatenated text plus the one extra byte written
// after it below.
72 usint N = line.size() + 1;
73 std::cout << "Total length: " << N << std::endl;
// N - 1 == line.size(): write the whole concatenated text.
75 output_file.write(line.c_str(), N - 1);
// Write one more byte taken from `end` (declared on a line not shown;
// presumably a terminator character -- TODO confirm).
77 output_file.write(&end, 1);
80 alignment_file.close();