3 #include "rlcsa_builder.h"
// Constructs a builder and records the requested block size, sample rate and
// buffer size in the members of the same names.
// NOTE(review): the initializer list ends with a comma, so further member
// initializers and the constructor body follow; they are not described here.
10 RLCSABuilder::RLCSABuilder(usint _block_size, usint _sample_rate, usint _buffer_size) :
11 block_size(_block_size), sample_rate(_sample_rate), buffer_size(_buffer_size),
// Destructor.
17 RLCSABuilder::~RLCSABuilder()
// The sequence buffer is allocated with new[] in this class, hence delete[].
20 delete[] this->buffer;
23 //--------------------------------------------------------------------------
// Adds one sequence to the builder. Depending on the builder state the
// sequence is either indexed on its own and handed to addRLCSA(), or copied
// into the internal buffer to be indexed later together with other sequences.
//
// sequence         characters to insert; a null pointer is tolerated.
// length           number of characters in sequence.
// delete_sequence  if true, sequence is released with delete[] as soon as it is
//                  no longer needed (either here or by addRLCSA()).
26 RLCSABuilder::insertSequence(char* sequence, usint length, bool delete_sequence)
// Nothing to insert, or the builder is already in a failed state: the only
// work left is to honour the ownership request.
28 if(sequence == 0 || length == 0 || !this->ok)
30 if(delete_sequence) { delete[] sequence; }
// Direct path: build an index for this sequence alone and time the construction.
// NOTE(review): the condition guarding this path is not visible here; confirm.
36 clock_t start = clock();
37 RLCSA* temp = new RLCSA((uchar*)sequence, length, this->block_size, this->sample_rate, false, false);
38 this->build_time += clock() - start;
// addRLCSA() receives length + 1.
// NOTE(review): presumably this counts a terminating 0 byte following the
// sequence; verify that callers guarantee that byte exists.
39 this->addRLCSA(temp, (uchar*)sequence, length + 1, delete_sequence);
// Strictly greater: besides the sequence itself one more byte is needed for
// the 0 that is written after it.
43 if(this->buffer_size - this->chars > length)
// Append the sequence to the buffered data and follow it with a 0 byte.
45 memcpy(this->buffer + this->chars, sequence, length);
46 if(delete_sequence) { delete[] sequence; }
47 this->chars += length;
48 this->buffer[this->chars] = 0;
// A fresh buffer of buffer_size bytes is allocated.
// NOTE(review): the surrounding condition is not visible here.
54 this->buffer = new uchar[this->buffer_size];
// The sequence plus its 0 byte would not fit even into an empty buffer, so it
// is indexed directly, exactly as in the direct path above.
55 if(length >= this->buffer_size - 1)
57 clock_t start = clock();
58 RLCSA* temp = new RLCSA((uchar*)sequence, length, this->block_size, this->sample_rate, false, false);
59 this->build_time += clock() - start;
60 this->addRLCSA(temp, (uchar*)sequence, length + 1, delete_sequence);
// Copy the sequence into the buffer and follow it with a 0 byte.
64 memcpy(this->buffer + this->chars, sequence, length);
65 if(delete_sequence) { delete[] sequence; }
66 this->chars += length;
67 this->buffer[this->chars] = 0;
// Hands out the index built so far.
74 RLCSABuilder::getRLCSA()
// Sequences still waiting in the buffer are indexed first so that they are
// part of the index picked up below.
76 if(this->chars > 0) { this->flush(); }
// Keep the current index pointer in a local.
// NOTE(review): what happens to this->index afterwards is not visible here.
78 RLCSA* temp = this->index;
// Returns the BWT read from the current index.
//
// length  output parameter; set to the index size plus the number of sequences
//         in the index.
// Returns the result of RLCSA::readBWT() cast to char*.
85 RLCSABuilder::getBWT(usint& length)
// A buffer of buffer_size bytes is allocated when buffering is enabled.
// NOTE(review): the statements preceding this allocation are not visible here.
90 if(this->buffer_size > 0) { this->buffer = new uchar[this->buffer_size]; }
// No index, or the builder is in a failed state.
// NOTE(review): the handling of this case is not visible here.
93 if(this->index == 0 || !(this->ok))
99 length = this->index->getSize() + this->index->getNumberOfSequences();
100 return (char*)(this->index->readBWT());
// Returns the accumulated index construction time in seconds.
110 RLCSABuilder::getBuildTime()
// build_time is a sum of clock() differences; dividing by CLOCKS_PER_SEC as a
// double converts the tick count to seconds without integer truncation.
112 return this->build_time / (double)CLOCKS_PER_SEC;
// Returns the accumulated search time in seconds.
116 RLCSABuilder::getSearchTime()
// search_time is a sum of clock() differences; dividing by CLOCKS_PER_SEC as a
// double converts the tick count to seconds without integer truncation.
118 return this->search_time / (double)CLOCKS_PER_SEC;
// Returns the accumulated merge time in seconds.
122 RLCSABuilder::getMergeTime()
// merge_time is a sum of clock() differences; dividing by CLOCKS_PER_SEC as a
// double converts the tick count to seconds without integer truncation.
124 return this->merge_time / (double)CLOCKS_PER_SEC;
127 //--------------------------------------------------------------------------
// Builds an index from everything currently held in the buffer and passes it
// to addRLCSA(). The builder no longer references the buffer afterwards.
130 RLCSABuilder::flush()
// Time the construction of the index for the buffered data.
132 clock_t start = clock();
// The last two constructor arguments are `true` and "no index exists yet".
// NOTE(review): their meaning is defined by the RLCSA constructor; confirm there.
133 RLCSA* temp = new RLCSA(this->buffer, this->chars, this->block_size, this->sample_rate, true, (this->index == 0));
134 this->build_time += clock() - start;
// delete_sequence is true: the buffer is handed over to addRLCSA().
135 this->addRLCSA(temp, this->buffer, this->chars, true);
// Drop the handed-over pointer and mark the buffer as empty.
136 this->buffer = 0; this->chars = 0;
// Adds the index `increment`, built for `sequence`, to the builder: it either
// becomes the builder's index or is merged with the existing one.
//
// increment        index built for the new data.
// sequence         the data `increment` was built from.
// length           number of bytes of `sequence` to process; one entry of the
//                  positions array is allocated per byte.
// delete_sequence  if true, sequence is released with delete[] once the
//                  positions have been computed.
140 RLCSABuilder::addRLCSA(RLCSA* increment, uchar* sequence, usint length, bool delete_sequence)
// Start of the search phase (accounted to search_time below).
144 clock_t start = clock();
146 usint* positions = new usint[length];
// Walk over the data and query the existing index for the sub-range
// [begin, i) whenever one is complete.
// NOTE(review): the condition that ends a sub-range and the update of `begin`
// are not visible here; presumably sub-ranges are split at 0 bytes. Confirm.
148 for(usint i = 0; i < length - 1; i++)
152 this->index->reportPositions(&(sequence[begin]), i - begin, &(positions[begin]));
// Same query for the final sub-range, which ends at length - 1.
156 this->index->reportPositions(&(sequence[begin]), length - 1 - begin, &(positions[begin]));
// Sort the reported positions, then shift the i-th smallest by i + 1.
158 std::sort(positions, positions + length);
159 for(usint i = 0; i < length; i++)
161 positions[i] += i + 1; // +1 because the insertion will be after positions[i]
// The raw data is not needed once the positions are known.
163 if(delete_sequence) { delete[] sequence; }
// End of the search phase, start of the merge phase.
165 clock_t mark = clock();
166 this->search_time += mark - start;
// Build the merged index from the existing index, the increment and the
// computed positions.
168 RLCSA* merged = new RLCSA(*(this->index), *increment, positions, this->block_size);
172 this->index = merged;
174 this->merge_time += clock() - mark;
// The increment itself becomes the builder's index.
// NOTE(review): presumably the branch taken when no index existed before;
// the condition is not visible here.
178 this->index = increment;
// The builder stays ok only while every resulting index reports ok.
181 this->ok &= this->index->isOk();
// Resets the builder state.
// NOTE(review): only the buffer allocation is visible here; other state this
// method may reset is not described.
185 RLCSABuilder::reset()
// Buffering is enabled only for a non-zero buffer size; in that case a buffer
// of buffer_size bytes is allocated.
189 if(this->buffer_size != 0)
191 this->buffer = new uchar[this->buffer_size];