X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=incbwt%2Fbits%2Fvectors.h;fp=incbwt%2Fbits%2Fvectors.h;h=bc0aac0120b7009892841fb2b4f72f6dd317c83a;hb=13e254b7c0ee22dffbc7c3125cee0408f9b375da;hp=dfd4a21a20dc61a7bd417395f8970937aff301af;hpb=e4b6bdc7cc2a1372e4d4dae50acac55cebcc7e9b;p=SXSI%2FTextCollection.git diff --git a/incbwt/bits/vectors.h b/incbwt/bits/vectors.h index dfd4a21..bc0aac0 100644 --- a/incbwt/bits/vectors.h +++ b/incbwt/bits/vectors.h @@ -1,9 +1,6 @@ #ifndef VECTORS_H #define VECTORS_H -#include "deltavector.h" -#include "rlevector.h" - namespace CSA { @@ -14,9 +11,150 @@ namespace CSA The original vectors are deleted. */ -RLEVector* mergeVectors(RLEVector* first, RLEVector* second, usint* positions, usint n, usint size, usint block_size); +template +void +handleOne(E& encoder, pair_type& run, usint position) +{ + if(run.second == 0) + { + run.first = position; + run.second = 1; + return; + } + if(position == run.first + run.second) + { + run.second++; + return; + } + encoder.setRun(run.first, run.second); + run.first = position; + run.second = 1; +} + + +template +void +handleRun(E& encoder, pair_type& run, pair_type& next, usint limit) +{ + if(run.second == 0) + { + run.first = next.first; + run.second = std::min(limit - run.first, next.second); + next.first += run.second; + next.second -= run.second; + return; + } + + if(next.first == run.first + run.second) + { + usint cont = std::min(limit - next.first, next.second); + run.second += cont; + next.first += cont; + next.second -= cont; + return; + } + + encoder.setRun(run.first, run.second); + run.first = next.first; + run.second = std::min(limit - run.first, next.second);; + next.first += run.second; + next.second -= run.second; +} + + +template +V* +mergeVectors(V* first, V* second, usint* positions, usint n, usint size, usint block_size) +{ + if((first == 0 && second == 0) || positions == 0) { return 0; } + + I* first_iter = 0; + I* second_iter = 0; + + pair_type first_run; + bool first_finished; + if(first == 0) + { + first_run = pair_type(size, 0); + first_finished = true; + } + else + { + first_iter = new I(*first); + first_run = first_iter->selectRun(0, size); + first_run.second++; + first_finished = false; + } + + usint second_bit; + if(second == 0) + { + second_bit = n; + } + else + { + second_iter = new I(*second); + second_bit = second_iter->select(0); + } + + E encoder(block_size); + pair_type run = pair_type(size, 0); + for(usint i = 0; i < n; i++, first_run.first++) + { + while(!first_finished && first_run.first < positions[i]) + { + handleRun(encoder, run, first_run, positions[i]); + if(first_run.second == 0) + { + if(first_iter->hasNext()) + { + first_run = first_iter->selectNextRun(size); + first_run.first += i; + first_run.second++; + } + else + { + first_finished = true; + } + } + } + + if(i == second_bit) // positions[i] is one + { + handleOne(encoder, run, positions[i]); + second_bit = second_iter->selectNext(); + } + else // positions[i] is zero + { + if(run.second != 0) + { + encoder.setRun(run.first, run.second); + run.second = 0; + } + } + } + + while(!first_finished) + { + handleRun(encoder, run, first_run, size); + if(first_iter->hasNext()) + { + first_run = first_iter->selectNextRun(size); + first_run.first += n; + first_run.second++; + } + else { break; } + } + + if(run.second != 0) + { + encoder.setRun(run.first, run.second); + } -DeltaVector* mergeVectors(DeltaVector* first, DeltaVector* second, usint* positions, usint n, usint size, usint block_size); + delete first_iter; delete second_iter; + delete first; delete second; + return new V(encoder, size); +} } // namespace CSA