X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=incbwt%2Fsasamples.cpp;h=6747370e6a7870639291c89a03618f6c0325027d;hb=338b9d601d0bae8ee76813852ea146ef7ba901d8;hp=8b1aef32debf4adbbcffc8a9fe66fd6c73dab1ca;hpb=40ddf9aca842bdc081b6350a4ebfe36b066c94c9;p=SXSI%2FTextCollection.git diff --git a/incbwt/sasamples.cpp b/incbwt/sasamples.cpp index 8b1aef3..6747370 100644 --- a/incbwt/sasamples.cpp +++ b/incbwt/sasamples.cpp @@ -1,10 +1,17 @@ #include #include #include +#include #include "sasamples.h" #include "misc/utils.h" +#ifdef MULTITHREAD_SUPPORT +#ifndef _GLIBCXX_PARALLEL +#include +#endif +#endif + namespace CSA { @@ -19,38 +26,91 @@ SASamples::SASamples(std::ifstream& sample_file, usint sample_rate) : this->items = indexes->getNumberOfItems(); this->integer_bits = length(this->items - 1); - this->samples = new FastBitBuffer(sample_file, this->items, this->integer_bits); + this->samples = new ReadBuffer(sample_file, this->items, this->integer_bits); this->buildInverseSamples(); } -SASamples::SASamples(usint* array, usint data_size, usint sample_rate) : +SASamples::SASamples(uint* array, usint data_size, usint sample_rate) : rate(sample_rate), size(data_size) { this->items = (this->size - 1) / this->rate + 1; this->integer_bits = length(this->items - 1); - this->samples = new FastBitBuffer(this->items, this->integer_bits); - DeltaEncoder encoder(SASamples::BLOCK_SIZE); + WriteBuffer sample_buffer(this->items, this->integer_bits); + DeltaEncoder encoder(INDEX_BLOCK_SIZE); for(usint i = 0; i < this->size; i++) { - if(array[i] % rate == 0) + if(array[i] % sample_rate == 0) { encoder.setBit(i); - this->samples->writeItem(array[i] / sample_rate); + sample_buffer.writeItem(array[i] / sample_rate); } } + this->indexes = new DeltaVector(encoder, this->size); + this->samples = sample_buffer.getReadBuffer(); + this->buildInverseSamples(); +} + +SASamples::SASamples(uint* inverse, DeltaVector* end_points, usint data_size, usint sample_rate) : + rate(sample_rate), + items(0) +{ + usint sequences = end_points->getNumberOfItems(); + + DeltaVector::Iterator iter(*(end_points)); + + // Determine the samples, insert them into a vector, and sort them. + usint start = 0, end = iter.select(0); // Closed range in padded collection. + usint seq_start = 0, seq_end = end; // Closed range in inverse SA. + std::vector* vec = new std::vector(); + for(usint i = 0; i < sequences; i++) + { + for(usint j = seq_start; j <= seq_end; j += this->rate) + { + vec->push_back(pair_type(inverse[j] - sequences - 1, this->items)); + this->items++; + } + start = nextMultipleOf(this->rate, end); + end = iter.selectNext(); + seq_start = seq_end + 2; + seq_end = seq_start + end - start; + } + #ifdef MULTITHREAD_SUPPORT + #ifdef _GLIBCXX_PARALLEL + std::sort(vec->begin(), vec->end(), __gnu_parallel::sequential_tag()); + #else + std::sort(vec->begin(), vec->end(), mcstl::sequential_tag()); + #endif + #else + std::sort(vec->begin(), vec->end()); + #endif + + // Compress the samples. + this->integer_bits = length(this->items - 1); + this->size = end + 1; + WriteBuffer sample_buffer(this->items, this->integer_bits); + DeltaEncoder encoder(INDEX_BLOCK_SIZE); + for(usint i = 0; i < this->items; i++) + { + pair_type sample = (*vec)[i]; + encoder.setBit(sample.first); + sample_buffer.writeItem(sample.second); + } + delete vec; + this->indexes = new DeltaVector(encoder, this->size); + this->samples = sample_buffer.getReadBuffer(); this->buildInverseSamples(); } -SASamples::SASamples(SASamples& index, SASamples& increment, usint* positions, usint number_of_positions) : +SASamples::SASamples(SASamples& index, SASamples& increment, usint* positions, usint number_of_positions, usint number_of_sequences) : rate(index.rate), size(index.size + increment.size), items(index.items + increment.items) { - this->mergeSamples(index, increment, positions, number_of_positions); + this->mergeSamples(index, increment, positions, number_of_positions, number_of_sequences); index.indexes = 0; index.samples = 0; @@ -69,37 +129,19 @@ SASamples::~SASamples() delete this->inverse_samples; } -void -SASamples::writeTo(std::ofstream& sample_file) -{ - this->indexes->writeTo(sample_file); - this->samples->writeBuffer(sample_file, false); -} - //-------------------------------------------------------------------------- -usint -SASamples::inverseSA(usint value) -{ - if(value >= this->size) { return this->size; } - usint i = this->inverse_samples->readItem(value / this->rate); - return this->indexes->select(i); -} - -pair_type -SASamples::getFirstSampleAfter(usint index) +void +SASamples::writeTo(std::ofstream& sample_file) const { - if(index >= this->size) { return pair_type(this->size, this->size); } - - return this->indexes->valueAfter(index); + this->indexes->writeTo(sample_file); + this->samples->writeBuffer(sample_file); } -//-------------------------------------------------------------------------- - usint -SASamples::reportSize() +SASamples::reportSize() const { - usint bytes = sizeof(*this) - sizeof(this->indexes); + usint bytes = sizeof(*this); bytes += this->indexes->reportSize(); bytes += this->samples->reportSize(); bytes += this->inverse_samples->reportSize(); @@ -111,46 +153,47 @@ SASamples::reportSize() void SASamples::buildInverseSamples() { - this->inverse_samples = new FastBitBuffer(this->items, this->integer_bits); + WriteBuffer inverse_buffer(this->items, this->integer_bits); this->samples->goToItem(0); for(usint i = 0; i < this->items; i++) { - this->inverse_samples->goToItem(this->samples->readItem()); - this->inverse_samples->writeItem(i); + inverse_buffer.goToItem(this->samples->readItem()); + inverse_buffer.writeItem(i); } + + this->inverse_samples = inverse_buffer.getReadBuffer(); } //-------------------------------------------------------------------------- - void -SASamples::mergeSamples(SASamples& index, SASamples& increment, usint* positions, usint n) +SASamples::mergeSamples(SASamples& index, SASamples& increment, usint* positions, usint n, usint skip) { - DeltaVector* first = index.indexes; - DeltaVector* second = increment.indexes; - FastBitBuffer* first_samples = index.samples; - FastBitBuffer* second_samples = increment.samples; + DeltaVector::Iterator first(*(index.indexes)); + DeltaVector::Iterator second(*(increment.indexes)); + ReadBuffer* first_samples = index.samples; + ReadBuffer* second_samples = increment.samples; - usint first_bit = first->select(0); + usint first_bit = first.select(0); bool first_finished = false; - usint second_bit = second->select(0); + usint second_bit = second.select(0); usint sum = index.items; first_samples->goToItem(0); second_samples->goToItem(0); - DeltaEncoder encoder(SASamples::BLOCK_SIZE); + DeltaEncoder encoder(INDEX_BLOCK_SIZE); this->integer_bits = length(this->items - 1); - this->samples = new FastBitBuffer(this->items, this->integer_bits); + WriteBuffer sample_buffer(this->items, this->integer_bits); for(usint i = 0; i < n; i++, first_bit++) { - while(!first_finished && first_bit < positions[i]) + while(!first_finished && first_bit < positions[i] - skip) { encoder.setBit(first_bit); - this->samples->writeItem(first_samples->readItem()); - if(first->hasNext()) + sample_buffer.writeItem(first_samples->readItem()); + if(first.hasNext()) { - first_bit = first->selectNext() + i; + first_bit = first.selectNext() + i; } else { @@ -160,18 +203,18 @@ SASamples::mergeSamples(SASamples& index, SASamples& increment, usint* positions if(i == second_bit) // positions[i] is one { - encoder.setBit(positions[i]); - this->samples->writeItem(second_samples->readItem() + sum); - second_bit = second->selectNext(); + encoder.setBit(positions[i] - skip); + sample_buffer.writeItem(second_samples->readItem() + sum); + second_bit = second.selectNext(); } } while(!first_finished) { encoder.setBit(first_bit); - this->samples->writeItem(first_samples->readItem()); - if(!first->hasNext()) { break; } - first_bit = first->selectNext() + n; + sample_buffer.writeItem(first_samples->readItem()); + if(!first.hasNext()) { break; } + first_bit = first.selectNext() + n; } delete index.indexes; @@ -182,6 +225,7 @@ SASamples::mergeSamples(SASamples& index, SASamples& increment, usint* positions delete increment.inverse_samples; this->indexes = new DeltaVector(encoder, size); + this->samples = sample_buffer.getReadBuffer(); }