X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=incbwt%2Fsasamples.h;fp=incbwt%2Fsasamples.h;h=da154d0cb738aa7f6064e436a81399089865d4f5;hb=13e254b7c0ee22dffbc7c3125cee0408f9b375da;hp=bb4eee828c6218179ccfa3aea62315d20a7edec6;hpb=e4b6bdc7cc2a1372e4d4dae50acac55cebcc7e9b;p=SXSI%2FTextCollection.git diff --git a/incbwt/sasamples.h b/incbwt/sasamples.h index bb4eee8..da154d0 100644 --- a/incbwt/sasamples.h +++ b/incbwt/sasamples.h @@ -15,37 +15,52 @@ namespace CSA class SASamples { public: - const static usint BLOCK_SIZE = 16; + const static usint INDEX_BLOCK_SIZE = 16; SASamples(std::ifstream& sample_file, usint sample_rate); - SASamples(usint* array, usint data_size, usint sample_rate); + + // These assume < 2 GB data. Use the second one when there are multiple sequences. + SASamples(uint* array, usint data_size, usint sample_rate); + SASamples(uint* inverse, DeltaVector* end_points, usint data_size, usint sample_rate); + ~SASamples(); // Destroys contents of index and increment. // We assume index and increment have same sample rate. - SASamples(SASamples& index, SASamples& increment, usint* positions, usint number_of_positions); + // positions must not contain the positions of end of sequence markers. + // number_of_sequences is subtracted from each position before the value is used. + SASamples(SASamples& index, SASamples& increment, usint* positions, usint number_of_positions, usint number_of_sequences); - void writeTo(std::ofstream& sample_file); + void writeTo(std::ofstream& sample_file) const; // Returns i such that SA[i] = value. // If SA[i] is not sampled, returns the next sampled value. (Don't try!) // Value is actual 0-based suffix array value. // Returns size if value is too large. 
- usint inverseSA(usint value); + inline usint inverseSA(usint value) const + { + if(value >= this->size) { return this->size; } + DeltaVector::Iterator iter(*(this->indexes)); + return iter.select(this->inverse_samples->readItemConst(value / this->rate)); + } // Returns the value of ith sample in suffix array order. - inline usint getSample(usint i) + inline usint getSample(usint i) const { - return std::min(this->samples->readItem(i) * this->rate, this->size - 1); + return std::min(this->samples->readItemConst(i) * this->rate, this->size - 1); } // Returns (ind, sample number) where ind >= index or (size, ???). - pair_type getFirstSampleAfter(usint index); + inline pair_type getFirstSampleAfter(usint index) const + { + DeltaVector::Iterator iter(*(this->indexes)); + return iter.valueAfter(index); + } - inline usint getSampleRate() { return this->rate; } - inline usint getNumberOfSamples() { return this->items; } + inline usint getSampleRate() const { return this->rate; } + inline usint getNumberOfSamples() const { return this->items; } - usint reportSize(); + usint reportSize() const; private: usint integer_bits; @@ -53,13 +68,18 @@ class SASamples DeltaVector* indexes; - FastBitBuffer* samples; - FastBitBuffer* inverse_samples; + ReadBuffer* samples; + ReadBuffer* inverse_samples; void buildInverseSamples(); // Note: contents of original samples are deleted. - void mergeSamples(SASamples& index, SASamples& increment, usint* positions, usint n); + void mergeSamples(SASamples& index, SASamples& increment, usint* positions, usint n, usint skip); + + // These are not allowed. + SASamples(); + SASamples(const SASamples&); + SASamples& operator = (const SASamples&); };