Jouni's Incremental BWT integrated into TextCollection
[SXSI/TextCollection.git] / incbwt / sasamples.cpp
1 #include <algorithm>
2 #include <fstream>
3 #include <iostream>
4
5 #include "sasamples.h"
6 #include "misc/utils.h"
7
8
9 namespace CSA
10 {
11
12
13 SASamples::SASamples(std::ifstream& sample_file, usint sample_rate) :
14   rate(sample_rate)
15 {
16   this->indexes = new DeltaVector(sample_file);
17
18   this->size = indexes->getSize();
19   this->items = indexes->getNumberOfItems();
20   this->integer_bits = length(this->items - 1);
21
22   this->samples = new FastBitBuffer(sample_file, this->items, this->integer_bits);
23   this->buildInverseSamples();
24 }
25
26 SASamples::SASamples(usint* array, usint data_size, usint sample_rate) :
27   rate(sample_rate),
28   size(data_size)
29 {
30   this->items = (this->size - 1) / this->rate + 1;
31   this->integer_bits = length(this->items - 1);
32
33   this->samples = new FastBitBuffer(this->items, this->integer_bits);
34   DeltaEncoder encoder(SASamples::BLOCK_SIZE);
35   for(usint i = 0; i < this->size; i++)
36   {
37     if(array[i] % rate == 0)
38     {
39       encoder.setBit(i);
40       this->samples->writeItem(array[i] / sample_rate);
41     }
42   }
43   this->indexes = new DeltaVector(encoder, this->size);
44
45   this->buildInverseSamples();
46 }
47
48 SASamples::SASamples(SASamples& index, SASamples& increment, usint* positions, usint number_of_positions) :
49   rate(index.rate),
50   size(index.size + increment.size),
51   items(index.items + increment.items)
52 {
53   this->mergeSamples(index, increment, positions, number_of_positions);
54
55   index.indexes = 0;
56   index.samples = 0;
57   index.inverse_samples = 0;
58   increment.indexes = 0;
59   increment.samples = 0;
60   increment.inverse_samples = 0;
61
62   this->buildInverseSamples();
63 }
64
65 SASamples::~SASamples()
66 {
67   delete this->indexes;
68   delete this->samples;
69   delete this->inverse_samples;
70 }
71
72 void
73 SASamples::writeTo(std::ofstream& sample_file)
74 {
75   this->indexes->writeTo(sample_file);
76   this->samples->writeBuffer(sample_file, false);
77 }
78
79 //--------------------------------------------------------------------------
80
81 usint
82 SASamples::inverseSA(usint value)
83 {
84   if(value >= this->size) { return this->size; }
85   usint i = this->inverse_samples->readItem(value / this->rate);
86   return this->indexes->select(i);
87 }
88
89 pair_type
90 SASamples::getFirstSampleAfter(usint index)
91 {
92   if(index >= this->size) { return pair_type(this->size, this->size); }
93
94   return this->indexes->valueAfter(index);
95 }
96
97 //--------------------------------------------------------------------------
98
99 usint
100 SASamples::reportSize()
101 {
102   usint bytes = sizeof(*this) - sizeof(this->indexes);
103   bytes += this->indexes->reportSize();
104   bytes += this->samples->reportSize();
105   bytes += this->inverse_samples->reportSize();
106   return bytes;
107 }
108
109 //--------------------------------------------------------------------------
110
111 void
112 SASamples::buildInverseSamples()
113 {
114   this->inverse_samples = new FastBitBuffer(this->items, this->integer_bits);
115   this->samples->goToItem(0);
116   for(usint i = 0; i < this->items; i++)
117   {
118     this->inverse_samples->goToItem(this->samples->readItem());
119     this->inverse_samples->writeItem(i);
120   }
121 }
122
123
124 //--------------------------------------------------------------------------
125
126
127 void
128 SASamples::mergeSamples(SASamples& index, SASamples& increment, usint* positions, usint n)
129 {
130   DeltaVector* first = index.indexes;
131   DeltaVector* second = increment.indexes;
132   FastBitBuffer* first_samples = index.samples;
133   FastBitBuffer* second_samples = increment.samples;
134
135   usint first_bit = first->select(0);
136   bool first_finished = false;
137   usint second_bit = second->select(0);
138   usint sum = index.items;
139   first_samples->goToItem(0);
140   second_samples->goToItem(0);
141
142   DeltaEncoder encoder(SASamples::BLOCK_SIZE);
143   this->integer_bits = length(this->items - 1);
144   this->samples = new FastBitBuffer(this->items, this->integer_bits);
145   for(usint i = 0; i < n; i++, first_bit++)
146   {
147     while(!first_finished && first_bit < positions[i])
148     {
149       encoder.setBit(first_bit);
150       this->samples->writeItem(first_samples->readItem());
151       if(first->hasNext())
152       {
153         first_bit = first->selectNext() + i;
154       }
155       else
156       {
157         first_finished = true;
158       }
159     }
160
161     if(i == second_bit) // positions[i] is one
162     {
163       encoder.setBit(positions[i]);
164       this->samples->writeItem(second_samples->readItem() + sum);
165       second_bit = second->selectNext();
166     }
167   }
168
169   while(!first_finished)
170   {
171     encoder.setBit(first_bit);
172     this->samples->writeItem(first_samples->readItem());
173     if(!first->hasNext()) { break; }
174     first_bit = first->selectNext() + n;
175   }
176
177   delete index.indexes;
178   delete index.samples;
179   delete index.inverse_samples;
180   delete increment.indexes;
181   delete increment.samples;
182   delete increment.inverse_samples;
183
184   this->indexes = new DeltaVector(encoder, size);
185 }
186
187
188 } // namespace CSA