ac619a164a4a36eeb5b030811ffdb64f34020cd7
[SXSI/TextCollection.git] / swcsa / intIndex / psiHuffmanRLE.c
1 #include "psiHuffmanRLE.h"
2
3 // IMPLEMENTACION DAS FUNCIONS
4
5 void destroyHuffmanCompressedPsi(HuffmanCompressedPsi *compressedPsi) {
6         freeHuff2(compressedPsi->diffsHT);
7         free(compressedPsi->samples);
8         free(compressedPsi->samplePointers);
9         free (compressedPsi->stream);
10 }
11
12 HuffmanCompressedPsi huffmanCompressPsi(unsigned int *Psi, unsigned int psiSize, unsigned int T, unsigned int nS) {
13         
14         HuffmanCompressedPsi cPsi;
15         
16         int absolute_value;
17         register unsigned int index, ptr, samplesPtr, samplePointersPtr;
18         unsigned int runLenght, binaryLenght;
19         
20         int *diffs;     
21         unsigned int *huffmanDst;
22         
23         // Estructuras da funcion comprimida (para logo asignar)
24         // Tam�n se podian almacenar directamente
25         THuff diffsHT;
26         unsigned int numberOfSamples;
27         unsigned int *samples;
28         unsigned int sampleSize;
29         unsigned int *samplePointers;
30         unsigned int pointerSize;
31         unsigned int *stream;
32         unsigned int streamSize;
33         
34         // Variables que marcan os intervalos dentro do vector de frecuencias
35         unsigned int runLenghtStart = nS - 64 - T;      // Inicio das Runs
36         unsigned int negStart = nS - 64;                        // Inicio dos Negativos
37         unsigned int bigStart = nS - 32;                        // Inicio dos Grandes (>runLenghtStart)
38         
39         // Para estadistica
40         unsigned int totalSize;
41         
42         // Reservamos espacio para a distribuci�n de valores de Psi
43         huffmanDst = (unsigned int *)malloc(sizeof(int)*nS);
44         for(index=0;index<nS;index++) huffmanDst[index]=0;
45         
46         // Inicializamos diferencias    
47         diffs = (int *)malloc(sizeof(int)*psiSize);     
48         diffs[0] = 0;
49         for(index=1; index<psiSize; index++) 
50                 diffs[index] = Psi[index] - Psi[index-1];
51         
52         // Calculamos a distribucion de frecuencias
53         runLenght = 0;
54         for(index=0; index<psiSize; index++) {
55                 
56                 if(index%T) {
57                         
58                         if(diffs[index]==1) {
59                                 runLenght++;
60                         } else {        // Non estamos nun run
61                                 if(runLenght) {
62                                         huffmanDst[runLenght+runLenghtStart]++;
63                                         runLenght = 0;
64                                 }
65                                 if(diffs[index]>1 && diffs[index]<runLenghtStart) 
66                                         huffmanDst[diffs[index]]++;
67                                 else
68                                         if(diffs[index]<0) {    // Valor negativo
69                                                 absolute_value = -diffs[index];
70                                                 binaryLenght = bits(absolute_value);
71                                                 huffmanDst[binaryLenght+negStart-1]++;
72                                         } else {                                // Valor grande >= 128
73                                                 absolute_value = diffs[index];
74                                                 binaryLenght = bits(absolute_value);
75                                                 huffmanDst[binaryLenght+bigStart-1]++;
76                                         }
77                         }
78                         
79                 } else { // Rompemos o run porque atopamos unha mostra
80                         if(runLenght) {
81                                 huffmanDst[runLenght+runLenghtStart]++;
82                                 runLenght = 0;
83                         }
84                 }
85                 
86         }
87         
88         if(runLenght) huffmanDst[runLenght+runLenghtStart]++;
89         
90         // Creamos o arbol de Huffman
91         diffsHT = createHuff(huffmanDst,nS-1,UNSORTED);
92         
93         // Calculamos o espacio total ocupado pola secuencia Huffman + RLE
94         streamSize = diffsHT.total;
95         for(index=negStart;index<bigStart;index++) 
96                 streamSize += huffmanDst[index]*(index-negStart+1);     // Negativos
97         for(index=bigStart;index<nS;index++) 
98                 streamSize += huffmanDst[index]*(index-bigStart+1);             // Grandes      
99         
100         // Calculamos o numero de mostras e o espacio ocupado por cada mostra e por cada punteiro
101         numberOfSamples = (psiSize+T-1)/T;      
102         sampleSize = bits(psiSize);
103         pointerSize = bits(streamSize); 
104
105         // Reservamos espacio para a secuencia e para as mostras e punteiros
106         samples = (unsigned int *)malloc(sizeof(uint)*((numberOfSamples*sampleSize+31)/32));    
107                 samples[((numberOfSamples*sampleSize+31)/32)-1] =0000; //initialized only to avoid valgrind warnings
108         samplePointers = (unsigned int *)malloc(sizeof(int)*((numberOfSamples*pointerSize+31)/32));
109                 samplePointers[((numberOfSamples*pointerSize+31)/32)-1] = 0000;  //initialized only to avoid valgrind warnings
110         stream = (unsigned int *)malloc(sizeof(int)*((streamSize+31)/32));
111                 stream[((streamSize+31)/32)-1]=0000;//initialized only to avoid valgrind warnings
112         
113         // Comprimimos secuencialmente (haber� que levar un punteiro desde o inicio)
114         ptr = 0;
115         samplesPtr = 0;
116         samplePointersPtr = 0;
117         runLenght = 0;
118         for(index=0; index<psiSize; index++) {
119                 
120                 if(index%T) {
121                         
122                         if(diffs[index]==1) {
123                                 runLenght++;
124                         } else {        // Non estamos nun run
125                                 if(runLenght) {
126                                         ptr = encodeHuff(diffsHT,runLenght+runLenghtStart,stream,ptr);
127                                         runLenght = 0;
128                                 }
129                                 if(diffs[index]>1 && diffs[index]<runLenghtStart) {                             
130                                         ptr = encodeHuff(diffsHT,diffs[index],stream,ptr);      
131                                 }       
132                                 else
133                                         if(diffs[index]<0) {    // Valor negativo
134                                                 absolute_value = -diffs[index];
135                                                 binaryLenght = bits(absolute_value);
136                                                 ptr = encodeHuff(diffsHT,binaryLenght+negStart-1,stream,ptr);
137                                                 bitwrite(stream,ptr,binaryLenght,absolute_value);
138                                                 ptr += binaryLenght;                                            
139                                         } else {                                // Valor grande >= 128
140                                                 absolute_value = diffs[index];
141                                                 binaryLenght = bits(absolute_value);                                    
142                                                 ptr = encodeHuff(diffsHT,binaryLenght+bigStart-1,stream,ptr);
143                                                 bitwrite(stream,ptr,binaryLenght,absolute_value);                                               
144                                                 ptr += binaryLenght;
145                                         }
146                         }
147                         
148                 } else { // Rompemos o run porque atopamos unha mostra
149                         if(runLenght) {                         
150                                 ptr = encodeHuff(diffsHT,runLenght+runLenghtStart,stream,ptr);
151                                 runLenght = 0;
152                         }
153                         bitwrite(samples,samplesPtr,sampleSize,Psi[index]);
154                         samplesPtr += sampleSize;
155                         bitwrite(samplePointers,samplePointersPtr,pointerSize,ptr);
156                         samplePointersPtr += pointerSize;
157                 }
158                 
159         }
160         
161         if(runLenght) { 
162                 ptr = encodeHuff(diffsHT,runLenght+runLenghtStart,stream,ptr);
163         }
164         
165         // Amosamos o espacio ocupado
166         totalSize = sizeof(HuffmanCompressedPsi) + 
167                 sizeof(int)*((numberOfSamples*sampleSize+31)/32) + 
168                 sizeof(int)*((numberOfSamples*pointerSize+31)/32) +
169                 sizeof(int)*((streamSize+31)/32) + sizeHuff2(diffsHT);
170
171         printf("\n\t Compressed Psi size = %d bytes, with %d different symbols.", totalSize, nS);
172         
173         // Necesario antes de decodificar
174         prepareToDecode(&diffsHT);
175         
176         // Asignamos os valores a cPsi e devolvemolo
177         cPsi.T = T;
178         cPsi.diffsHT = diffsHT;
179         cPsi.nS = nS;
180         cPsi.numberOfSamples = numberOfSamples;
181         cPsi.samples = samples;
182         cPsi.sampleSize = sampleSize;
183         cPsi.samplePointers = samplePointers;
184         cPsi.pointerSize = pointerSize;
185         cPsi.stream = stream;
186         cPsi.streamSize = streamSize;
187         cPsi.totalMem = totalSize;
188         
189         //frees resources not needed in advance
190         free(diffs);
191         free(huffmanDst);
192         
193         //returns the data structure that holds the compressed psi.
194         return cPsi;    
195 }
196
197
198 unsigned int getHuffmanPsiValue(HuffmanCompressedPsi *cPsi, unsigned int position) {
199         
200         register unsigned int index;
201         unsigned int sampleIndex, ptr, psiValue, huffmanCode, positionsSinceSample; 
202         unsigned int absolute_value, binaryLenght, runLenght;
203         
204         unsigned int runLenghtStart = cPsi->nS - 64 - cPsi->T;
205         unsigned int negStart = cPsi->nS - 64;
206         unsigned int bigStart = cPsi->nS - 32;  
207         
208         sampleIndex = position / cPsi->T;
209         psiValue = bitread(cPsi->samples,sampleIndex*cPsi->sampleSize,cPsi->sampleSize);
210         ptr = bitread(cPsi->samplePointers,sampleIndex*cPsi->pointerSize,cPsi->pointerSize);
211         
212         positionsSinceSample = position%cPsi->T;
213         
214         for(index=0;index<positionsSinceSample;index++) {
215                 
216                 ptr = decodeHuff(&cPsi->diffsHT,&huffmanCode,cPsi->stream,ptr);
217                 
218                 if(huffmanCode < runLenghtStart) {      // Incremento directo
219                         psiValue += huffmanCode;
220                 }       
221                 else 
222                         if(huffmanCode < negStart) {    // Estamos nun run
223                                 runLenght = huffmanCode - runLenghtStart;
224                                 if(index+runLenght>=positionsSinceSample)
225                                         return psiValue+positionsSinceSample-index;
226                                 else {
227                                         psiValue += runLenght;
228                                         index += runLenght-1;
229                                 }
230                         }
231                         else
232                                 if(huffmanCode < bigStart) {    // Negativo
233                                         binaryLenght = huffmanCode-negStart+1;
234                                         absolute_value = bitread(cPsi->stream,ptr,binaryLenght);
235                                         ptr += binaryLenght;
236                                         psiValue -= absolute_value;     
237                                 }
238                                 else {  // Grande
239                                         binaryLenght = huffmanCode-bigStart+1;
240                                         absolute_value = bitread(cPsi->stream,ptr,binaryLenght);
241                                         ptr += binaryLenght;
242                                         psiValue += absolute_value;                              
243                                 }
244                                 
245         }
246         
247         return psiValue;
248
249 }
250
251
252 void storeHuffmanCompressedPsi(HuffmanCompressedPsi *compressedPsi, char *filename) {
253
254         int file;
255         THuff H;
256
257         if( (file = open(filename, O_WRONLY|O_CREAT, 0700)) < 0) {
258                 printf("Cannot open file %s\n", filename);
259                 exit(0);
260         }
261         write(file, &(compressedPsi->T), sizeof(int));
262         // Almacenar o arbol de huffman
263         H = compressedPsi->diffsHT;
264         write(file, &H.max, sizeof(int));
265         write(file, &H.lim, sizeof(int));
266         write(file, &H.depth, sizeof(int));
267 //      write(file, H.s.spos, (H.lim+1)*sizeof(int));
268         write(file, H.s.symb, (H.lim+1)*sizeof(int));   
269         write(file, H.num, (H.depth+1)*sizeof(int));
270         write(file, H.fst, (H.depth+1)*sizeof(int));
271         // Fin de almacenar o arbol de huffman
272         write(file, &(compressedPsi->nS), sizeof(int));
273         write(file, &(compressedPsi->numberOfSamples), sizeof(int));
274         write(file, &(compressedPsi->sampleSize), sizeof(int));
275         write(file,     compressedPsi->samples, ((compressedPsi->numberOfSamples*compressedPsi->sampleSize+31)/32)*sizeof(int));        
276         write(file, &(compressedPsi->pointerSize), sizeof(int));
277         write(file,     compressedPsi->samplePointers, ((compressedPsi->numberOfSamples*compressedPsi->pointerSize+31)/32)*sizeof(int));
278         write(file, &(compressedPsi->streamSize), sizeof(int));
279         write(file,     compressedPsi->stream, ((compressedPsi->streamSize+31)/32)*sizeof(int));
280         write(file, &(compressedPsi->totalMem), sizeof(int));
281         
282         close(file);    
283
284 }
285
286
287 HuffmanCompressedPsi loadHuffmanCompressedPsi(char *filename) {
288         
289         HuffmanCompressedPsi compressedPsi;
290
291         THuff H;
292      
293         int file;
294
295         if( (file = open(filename, O_RDONLY)) < 0) {
296                 printf("Cannot read file %s\n", filename);
297                 exit(0);
298         }
299         read(file, &(compressedPsi.T), sizeof(int));
300         // Cargamos o arbol de Huffman
301         read(file, &H.max, sizeof(int));
302         read(file, &H.lim, sizeof(int));
303         read(file, &H.depth, sizeof(int));
304         //H.s.spos = (unsigned int *) malloc((H.lim+1)*sizeof(int));
305         //H.s.spos =H.s.symb = (unsigned int *) malloc((H.lim+1)*sizeof(int));
306         H.s.symb = (unsigned int *) malloc((H.lim+1)*sizeof(int));
307         H.num = (unsigned int *) malloc((H.depth+1)*sizeof(int));       
308         H.fst = (unsigned int *) malloc((H.depth+1)*sizeof(int));       
309
310         //read(file, H.s.spos, (H.lim+1)*sizeof(int));
311         //fprintf(stderr," \n read %d spos bytes\n", (H.lim+1)*sizeof(int));
312         read(file, H.s.symb, (H.lim+1)*sizeof(int));    
313
314         read(file, H.num, (H.depth+1)*sizeof(int));
315         read(file, H.fst, (H.depth+1)*sizeof(int));     
316         compressedPsi.diffsHT = H;
317         // Fin da carga do arbol de Huffman
318         read(file, &(compressedPsi.nS), sizeof(int));
319         read(file, &(compressedPsi.numberOfSamples), sizeof(int));      
320         read(file, &(compressedPsi.sampleSize), sizeof(int));
321         compressedPsi.samples = (unsigned int *)malloc(((compressedPsi.numberOfSamples*compressedPsi.sampleSize+31)/32)*sizeof(int));
322         read(file, compressedPsi.samples, ((compressedPsi.numberOfSamples*compressedPsi.sampleSize+31)/32)*sizeof(int));
323         read(file, &(compressedPsi.pointerSize), sizeof(int));
324         compressedPsi.samplePointers = (unsigned int *)malloc(((compressedPsi.numberOfSamples*compressedPsi.pointerSize+31)/32)*sizeof(int));
325         read(file, compressedPsi.samplePointers, ((compressedPsi.numberOfSamples*compressedPsi.pointerSize+31)/32)*sizeof(int));
326         read(file, &(compressedPsi.streamSize), sizeof(int));
327         compressedPsi.stream = (unsigned int *)malloc(((compressedPsi.streamSize+31)/32)*sizeof(int));
328         read(file, compressedPsi.stream, ((compressedPsi.streamSize+31)/32)*sizeof(int));
329         read(file, &(compressedPsi.totalMem), sizeof(int));
330         
331         close(file);
332                 
333         return compressedPsi;
334
335 }