8757c73d89fa737552591c5e68f1484d51cde8f4
[SXSI/TextCollection.git] / swcsa / intIndex / psiHuffmanRLE.h
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <malloc.h>
4 #include "../utils/huff.h"
5
6 /*
7 Compresion de PSI utilizando codificación incremental e RLE para os runs.
8
9 Utilizamos códigos Huffman, entre os que distinguimos 4 grupos.
10
11 G1: Incrementos frecuentes: Entre 2 e Total - 32 negativos - 32 grandes - (máxima lonxitude dun Run == periodo de muestreo - 1))
12 G2: Código que representa que hai un run e a súa lonxitude (periodo de muestreo códigos)
13 G3: Numeros negativos (32 caracteres de escape, que representan a lonxitude da representación binaria do valor absoluto do negativo)
14 G4: Numeros grandes, maiores que os de G1 (32 caracteres de escape representando novamente a lonxitude da representación binaria do seu valor)
15
16 Os de G1 obtéñense directamente, tras decodificar Huffman.
17 Os de G2 transfórmanse nunha run da lonxitude obtida tras decodificar con Huffman.
18 Os de G3 van seguidos da representación binaria do valor absoluto do seu número.
19 Os de G4 van seguidos da súa representación binaria.
20 */
21
22 // ESTRUCTURA DE PSI COMPRIMIDA
23 typedef struct {
24         unsigned int T;                                 // Periodo de muestreo de PSI
25         THuff diffsHT;                                  // Arbol de Huffman (codifica stream)
26         unsigned int nS;                                // Numero de simbolos para Huffman
27         unsigned int numberOfSamples;
28         unsigned int sampleSize;                // Bits que ocupa cada mostra
29         unsigned int *samples;                  // Vector de mostras
30         unsigned int pointerSize;               // Bits que ocupa cada punteiro 
31         unsigned int *samplePointers;   // Punteiros das mostras a stream
32         unsigned int streamSize;                // Bits que ocupa stream        
33         unsigned int *stream;                   // Secuencia Huffman + RLE
34         unsigned int totalMem;                  // the size in bytes used;
35 } HuffmanCompressedPsi;
36
37
38 // PROTOTIPOS DE FUNCIÓNS
39
40 //      Crea as estructuras de Psi comprimida:
41 //      
42 //      Psi: Funcion Psi original
43 //      psiSize: Numero de elementos de Psi
44 //      T: Periodo de muestreo en Psi
45 //      nS: Numero de simbolos que se utilizaran no arbol de Huffman
46 //
47 //      Devolve unha estructura CompressedPSI que representa a Psi comprimida
48 HuffmanCompressedPsi huffmanCompressPsi(unsigned int *Psi, unsigned int psiSize, unsigned int T, unsigned int nS);
49
50 //      Obtén un valor de Psi
51 //
52 //      cPsi: A estructura que representa a Psi comprimida
53 //      position: A posicion da que queremos obter o valor de Psi
54 unsigned int getHuffmanPsiValue(HuffmanCompressedPsi *cPsi, unsigned int position);
55
56 void storeHuffmanCompressedPsi(HuffmanCompressedPsi *compressedPsi, char *filename);
57 HuffmanCompressedPsi loadHuffmanCompressedPsi(char *filename);
58
59 //frees the memory used.        
60 void destroyHuffmanCompressedPsi(HuffmanCompressedPsi *compressedPsi);
61         
62         
63 //      
64 //// IMPLEMENTACIÓN DAS FUNCIÓNS
65 //
66 //void destroyHuffmanCompressedPsi(HuffmanCompressedPsi *compressedPsi) {
67 //      freeHuff(compressedPsi->diffsHT);
68 //      free(compressedPsi->samples);
69 //      free(compressedPsi->samplePointers);
70 //      free (compressedPsi->stream);
71 //}
72 //
73 //HuffmanCompressedPsi huffmanCompressPsi(unsigned int *Psi, unsigned int psiSize, unsigned int T, unsigned int nS) {
74 //      
75 //      HuffmanCompressedPsi cPsi;
76 //      
77 //      int absolute_value;
78 //      register unsigned int index, ptr, samplesPtr, samplePointersPtr;
79 //      unsigned int runLenght, binaryLenght;
80 //      
81 //      int *diffs;     
82 //      unsigned int *huffmanDst;
83 //      
84 //      // Estructuras da funcion comprimida (para logo asignar)
85 //      // Tamén se podian almacenar directamente
86 //      THuff diffsHT;
87 //      unsigned int numberOfSamples;
88 //      unsigned int *samples;
89 //      unsigned int sampleSize;
90 //      unsigned int *samplePointers;
91 //      unsigned int pointerSize;
92 //      unsigned int *stream;
93 //      unsigned int streamSize;
94 //      
95 //      // Variables que marcan os intervalos dentro do vector de frecuencias
96 //      unsigned int runLenghtStart = nS - 64 - T;      // Inicio das Runs
97 //      unsigned int negStart = nS - 64;                        // Inicio dos Negativos
98 //      unsigned int bigStart = nS - 32;                        // Inicio dos Grandes (>runLenghtStart)
99 //      
100 //      // Para estadistica
101 //      unsigned int totalSize;
102 //      
103 //      // Reservamos espacio para a distribución de valores de Psi
104 //      huffmanDst = (unsigned int *)malloc(sizeof(int)*nS);
105 //      for(index=0;index<nS;index++) huffmanDst[index]=0;
106 //      
107 //      // Inicializamos diferencias    
108 //      diffs = (int *)malloc(sizeof(int)*psiSize);     
109 //      diffs[0] = 0;
110 //      for(index=1; index<psiSize; index++) 
111 //              diffs[index] = Psi[index] - Psi[index-1];
112 //      
113 //      // Calculamos a distribucion de frecuencias
114 //      runLenght = 0;
115 //      for(index=0; index<psiSize; index++) {
116 //              
117 //              if(index%T) {
118 //                      
119 //                      if(diffs[index]==1) {
120 //                              runLenght++;
121 //                      } else {        // Non estamos nun run
122 //                              if(runLenght) {
123 //                                      huffmanDst[runLenght+runLenghtStart]++;
124 //                                      runLenght = 0;
125 //                              }
126 //                              if(diffs[index]>1 && diffs[index]<runLenghtStart) 
127 //                                      huffmanDst[diffs[index]]++;
128 //                              else
129 //                                      if(diffs[index]<0) {    // Valor negativo
130 //                                              absolute_value = -diffs[index];
131 //                                              binaryLenght = bits(absolute_value);
132 //                                              huffmanDst[binaryLenght+negStart-1]++;
133 //                                      } else {                                // Valor grande >= 128
134 //                                              absolute_value = diffs[index];
135 //                                              binaryLenght = bits(absolute_value);
136 //                                              huffmanDst[binaryLenght+bigStart-1]++;
137 //                                      }
138 //                      }
139 //                      
140 //              } else { // Rompemos o run porque atopamos unha mostra
141 //                      if(runLenght) {
142 //                              huffmanDst[runLenght+runLenghtStart]++;
143 //                              runLenght = 0;
144 //                      }
145 //              }
146 //              
147 //      }
148 //      
149 //      if(runLenght) huffmanDst[runLenght+runLenghtStart]++;
150 //      
151 //      // Creamos o arbol de Huffman
152 //      diffsHT = createHuff(huffmanDst,nS-1,UNSORTED);
153 //      
154 //      // Calculamos o espacio total ocupado pola secuencia Huffman + RLE
155 //      streamSize = diffsHT.total;
156 //      for(index=negStart;index<bigStart;index++) 
157 //              streamSize += huffmanDst[index]*(index-negStart+1);     // Negativos
158 //      for(index=bigStart;index<nS;index++) 
159 //              streamSize += huffmanDst[index]*(index-bigStart+1);             // Grandes      
160 //      
161 //      // Calculamos o numero de mostras e o espacio ocupado por cada mostra e por cada punteiro
162 //      numberOfSamples = (psiSize+T-1)/T;      
163 //      sampleSize = bits(psiSize);
164 //      pointerSize = bits(streamSize); 
165 //
166 //      // Reservamos espacio para a secuencia e para as mostras e punteiros
167 //      samples = (unsigned int *)malloc(sizeof(int)*((numberOfSamples*sampleSize+31)/32));     
168 //      samplePointers = (unsigned int *)malloc(sizeof(int)*((numberOfSamples*pointerSize+31)/32));
169 //      stream = (unsigned int *)malloc(sizeof(int)*((streamSize+31)/32));
170 //      
171 //      // Comprimimos secuencialmente (haberá que levar un punteiro desde o inicio)
172 //      ptr = 0;
173 //      samplesPtr = 0;
174 //      samplePointersPtr = 0;
175 //      runLenght = 0;
176 //      for(index=0; index<psiSize; index++) {
177 //              
178 //              if(index%T) {
179 //                      
180 //                      if(diffs[index]==1) {
181 //                              runLenght++;
182 //                      } else {        // Non estamos nun run
183 //                              if(runLenght) {
184 //                                      ptr = encodeHuff(diffsHT,runLenght+runLenghtStart,stream,ptr);
185 //                                      runLenght = 0;
186 //                              }
187 //                              if(diffs[index]>1 && diffs[index]<runLenghtStart) {                             
188 //                                      ptr = encodeHuff(diffsHT,diffs[index],stream,ptr);      
189 //                              }       
190 //                              else
191 //                                      if(diffs[index]<0) {    // Valor negativo
192 //                                              absolute_value = -diffs[index];
193 //                                              binaryLenght = bits(absolute_value);
194 //                                              ptr = encodeHuff(diffsHT,binaryLenght+negStart-1,stream,ptr);
195 //                                              bitwrite(stream,ptr,binaryLenght,absolute_value);
196 //                                              ptr += binaryLenght;                                            
197 //                                      } else {                                // Valor grande >= 128
198 //                                              absolute_value = diffs[index];
199 //                                              binaryLenght = bits(absolute_value);                                    
200 //                                              ptr = encodeHuff(diffsHT,binaryLenght+bigStart-1,stream,ptr);
201 //                                              bitwrite(stream,ptr,binaryLenght,absolute_value);                                               
202 //                                              ptr += binaryLenght;
203 //                                      }
204 //                      }
205 //                      
206 //              } else { // Rompemos o run porque atopamos unha mostra
207 //                      if(runLenght) {                         
208 //                              ptr = encodeHuff(diffsHT,runLenght+runLenghtStart,stream,ptr);
209 //                              runLenght = 0;
210 //                      }
211 //                      bitwrite(samples,samplesPtr,sampleSize,Psi[index]);
212 //                      samplesPtr += sampleSize;
213 //                      bitwrite(samplePointers,samplePointersPtr,pointerSize,ptr);
214 //                      samplePointersPtr += pointerSize;
215 //              }
216 //              
217 //      }
218 //      
219 //      if(runLenght) { 
220 //              ptr = encodeHuff(diffsHT,runLenght+runLenghtStart,stream,ptr);
221 //      }
222 //      
223 //      // Amosamos o espacio ocupado
224 //      totalSize = sizeof(int)*((numberOfSamples*sampleSize+31)/32) + 
225 //              sizeof(int)*((numberOfSamples*pointerSize+31)/32) +
226 //              sizeof(int)*((streamSize+31)/32) + sizeHuff(diffsHT) +
227 //              6*sizeof(int);
228 //      printf("Compressed Psi size = %d bytes\n", totalSize);
229 //      
230 //      // Necesario antes de decodificar
231 //      prepareToDecode(&diffsHT);
232 //      
233 //      // Asignamos os valores a cPsi e devolvemolo
234 //      cPsi.T = T;
235 //      cPsi.diffsHT = diffsHT;
236 //      cPsi.nS = nS;
237 //      cPsi.numberOfSamples = numberOfSamples;
238 //      cPsi.samples = samples;
239 //      cPsi.sampleSize = sampleSize;
240 //      cPsi.samplePointers = samplePointers;
241 //      cPsi.pointerSize = pointerSize;
242 //      cPsi.stream = stream;
243 //      cPsi.streamSize = streamSize;
244 //      
245 //      //frees resources not needed in advance
246 //      free(diffs);
247 //      free(huffmanDst);
248 //      
249 //      //returns the data structure that holds the compressed psi.
250 //      return cPsi;    
251 //}
252 //
253 //
254 //unsigned int getHuffmanPsiValue(HuffmanCompressedPsi *cPsi, unsigned int position) {
255 //      
256 //      register unsigned int index;
257 //      unsigned int sampleIndex, ptr, psiValue, huffmanCode, positionsSinceSample; 
258 //      unsigned int absolute_value, binaryLenght, runLenght;
259 //      
260 //      unsigned int runLenghtStart = cPsi->nS - 64 - cPsi->T;
261 //      unsigned int negStart = cPsi->nS - 64;
262 //      unsigned int bigStart = cPsi->nS - 32;  
263 //      
264 //      sampleIndex = position / cPsi->T;
265 //      psiValue = bitread(cPsi->samples,sampleIndex*cPsi->sampleSize,cPsi->sampleSize);
266 //      ptr = bitread(cPsi->samplePointers,sampleIndex*cPsi->pointerSize,cPsi->pointerSize);
267 //      
268 //      positionsSinceSample = position%cPsi->T;
269 //      
270 //      for(index=0;index<positionsSinceSample;index++) {
271 //              
272 //              ptr = decodeHuff(&cPsi->diffsHT,&huffmanCode,cPsi->stream,ptr);
273 //              
274 //              if(huffmanCode < runLenghtStart) {      // Incremento directo
275 //                      psiValue += huffmanCode;
276 //              }       
277 //              else 
278 //                      if(huffmanCode < negStart) {    // Estamos nun run
279 //                              runLenght = huffmanCode - runLenghtStart;
280 //                              if(index+runLenght>=positionsSinceSample)
281 //                                      return psiValue+positionsSinceSample-index;
282 //                              else {
283 //                                      psiValue += runLenght;
284 //                                      index += runLenght-1;
285 //                              }
286 //                      }
287 //                      else
288 //                              if(huffmanCode < bigStart) {    // Negativo
289 //                                      binaryLenght = huffmanCode-negStart+1;
290 //                                      absolute_value = bitread(cPsi->stream,ptr,binaryLenght);
291 //                                      ptr += binaryLenght;
292 //                                      psiValue -= absolute_value;     
293 //                              }
294 //                              else {  // Grande
295 //                                      binaryLenght = huffmanCode-bigStart+1;
296 //                                      absolute_value = bitread(cPsi->stream,ptr,binaryLenght);
297 //                                      ptr += binaryLenght;
298 //                                      psiValue += absolute_value;                              
299 //                              }
300 //                              
301 //      }
302 //      
303 //      return psiValue;
304 //
305 //}
306 //
307 //
308 //void storeHuffmanCompressedPsi(HuffmanCompressedPsi *compressedPsi, char *filename) {
309 //
310 //      int file;
311 //      THuff H;
312 //
313 //      if( (file = open(filename, O_WRONLY|O_CREAT, 0700)) < 0) {
314 //              printf("Cannot open file %s\n", filename);
315 //              exit(0);
316 //      }
317 //      write(file, &(compressedPsi->T), sizeof(int));
318 //      // Almacenar o arbol de huffman
319 //      H = compressedPsi->diffsHT;
320 //      write(file, &H.max, sizeof(int));
321 //      write(file, &H.lim, sizeof(int));
322 //      write(file, &H.depth, sizeof(int));
323 ////    write(file, H.s.spos, (H.lim+1)*sizeof(int));
324 //      write(file, H.s.symb, (H.lim+1)*sizeof(int));   
325 //      write(file, H.num, (H.depth+1)*sizeof(int));
326 //      write(file, H.fst, (H.depth+1)*sizeof(int));
327 //      // Fin de almacenar o arbol de huffman
328 //      write(file, &(compressedPsi->nS), sizeof(int));
329 //      write(file, &(compressedPsi->numberOfSamples), sizeof(int));
330 //      write(file, &(compressedPsi->sampleSize), sizeof(int));
331 //      write(file,     compressedPsi->samples, ((compressedPsi->numberOfSamples*compressedPsi->sampleSize+31)/32)*sizeof(int));        
332 //      write(file, &(compressedPsi->pointerSize), sizeof(int));
333 //      write(file,     compressedPsi->samplePointers, ((compressedPsi->numberOfSamples*compressedPsi->pointerSize+31)/32)*sizeof(int));
334 //      write(file, &(compressedPsi->streamSize), sizeof(int));
335 //      write(file,     compressedPsi->stream, ((compressedPsi->streamSize+31)/32)*sizeof(int));
336 //      
337 //      close(file);    
338 //
339 //}
340 //
341 //
342 //HuffmanCompressedPsi loadHuffmanCompressedPsi(char *filename) {
343 //      
344 //      HuffmanCompressedPsi compressedPsi;
345 //
346 //      THuff H;
347 //     
348 //      int file;
349 //
350 //      if( (file = open(filename, O_RDONLY)) < 0) {
351 //              printf("Cannot read file %s\n", filename);
352 //              exit(0);
353 //      }
354 //      read(file, &(compressedPsi.T), sizeof(int));
355 //      // Cargamos o arbol de Huffman
356 //      read(file, &H.max, sizeof(int));
357 //      read(file, &H.lim, sizeof(int));
358 //      read(file, &H.depth, sizeof(int));
359 //      //H.s.spos = (unsigned int *) malloc((H.lim+1)*sizeof(int));
360 //      //H.s.spos =H.s.symb = (unsigned int *) malloc((H.lim+1)*sizeof(int));
361 //      H.s.symb = (unsigned int *) malloc((H.lim+1)*sizeof(int));
362 //      H.num = (unsigned int *) malloc((H.depth+1)*sizeof(int));       
363 //      H.fst = (unsigned int *) malloc((H.depth+1)*sizeof(int));       
364 //
365 //      //read(file, H.s.spos, (H.lim+1)*sizeof(int));
366 //      fprintf(stderr," \n read %d spos bytes\n", (H.lim+1)*sizeof(int));
367 //      read(file, H.s.symb, (H.lim+1)*sizeof(int));    
368 //
369 //      read(file, H.num, (H.depth+1)*sizeof(int));
370 //      read(file, H.fst, (H.depth+1)*sizeof(int));     
371 //      compressedPsi.diffsHT = H;
372 //      // Fin da carga do arbol de Huffman
373 //      read(file, &(compressedPsi.nS), sizeof(int));
374 //      read(file, &(compressedPsi.numberOfSamples), sizeof(int));      
375 //      read(file, &(compressedPsi.sampleSize), sizeof(int));
376 //      compressedPsi.samples = (unsigned int *)malloc(((compressedPsi.numberOfSamples*compressedPsi.sampleSize+31)/32)*sizeof(int));
377 //      read(file, compressedPsi.samples, ((compressedPsi.numberOfSamples*compressedPsi.sampleSize+31)/32)*sizeof(int));
378 //      read(file, &(compressedPsi.pointerSize), sizeof(int));
379 //      compressedPsi.samplePointers = (unsigned int *)malloc(((compressedPsi.numberOfSamples*compressedPsi.pointerSize+31)/32)*sizeof(int));
380 //      read(file, compressedPsi.samplePointers, ((compressedPsi.numberOfSamples*compressedPsi.pointerSize+31)/32)*sizeof(int));
381 //      read(file, &(compressedPsi.streamSize), sizeof(int));
382 //      compressedPsi.stream = (unsigned int *)malloc(((compressedPsi.streamSize+31)/32)*sizeof(int));
383 //      read(file, compressedPsi.stream, ((compressedPsi.streamSize+31)/32)*sizeof(int));
384 //      
385 //      close(file);
386 //              
387 //      return compressedPsi;
388 //
389 //}