1 #include "buildFacade.h"
3 /**------------------------------------------------------------------
5 *------------------------------------------------------------------ */
/*
 * Command-line driver for the index builder.
 * Expected arguments (per the usage message): <in file> <out basename> [build_options].
 * Loads the input file into memory, calls build_index() and save_index(),
 * reports the index size, recovers the source text from the index, and then
 * runs the DISPLAY and LOCATE search tests on patterns read from stdin.
 */
7 int main(int argc, char* argv[])
10 char *infile, *outbasename, *stopwordsfile; // Name of in/out files
18 printf("\n*Presentation level for CSA (simple WCSA)");
19 printf("\n*CopyRight (c) 2007 [LBD & G.N.]\n\n");
21 // Reads input parameters from command line.
23 printf("Use: %s <in file> <out basename> [build_options]\n", argv[0]);
27 // Reads params (input file, output basename, and stopwords file)
29 outbasename = argv[2];
// NOTE(review): the usage text names the third argument [build_options], but it
// is stored as stopwordsfile and forwarded to build_index() -- confirm the intended meaning.
30 stopwordsfile = argv[3];
// Size of the input file; used below as both the allocation size and the read length.
32 finsize= fileSize(infile);
// Message emitted when the input file is empty or cannot be found.
35 printf( "\nFILE EMPTY OR FILE NOT FOUND %s !!\nSkipping processement ...\n",infile);
39 // Opening the input text file.
40 if( (f_in = open(infile, O_RDONLY)) < 0) {
41 printf("Cannot read file %s\n", infile);
// Load the whole input file into one heap buffer of finsize bytes.
// NOTE(review): the malloc() result is not checked before the read() that follows,
// and the byte count returned by read() is discarded, so a short read goes unnoticed.
44 inputBuffer = (byte *) malloc(finsize *sizeof(byte));// +1);
45 read (f_in,inputBuffer,finsize);
50 //printf("\n parametros <<%s>>\n\n",stopwordsfile);
51 // build_WCSA (inputBuffer, finsize, stopwordsfile, NULL,outbasename);
52 build_index (inputBuffer, finsize, stopwordsfile, &Index); /** building the index */
55 /** saving the index to disk*/
57 save_index (Index, outbasename);
58 fprintf(stderr,"Index saved !! ");
60 /** tells the mem used by the index */
62 index_size(Index, &indexsize);
// NOTE(review): the second %d receives 2*sizeof(uint), which has type size_t (%zu would
// match); the first %d should also be checked against the declared type of indexsize.
63 fprintf(stderr,"Index occupied %d bytes, 2 extra mallocs = %d",indexsize,2* sizeof(uint));
66 /** recovering the source text from the index */
71 get_length(Index, &size);
73 fprintf(stderr, "\nRecovering source file "); fflush(stderr);
// First recovery pass: ext1 is the suffix handed to recoverSourceText1() together with outbasename.
74 char ext1[10]=".source";
75 recoverSourceText1((twcsa*) Index, outbasename,ext1, size);
// Reports the elapsed time as end-start, printed in seconds.
77 fprintf(stderr, " time: %.3f secs\n", end-start );
// Second recovery pass: same arguments, using recoverSourceText2() and the ".source2" suffix.
80 char ext2[10]=".source2";
81 fprintf(stderr, "\nRecovering source file "); fflush(stderr);
82 recoverSourceText2((twcsa*) Index, outbasename,ext2,size);
84 fprintf(stderr, " time: %.3f secs\n", end-start );
85 //fprintf(stderr, "\nRecovering source file time: %.3f secs\n", end-start );
88 // DISPLAYING THE OCCURRENCES OF A TEXT PATTERN (word/phrase).
89 {unsigned char textPattern[MAX_TEXT_PATTERN_SIZE];
91 ulong numocc,numc, length, i, *snippet_len, tot_numcharext = 0, numpatt;
92 uchar *pattern, *snippet_text;
// pattern aliases the textPattern buffer; it is only used for the debug print below.
94 pattern = textPattern;
95 printf("\nSEARCH TEST for DISPLAY (pizzachili interface)\n");
97 printf("Intro string: ");
// Read one pattern line from stdin; a line holding only "\n" ends the test.
// NOTE(review): the fgets() result is not checked, so on EOF or error the buffer is
// used without having been filled by this call.
98 fgets((char*)textPattern, MAX_TEXT_PATTERN_SIZE, stdin);
99 if (!strcmp((char*)textPattern,"\n") ) break;
// Overwrite the last character (expected to be the trailing newline) with a terminator.
100 textPattern[strlen((char*)textPattern)-1] = '\0';
// Pattern length, measured after the last character was removed.
102 length = strlen( (char*)textPattern);
105 // error = display (Index, textPattern, length, numc, &numocc,
106 // &snippet_text, &snippet_len);
107 error = displayWords (Index, textPattern, length, numc, &numocc,
108 &snippet_text, &snippet_len,1);
// A non-zero result from displayWords() is reported and the program exits (with status 0).
110 if (error){ fprintf(stderr, "%s\n", "Hubo un error durante display");exit(0);}
112 fprintf(stderr,"\n acabou display");fflush(stderr);
// len is the per-occurrence stride used to index into snippet_text below.
114 ulong j, len = length + 2*numc;
// NOTE(review): the %d conversions below are given ulong values (length, numocc, len,
// i+1, snippet_len[i]); confirm the typedef and use a matching conversion such as %lu.
116 fprintf(stderr,"\n length = %d",length);
117 fprintf(stderr,"\n pattern = %s",pattern);fflush(stderr);
118 fprintf(stderr,"\n numocc = %d",numocc);fflush(stderr);
119 fprintf(stderr,"\n snippet len = %d",len);fflush(stderr);
120 fprintf(stderr,"\n =========");fflush(stderr);
// Dump every snippet: occurrence i starts at snippet_text + len*i and snippet_len[i] bytes are written.
121 for (i = 0; i < numocc; i++){
122 fprintf(stderr,"\n[%2d][len=%3d]<<",i+1,snippet_len[i]);fflush(stderr);
123 fwrite(snippet_text+len*i,sizeof(uchar),snippet_len[i],stderr);fflush(stderr);
124 fprintf(stderr,">>");fflush(stderr);
// Accumulate the total number of snippet characters over all occurrences.
129 for(i=0; i<numocc; i++) {
130 tot_numcharext += snippet_len[i];
138 printf("Ocurrences = %d\n", numocc);
139 if (!strcmp((char*)textPattern,"\n") ) break;
145 // SEARCHING FOR A TEXT PATTERN (word/phrase).
146 {unsigned char textPattern[MAX_TEXT_PATTERN_SIZE];
151 printf("\nSEARCH TEST for LOCATE\n");
153 printf("Intro string: ");
// Read one pattern line from stdin; a line holding only "\n" ends the test.
// NOTE(review): the fgets() result is not checked here either.
154 fgets((char*)textPattern, MAX_TEXT_PATTERN_SIZE, stdin);
// NOTE(review): len is measured before the last character is overwritten below and is
// then passed to locateWord(); confirm it is adjusted so it does not count the removed newline.
155 len = strlen((char*)textPattern);
156 if (!strcmp((char*)textPattern,"\n") ) break;
157 textPattern[len-1] = '\0';
160 //occs = locateTextOcurrences(wcsa,textPattern,&occ);
161 // locate(Index, textPattern, len, (ulong **)&occs, (ulong *)&occ);
// locateWord() receives the addresses of occs and occ as output arguments; occ is printed as the occurrence count.
162 locateWord(Index, textPattern, len, (ulong **)&occs, (ulong *)&occ, 0);
164 printf("\n*** %s occurs %d times: In the source text in positions:\n\t",textPattern,occ);
165 /* for (i=0;i<occ;i++)
166 printf("[%u]",occs[i]);
168 if (occ >0) free(occs);
170 if (!strcmp((char*)textPattern,"\n") ) break;
177 /** freeing the index */
212 // bG = createBitmap (B,len);
213 // bE = createBitmapEdu (B,len);
214 // //fprintf(stderr,"\n RANK DE GONZALO DA %u, de EDU DA %u\n",rank(bG,33),rank1Edu(bE,33));
215 // //fprintf(stderr,"\n SELECT1(2) DE GONZALO DA %u, de EDU DA %u\n",bselect(bG,4),bselect(bE,4));
217 // showBitVector(bitvector,34);
226 // char a[20]="beginnings";char b[20]="HOSTIAS";
230 // i = inHashTable(stopwordshash,w, strlen(w), &addrInTH );
231 // if (!i) insertElement (stopwordshash, w, strlen(w), &addrInTH);
232 // else stopwordshash->hash[addrInTH].freq++;
233 // //fprintf(stderr,"\n i = %ld, addrInTh = %ld ",i,addrInTH);
234 // //fprintf(stderr,"\n word in hash[%ld]= %s , freq = %ld, posinvoc =%ld",addrInTH, stopwordshash->hash[addrInTH].word, stopwordshash->hash[addrInTH].freq, stopwordshash->hash[addrInTH].posInVoc);
239 /// ENCODING THE separators ...
241 freeHuff(gapsHuffman);
248 len = 1000; //number of bits
249 bitvector = (uint *) malloc ((len/32 +1)* sizeof(uint));
251 byte texto[100] = "####@?*";
254 //fprintf(stderr,"\n este es el texto a codificar: %s",texto);
255 for (i=0;i<256;i++) freqs[i]=0;
256 for (i=0;i<strlen(texto);i++) freqs[texto[i]]++;
257 gapsHuffman = createHuff (freqs,255,UNSORTED);
260 for (i=0;i<strlen(texto);i++) {
261 //fprintf(stderr,"\n ENCODING seprators !!\n");
262 //fprintf(stderr,"%d. \n",ptr=encodeHuff(gapsHuffman, texto[i],bitvector,ptr) );
265 prepareToDecode(&(gapsHuffman));
267 showBitVector(bitvector,bitvectorSize);
269 //fprintf(stderr,"\n DECODING !!\n");
271 while (ptr < bitvectorSize) {
272 ptr=decodeHuff (gapsHuffman, &pos, bitvector, ptr);
273 //fprintf(stderr,"\n DECODING pos is %ld!!\n",pos);
274 //fprintf(stderr,"%c. \n",pos);
279 /// ENCODING THE CANONICAL WORDS ...
286 len = 1000; //number of bits
287 bitvector = (uint *) malloc ((len/32 +1)* sizeof(uint));
291 //fprintf(stderr,"\n ENCODING VARIANTS !!\n");
292 //fprintf(stderr,"%d. \n",ptr=encodeHuff(posInHT[0].huffman, 0,bitvector,ptr) );
293 //fprintf(stderr,"%d. \n",ptr=encodeHuff(posInHT[0].huffman, 1,bitvector,ptr) );
294 //fprintf(stderr,"%d. \n",ptr=encodeHuff(posInHT[0].huffman, 2,bitvector,ptr) );
295 //fprintf(stderr,"%d. \n",ptr=encodeHuff(posInHT[0].huffman, 3,bitvector,ptr) );
296 //fprintf(stderr,"%d. \n",ptr=encodeHuff(posInHT[0].huffman, 4,bitvector,ptr) );
297 //fprintf(stderr,"%d. \n",ptr=encodeHuff(posInHT[0].huffman, 4,bitvector,ptr) );
300 // f = fopen("huff","w");
301 // saveHuff(posInHT[0].huffman,f);
304 // f = fopen("huff","r");
305 // posInHT[0].huffman = loadHuff (f,0);
309 prepareToDecode(&(posInHT[0].huffman));
312 f = fopen("huff","w");
313 saveHuffAfterDecode(posInHT[0].huffman,f);
316 f = fopen("huff","r");
317 //posInHT[0].huffman = loadHuffAfterDecode(f,0);
318 loadHuffAfterDecode2 (&(posInHT[0].huffman),f,0);
324 showBitVector(bitvector,bitvectorSize);
326 //fprintf(stderr,"\n DECODING !!\n");
328 while (ptr < bitvectorSize) {
329 ptr=decodeHuff (posInHT[0].huffman, &pos, bitvector, ptr);
330 //fprintf(stderr,"\n DECODING pos is %ld!!\n",pos);
331 //fprintf(stderr,"%s. \n",posInHT[0].variants[pos]);
342 len = 101; //number of bits
343 bitvector = (uint *) malloc ((len/32 +1)* sizeof(uint));
346 bitzero (bitvector,0,101-1);
347 for (i=0; i<len;i++) setBit (bitvector,len,i,0);
352 bitset(bitvector,10);
353 bitset(bitvector,12);
354 //activateBit(bitvector,1);
355 //activateBit(bitvector,10);
357 bG = createBitmap (bitvector,len);
358 bE = createBitmapEdu (bitvector,len);
359 //fprintf(stderr,"\n RANK DE GONZALO DA %u, de EDU DA %u\n",rank(bG,33),rank1Edu(bE,33));
360 //fprintf(stderr,"\n SELECT1(2) DE GONZALO DA %u, de EDU DA %u\n",bselect(bG,4),bselect(bE,4));
362 showBitVector(bitvector,34);