+ //-----------------------------------------------------------------
+ // 1st pass (processing the file).
+ //
+ // Walks inputBuffer[0 .. bytesFileReal) once and cuts it into tokens:
+ //   * a word:      a run of bytes flagged in _Valid, at most MAX_SIZE_OF_WORD long;
+ //   * a separator: a run of non-zero bytes not flagged in _Valid, at most
+ //                  MAX_SIZE_OF_GAP long.
+ // A single ' ' that is directly followed by a word byte is dropped, and only
+ // the word is reported. Runs longer than the caps are split over several
+ // iterations. One 0-byte following a token is skipped.
+ //
+ // Every token that is not yet in `hash` is inserted and registered in
+ // posInHT[zeroNode]; seSize counts every token, new or not.
+ {
+     byte *pbeg, *pend, *wordstart;
+     ulong size;
+     uint i;
+
+     pbeg = inputBuffer;
+     pend = inputBuffer + bytesFileReal;
+
+     while (pbeg < pend)
+     {
+         // A token must never start on a 0-byte (a single 0-byte after a token
+         // is skipped further below), so this is treated as a fatal error.
+         if (*pbeg == 0)
+         {
+             fprintf(stderr, "buildFacade.c: assert failed, *pbeg == 0\n");
+             exit(1);
+         }
+
+         // Parse either a word or a separator.
+         size = 0;
+         wordstart = pbeg;
+         if (_Valid[*pbeg])
+         {
+             // Word: consume the run of _Valid bytes.
+             while ((size < MAX_SIZE_OF_WORD) && (pbeg < pend) && (_Valid[*pbeg])) { size++; pbeg++; }
+         }
+         else if (*pbeg != ' ')
+         {
+             // Separator that does not start with ' ': it is a token of its own.
+             while ((size < MAX_SIZE_OF_GAP) && (pbeg < pend) && (!_Valid[*pbeg] && *pbeg != 0)) { size++; pbeg++; }
+         }
+         else
+         {
+             // A ' ' comes first: what follows decides how it is handled.
+             pbeg++;
+             if (pbeg >= pend)
+             {
+                 // A lone blank at the very end of the buffer is a 1-byte token.
+                 size++;
+             }
+             else if (_Valid[*pbeg])
+             {
+                 // Blank + word: skip the single blank and report only the word.
+                 wordstart = pbeg;
+                 while ((size < MAX_SIZE_OF_WORD) && (pbeg < pend) && (_Valid[*pbeg])) { size++; pbeg++; }
+             }
+             else
+             {
+                 // Blank + more separator bytes: the blank is part of the separator.
+                 size++; // counts the blank already consumed above
+                 while ((size < MAX_SIZE_OF_GAP) && (pbeg < pend) && (!_Valid[*pbeg] && *pbeg != 0)) { size++; pbeg++; }
+             }
+         }
+
+         // Skip one 0-byte that directly follows the token.
+         if (pbeg < pend && *pbeg == 0)
+             pbeg++;
+
+         if (size == 0)
+         {
+             fprintf(stderr, "buildFacade.c: assert failed, size == 0\n");
+             exit(1);
+         }
+
+         // The parsed token starts at "wordstart" and is "size" bytes long.
+         // addrInTH is passed by address to both hash calls and is used below
+         // as the token's slot index in hash->hash[].
+         i = inHashTable(hash, wordstart, size, &addrInTH);
+         if (!i)
+         {
+             // First occurrence: add it to the hash table and to posInHT.
+             insertElement(hash, wordstart, size, &addrInTH);
+             if (zeroNode >= size_posInHT)
+             {
+                 // Grow through a temporary: a failed realloc must be detected
+                 // before posInHT is written to, instead of dereferencing NULL.
+                 tposInHT *grown;
+
+                 size_posInHT *= 2;
+                 grown = (tposInHT *) realloc(posInHT, size_posInHT * sizeof(tposInHT));
+                 if (grown == NULL)
+                 {
+                     fprintf(stderr, "buildFacade.c: realloc failed while growing posInHT\n");
+                     exit(1);
+                 }
+                 posInHT = grown;
+             }
+             posInHT[zeroNode].slot = addrInTH;
+             posInHT[zeroNode].word = hash->hash[addrInTH].word;
+             hash->hash[addrInTH].posInVoc = zeroNode;
+             zeroNode++;
+             totallenWords += size + 1; // +1 due to the '\0' char
+         }
+         seSize++;
+     } // while (pbeg < pend)
+
+     // NOTE(review): the specifiers assume zeroNode is an unsigned long and
+     // seSize an unsigned int; their declarations are not visible here - confirm.
+     fprintf(stderr,"\n1st pass ends: TOTAL distinct words: %lu totalWords = %u",zeroNode,seSize);
+
+ } // 1st pass ends