X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=swcsa%2FbuildFacade.c;h=b581059f37b25f6f6f2166231863967fcfb8139c;hb=f23d86be251c79353f66701f10d69020845050c8;hp=768e9439d65cf0278f0d373c8e79733b9cd80086;hpb=102e33b134075765e6d4e0c38bc1307568ce5602;p=SXSI%2FTextCollection.git diff --git a/swcsa/buildFacade.c b/swcsa/buildFacade.c index 768e943..b581059 100755 --- a/swcsa/buildFacade.c +++ b/swcsa/buildFacade.c @@ -482,7 +482,7 @@ int extractWords (void *index, ulong fromWord, ulong toWord, uchar **text, uint avgWordLen =7; uint i, j;//, tmplen; - uint prevValid; + uint prevValid = 0; byte *src, *dst, *buff; uint tmplen =0; @@ -617,62 +617,79 @@ int build_WCSA (uchar *text, ulong length, char *build_options, void **index) { //----------------------------------------------------------------- //1st pass (processing the file) { - byte *pbeg,*pend,*wordstart,*aWord; - register ulong size; - register uint i; - - pbeg = inputBuffer; - pend = inputBuffer+bytesFileReal; - - while (pbeg = pend) {size++;} // a unique BLANK at the end of the file. - else { - if (_Valid [*pbeg] ) { - wordstart = pbeg; //So skipping 1 blank character - while ( (sizehash[addrInTH].word; - hash->hash[addrInTH].posInVoc = zeroNode; - zeroNode++; - totallenWords += size +1; // +1 due to the '\0' char... - //fprintf(stderr,"\n Adding word: <<%s>> (size=%d) to the hashTable",hash->hash[addrInTH].word,size); - } - seSize ++; - }//while pbeg= pend) {size++;} // a unique BLANK at the end of the file. + else { + if (_Valid [*pbeg] ) { + wordstart = pbeg; //So skipping 1 blank character + while ( (sizehash[addrInTH].word; + hash->hash[addrInTH].posInVoc = zeroNode; + zeroNode++; + totallenWords += size +1; // +1 due to the '\0' char... + //fprintf(stderr,"\n Adding word: <<%s>> (size=%d) to the hashTable",hash->hash[addrInTH].word,size); + } + seSize ++; + + }//while pbeg= pend) {size++;} // a unique BLANK at the end of the file. - else { - if (_Valid [*pbeg] ) { - wordstart = pbeg; //So skipping 1 blank character - while ( (sizehash[addrInTH].posInVoc+1; // !!!! - countValidWords++; - - }// while pbeg= pend) {size++;} // a unique BLANK at the end of the file. + else { + if (_Valid [*pbeg] ) { + wordstart = pbeg; //So skipping 1 blank character + while ( (sizehash[addrInTH].posInVoc+1; // !!!! + countValidWords++; + + }// while pbegn = zeroNode; @@ -792,7 +823,7 @@ int build_WCSA (uchar *text, ulong length, char *build_options, void **index) { tmpOffsets[zeroNode]=tmpOffset; //setting pointer to the "virtual" word {zeroNode+1}^{th} //kbit encoding of the offsets - uint elemSize = bits(tmpOffset); + uint elemSize = _bits(tmpOffset); wcsa->wordsData.elemSize = elemSize; wcsa->wordsData.words = (uint *) malloc (((((zeroNode +1)*elemSize)+W-1) /W) * sizeof(uint)); //with 1 extra slot !. wcsa->wordsData.words[((((zeroNode +1)*elemSize)+W-1) /W) -1 ] =0000; @@ -1369,7 +1400,7 @@ int printInfo(void *index) { printf("\n Summary of Presentation layer:"); printf("\n Number of valid words (SEsize) = %u",wcsa->seSize); printf("\n Number of different words = %ld",wcsa->n); - printf("\n WCSA structure = %d bytes", sizeof(twcsa)); + printf("\n WCSA structure = %lu bytes", sizeof(twcsa)); uint totalpointers = ((((wcsa->n+1)* (wcsa->wordsData.elemSize))+W-1) /W) * (sizeof(uint)); uint totalasciizone = wcsa->wordsData.wordsZoneMem.size * sizeof(byte) ;