-/* **NO REVISADO TAMAÑO FILE.
-int buildIntIndexFromFile (char *filename, char *build_options,void **index) {
- //char filename[255];
- int file;
- struct stat f_stat;
- //sprintf(filename, "%s.%s", basename,SE_FILE_EXT);
-
- if( (file = open(filename, O_RDONLY)) < 0) {
- printf("Cannot open file %s\n", filename);
- exit(0);
- }
- if( fstat(file, &f_stat) < 0) {
- printf("Cannot read information from file %s\n", filename); exit(0);
- }
- uint sizeFile = (f_stat.st_size)/sizeof(uint);
+/* Transforms the alphabet of x by attempting to aggregate several symbols into
+ one, while preserving the suffix order of x. The alphabet may also be
+ compacted, so that x on output comprises all integers of the new alphabet
+ with no skipped numbers.
+
+ Input: x is an array of size n+1 whose first n elements are positive
+ integers in the range l...k-1. p is array of size n+1, used for temporary
+ storage. q controls aggregation and compaction by defining the maximum value
+ for any symbol during transformation: q must be at least k-l; if q<=n,
+ compaction is guaranteed; if k-l>n, compaction is never done; if q is
+ INT_MAX, the maximum number of symbols are aggregated into one.
+
+ Output: Returns an integer j in the range 1...q representing the size of the
+ new alphabet. If j<=n+1, the alphabet is compacted. The global variable r is
+ set to the number of old symbols grouped into one. Only x[n] is 0.*/
+
+static int transform(int *x, int *p, int n, int k, int l, int q)
+{
+ int b, c, d, e, i, j, m, s;
+ int *pi, *pj;
+
+ for (s=0, i=k-l; i; i>>=1)
+ ++s; /* s is number of bits in old symbol.*/
+ e=INT_MAX>>s; /* e is for overflow checking.*/
+ for (b=d=r=0; r<n && d<=e && (c=d<<s|(k-l))<=q; ++r) {
+ b=b<<s|(x[r]-l+1); /* b is start of x in chunk alphabet.*/
+ d=c; /* d is max symbol in chunk alphabet.*/
+ }
+ m=(1<<(r-1)*s)-1; /* m masks off top old symbol from chunk.*/
+ x[n]=l-1; /* emulate zero terminator.*/
+ if (d<=n) { /* if bucketing possible, compact alphabet.*/
+ for (pi=p; pi<=p+d; ++pi)
+ *pi=0; /* zero transformation table.*/
+ for (pi=x+r, c=b; pi<=x+n; ++pi) {
+ p[c]=1; /* mark used chunk symbol.*/
+ c=(c&m)<<s|(*pi-l+1); /* shift in next old symbol in chunk.*/
+ }
+ for (i=1; i<r; ++i) { /* handle last r-1 positions.*/
+ p[c]=1; /* mark used chunk symbol.*/
+ c=(c&m)<<s; /* shift in next old symbol in chunk.*/
+ }
+ for (pi=p, j=1; pi<=p+d; ++pi)
+ if (*pi)
+ *pi=j++; /* j is new alphabet size.*/
+ for (pi=x, pj=x+r, c=b; pj<=x+n; ++pi, ++pj) {
+ *pi=p[c]; /* transform to new alphabet.*/
+ c=(c&m)<<s|(*pj-l+1); /* shift in next old symbol in chunk.*/
+ }
+ while (pi<x+n) { /* handle last r-1 positions.*/
+ *pi++=p[c]; /* transform to new alphabet.*/
+ c=(c&m)<<s; /* shift right-end zero in chunk.*/
+ }
+ } else { /* bucketing not possible, don't compact.*/
+ for (pi=x, pj=x+r, c=b; pj<=x+n; ++pi, ++pj) {
+ *pi=c; /* transform to new alphabet.*/
+ c=(c&m)<<s|(*pj-l+1); /* shift in next old symbol in chunk.*/
+ }
+ while (pi<x+n) { /* handle last r-1 positions.*/
+ *pi++=c; /* transform to new alphabet.*/
+ c=(c&m)<<s; /* shift right-end zero in chunk.*/
+ }
+ j=d+1; /* new alphabet size.*/
+ }
+ x[n]=0; /* end-of-string symbol is zero.*/
+ return j; /* return new alphabet size.*/
+}