2 // This code implements the data structure to get
3 // "real" text positions at search time.
10 position createPosition(void *Taux, uint text_length)
13 lztrie T = (lztrie) Taux;
15 ulong n, M, superblock_size, current_superblock,
16 starting_pos, i, depth, pos;
19 P = malloc(sizeof(struct tpos));
21 P->SBlock_size = 32;//bits(n-1); // superblock size is log n
22 P->nbitsSB = bits(text_length-1);
23 P->nSuperBlock = (ulong)ceil((double)n/P->SBlock_size); // number of superblocks
24 P->SuperBlock = malloc(((P->nbitsSB*P->nSuperBlock+W-1)/W)*sizeof(uint));
25 P->Tlength = text_length;
26 M = 0; // maximum superblock size (in number of characters)
28 current_superblock = 0;
32 Offset = malloc(n*sizeof(uint)); // array for temporary use
34 bitput(P->SuperBlock, 0, P->nbitsSB, 0);
36 for (i = 0, pos = 0; i < n; i++) {
37 node = mapto(T->Node, i);
38 depth = depthLZTrie(T, node);
43 if ((superblock_size > P->SBlock_size) || (i==0)) {
45 bitput(P->SuperBlock,P->nbitsSB*(++current_superblock),P->nbitsSB,pos);
46 if (i!=0 && Offset[i-1]>M) M = Offset[i-1];
51 Offset[i] = pos-starting_pos;
54 if (Offset[i-1]>M) M = Offset[i-1];
56 P->nbitsOffs = bits(M-1);
57 P->Offset = malloc(((P->nOffset*P->nbitsOffs+W-1)/W)*sizeof(uint));
58 for (i = 0; i < n; i++)
59 bitput(P->Offset, i*P->nbitsOffs, P->nbitsOffs, Offset[i]);
67 // given a phrase id, gets the corresponding text position
69 ulong getPosition(position P, uint id)
71 if (id > P->nOffset) return P->Tlength;
73 ulong posSB = (id-1)>>5;///P->SBlock_size;//(ulong)ceil((double)id/P->SBlock_size)-1;
74 return bitget(P->SuperBlock,posSB*P->nbitsSB,P->nbitsSB)
75 + bitget(P->Offset,(id-1)*P->nbitsOffs,P->nbitsOffs);
80 // given a text position, gets the identifier of the
81 // LZ78 phrase containing that position
83 ulong getphrasePosition(position P, ulong text_pos)
85 ulong li, ls, nbits, elem, med, SB, temp, i;
88 ls = P->nSuperBlock-1;
90 while ((ls-li+1) > 0) {
92 elem = bitget(P->SuperBlock,med*nbits,nbits);
93 if (elem == text_pos) break;
94 if (elem < text_pos) li = med+1;
97 if (elem > text_pos && med) med--;
98 SB = bitget(P->SuperBlock,med*nbits,nbits);
101 li = med*P->SBlock_size;
103 if (med+1 == P->nSuperBlock)
106 ls = (med+1)*P->SBlock_size - 1;
108 nbits = P->nbitsOffs;
110 while ((ls-li+1) > 8) {
112 elem = bitget(P->Offset,med*nbits,nbits)+SB;
113 if (elem == text_pos) break;
114 if (elem < text_pos) li = med+1;
118 for (i = li; i <= ls; i++)
119 if ((elem=bitget(P->Offset,i*nbits,nbits) + SB) >= text_pos) break;
121 if (elem > text_pos) i--;
123 return i+(elem>text_pos);
127 ulong sizeofPosition(position P)
129 return sizeof(struct tpos)
130 + ((P->nbitsSB*P->nSuperBlock+W-1)/W)*sizeof(uint)
131 + ((P->nOffset*P->nbitsOffs+W-1)/W)*sizeof(uint);
134 void destroyPosition(position P)
143 position loadPosition(FILE *f, uint text_length)
148 P = malloc (sizeof(struct tpos));
150 if (fread(&P->nSuperBlock,sizeof(uint),1,f) != 1) {
151 fprintf(stderr,"Error: Cannot read LZTrie from file\n");
155 P->nbitsSB = bits(text_length-1);
156 aux = (((unsigned long long) P->nbitsSB*P->nSuperBlock+W-1)/W);
157 P->SuperBlock = malloc(aux*sizeof(uint));
159 if (fread(P->SuperBlock,sizeof(uint),aux,f) != aux) {
160 fprintf(stderr,"Error: Cannot read LZTrie from file\n");
166 P->Tlength = text_length;
168 if (fread(&P->nOffset,sizeof(uint),1,f) != 1) {
169 fprintf(stderr,"Error: Cannot read LZTrie from file\n");
173 if (fread(&P->nbitsOffs,sizeof(uint),1,f) != 1) {
174 fprintf(stderr,"Error: Cannot read LZTrie from file\n");
178 aux = (((unsigned long long)P->nOffset*P->nbitsOffs+W-1)/W);
179 P->Offset = malloc(aux*sizeof(uint));
180 if (fread(P->Offset,sizeof(uint),aux,f) != aux) {
181 fprintf(stderr,"Error: Cannot read LZTrie from file\n");
189 void savePosition(FILE *f, position P)
193 if (fwrite(&P->nSuperBlock,sizeof(uint),1,f) != 1) {
194 fprintf(stderr,"Error: Cannot write LZTrie on file\n");
198 aux = (((unsigned long long) P->nbitsSB*P->nSuperBlock+W-1)/W);
200 if (fwrite(P->SuperBlock,sizeof(uint),aux,f) != aux) {
201 fprintf(stderr,"Error: Cannot write LZTrie on file\n");
205 if (fwrite(&P->nOffset,sizeof(uint),1,f) != 1) {
206 fprintf(stderr,"Error: Cannot write LZTrie on file\n");
210 if (fwrite(&P->nbitsOffs,sizeof(uint),1,f) != 1) {
211 fprintf(stderr,"Error: Cannot write LZTrie on file\n");
215 aux = (((unsigned long long)P->nOffset*P->nbitsOffs+W-1)/W);
217 if (fwrite(P->Offset,sizeof(uint),aux,f) != aux) {
218 fprintf(stderr,"Error: Cannot write LZTrie on file\n");