3 // Implements the LZtrie data structure
7 // creates a lztrie structure from a parentheses bitstring,
8 // a letter array in preorder, and an id array in preorder,
9 // all of which except the latter become owned
10 // n is the total number of nodes (n letters/ids, 2n parentheses)
12 lztrie createLZTrie(uint *string, byte *letters, uint *Node, uint n, uint text_length)
16 T = malloc(sizeof(struct slztrie));
18 T->pdata = createParentheses(string,2*n,true);
22 T->Node = createNodemap(Node, n, n);
23 T->TPos = createPosition(T, text_length);
24 T->text_length = text_length;
26 T->boost = malloc(256*sizeof(trieNode));
27 for (i=0;i<256;i++) T->boost[i] = NULLT;
28 i = 1; // shortcut for first child of root
29 while (i != 2*n-1) { // shortcut for its closing parenthesis
30 T->boost[T->letters[i-rank(T->pdata->bdata,i)]] = i;
31 // shortcut for leftrankLZTrie
32 i = findclose(T->pdata,i)+1;
37 // builds an lztrie from a text. "s" is the text terminator.
38 lztrie buildLZTrie(byte *text, byte s, uint text_length)
46 // first creates a full trie T
49 text = insertTrie(T,text);
51 while (text[-1] != s);
54 parent = malloc(((2*n+W-1)/W)*sizeof(uint));
55 letters = malloc(n*sizeof(byte));
56 // Aqui pedir espacio para Node, ver como lo voy a construir en vez de sacar solo los ids
57 Node = malloc(n*sizeof(uint));
58 representTrie(T,parent,letters,Node);
61 LZT = createLZTrie(parent,letters,Node,n,text_length);
67 // frees LZTrie structure, including the owned data
69 void destroyLZTrie(lztrie T)
71 destroyParentheses(T->pdata);
73 destroyNodemap(T->Node);
74 destroyPosition(T->TPos);
79 // stores lztrie T on file f
81 void saveLZTrie (lztrie T, FILE *f)
83 if (fwrite (&T->n,sizeof(uint),1,f) != 1) {
84 fprintf (stderr,"Error: Cannot write LZTrie on file\n");
87 if (fwrite (T->data,sizeof(uint),(2*T->n+W-1)/W,f) != (2*T->n+W-1)/W) {
88 fprintf (stderr,"Error: Cannot write LZTrie on file\n");
91 if (fwrite (T->letters,sizeof(byte),T->n,f) != T->n) {
92 fprintf (stderr,"Error: Cannot write LZTrie on file\n");
96 saveNodemap(f, T->Node);
97 fwrite(&T->text_length, sizeof(uint), 1, f);
98 savePosition(f, T->TPos);
101 // loads lztrie T from file f
103 lztrie loadLZTrie (FILE *f)
107 T = malloc (sizeof(struct slztrie));
108 if (fread (&T->n,sizeof(uint),1,f) != 1) {
109 fprintf (stderr,"Error: Cannot read LZTrie from file\n");
112 T->data = malloc (((2*T->n+W-1)/W)*sizeof(uint));
113 if (fread (T->data,sizeof(uint),(2*T->n+W-1)/W,f) != (2*T->n+W-1)/W) {
114 fprintf (stderr,"Error: Cannot read LZTrie from file\n");
117 T->pdata = createParentheses (T->data,2*T->n,true);
118 T->nbits = bits(T->n-1);
119 T->letters = malloc (T->n*sizeof(byte));
120 if (fread (T->letters,sizeof(byte),T->n,f) != T->n) {
121 fprintf (stderr,"Error: Cannot read LZTrie from file\n");
125 T->Node = loadNodemap(f);
126 fread(&T->text_length, sizeof(uint), 1, f);
127 T->TPos = loadPosition(f, T->text_length);
129 // boost first access
130 T->boost = malloc(256*sizeof(trieNode));
131 for (i=0;i<256;i++) T->boost[i] = NULLT;
132 i = 1; // shortcut for first child of root
133 while (i != 2*T->n-1) { // shortcut for its closing parenthesis
134 T->boost[T->letters[i-rank(T->pdata->bdata,i)]] = i;
135 // shortcut for leftrankLZTrie
136 i = findclose(T->pdata,i)+1;
143 // letter by which node i descends
145 byte letterLZTrie (lztrie T, trieNode i)
147 return T->letters[i-rank(T->pdata->bdata,i)]; // shortcut leftrankLZTrie
150 // go down by letter c, if possible
153 trieNode childLZTrie (lztrie T, trieNode i, byte c)
158 while (!bitget1(T->data,j)) { // there is a child here
159 tc = T->letters[j-rank(T->pdata->bdata,j)];
160 // shortcut for leftrankLZTrie: is a child by letter c?
162 if (tc == c) return j;
163 j = findclose(T->pdata,j)+1;
165 return NULLT; // no child to go down by c
168 // go up, if possible
170 trieNode parentLZTrie (lztrie T, trieNode i)
172 if (i == ROOT) return NULLT; // parent of root
173 if (T->boost[T->letters[i-rank(T->pdata->bdata,i)]] == i) return ROOT;
174 return enclose (T->pdata,i);
179 uint subtreesizeLZTrie (lztrie T, trieNode i)
182 j = findclose (T->pdata,i);
188 uint depthLZTrie (lztrie T, trieNode i)
190 return excess (T->pdata,i);
193 // smallest rank in subtree
195 uint leftrankLZTrie (lztrie T, trieNode i)
197 { return i-rank(T->pdata->bdata,i);
200 // largest rank in subtree
202 uint rightrankLZTrie (lztrie T, trieNode i)
205 j = findclose (T->pdata,i);
206 return j-rank(T->pdata->bdata,j)-1;
209 // is node i ancestor of node j?
211 bool ancestorLZTrie (lztrie T, trieNode i, trieNode j)
213 return (i <= j) && (j < findclose (T->pdata,i));
216 // next node from i, in preorder, adds/decrements depth accordingly
217 // assumes it *can* go on!
219 trieNode nextLZTrie (lztrie T, trieNode i, uint *depth)
221 uint *data = T->data;
223 while (bitget1(data,i)) { i++; (*depth)--; }
229 // extracts text from position "from" up to position "to".
230 // Parameter "snippet_length" is the actual length of the snippet extracted.
231 // The extracted text is stored in array "snippet".
233 void extract(lztrie T,
237 ulong *snippet_length)
239 ulong snpp_pos, posaux, idaux;
243 nodemap Node = T->Node;
245 if (to > (T->text_length-1)) to = T->text_length-1;
246 if (from > (T->text_length-1)) from = T->text_length-1;
248 *snippet_length = to-from+1;
249 *snippet = malloc(*snippet_length);
251 idfrom = getphrasePosition(T->TPos, from);
252 idto = getphrasePosition(T->TPos, to+1);
253 // *snippet_length = to-from+1;
255 posaux = getPosition(T->TPos, idto+1)-1;
256 p = mapto(Node, idto);
258 while (p&&(posaux > to)) {
259 p = parentLZTrie(T, p);
263 snpp_pos = (*snippet_length)-1;
266 for (idaux = idto; idaux != idfrom;) {
268 snppt[snpp_pos--] = letterLZTrie(T, p);
269 p = parentLZTrie(T, p);
271 p = mapto(Node, --idaux);
273 if (idfrom != idto) posaux = getPosition(T->TPos, idfrom+1)-(from!=0);
274 while (p && (posaux >= from)) {
275 snppt[snpp_pos--] = letterLZTrie(T, p);
276 p = parentLZTrie(T, p);
281 uint sizeofLZTrie(lztrie T)
283 return sizeof(struct slztrie) +
284 sizeofParentheses(T->pdata) +
286 sizeofNodemap(T->Node) +
287 sizeofPosition(T->TPos);