3 // Implements the LZtrie data structure
7 // creates a lztrie structure from a parentheses bitstring,
8 // a letter array in preorder, and an id array in preorder,
9 // all of which except the latter become owned
10 // n is the total number of nodes (n letters/ids, 2n parentheses)
12 lztrie createLZTrie(uint *string, byte *letters, uint *Node, uint n, uint text_length)
16 T = malloc(sizeof(struct slztrie));
18 T->pdata = createParentheses(string,2*n,true);
22 T->Node = createNodemap(Node, n, n);
23 T->TPos = createPosition(T, text_length);
24 T->text_length = text_length;
26 T->boost = malloc(256*sizeof(trieNode));
27 for (i=0;i<256;i++) T->boost[i] = NULLT;
28 i = 1; // shortcut for first child of root
29 while (i != 2*n-1) { // shortcut for its closing parenthesis
30 T->boost[T->letters[i-rank(T->pdata->bdata,i)]] = i;
31 // shortcut for leftrankLZTrie
32 i = findclose(T->pdata,i)+1;
37 // builds an lztrie from a text. "s" is the text terminator.
38 lztrie buildLZTrie(byte *text, byte s, uint text_length)
46 // first creates a full trie T
49 text = insertTrie(T,text);
51 while (text[-1] != s);
54 parent = malloc(((2*n+W-1)/W)*sizeof(uint));
55 letters = malloc(n*sizeof(byte));
56 // Aqui pedir espacio para Node, ver como lo voy a construir en vez de sacar solo los ids
57 Node = malloc(n*sizeof(uint));
58 representTrie(T,parent,letters,Node);
61 LZT = createLZTrie(parent,letters,Node,n,text_length);
69 // frees LZTrie structure, including the owned data
71 void destroyLZTrie(lztrie T)
73 destroyParentheses(T->pdata);
75 destroyNodemap(T->Node);
76 destroyPosition(T->TPos);
82 // stores lztrie T on file f
84 void saveLZTrie (lztrie T, FILE *f)
86 if (fwrite (&T->n,sizeof(uint),1,f) != 1) {
87 fprintf (stderr,"Error: Cannot write LZTrie on file\n");
90 if (fwrite (T->data,sizeof(uint),(2*T->n+W-1)/W,f) != (2*T->n+W-1)/W) {
91 fprintf (stderr,"Error: Cannot write LZTrie on file\n");
94 if (fwrite (T->letters,sizeof(byte),T->n,f) != T->n) {
95 fprintf (stderr,"Error: Cannot write LZTrie on file\n");
99 saveNodemap(f, T->Node);
100 fwrite(&T->text_length, sizeof(uint), 1, f);
101 savePosition(f, T->TPos);
104 // loads lztrie T from file f
106 lztrie loadLZTrie (FILE *f)
110 T = malloc (sizeof(struct slztrie));
111 if (fread (&T->n,sizeof(uint),1,f) != 1) {
112 fprintf (stderr,"Error: Cannot read LZTrie from file\n");
115 T->data = malloc (((2*T->n+W-1)/W)*sizeof(uint));
116 if (fread (T->data,sizeof(uint),(2*T->n+W-1)/W,f) != (2*T->n+W-1)/W) {
117 fprintf (stderr,"Error: Cannot read LZTrie from file\n");
120 T->pdata = createParentheses (T->data,2*T->n,true);
121 T->nbits = bits(T->n-1);
122 T->letters = malloc (T->n*sizeof(byte));
123 if (fread (T->letters,sizeof(byte),T->n,f) != T->n) {
124 fprintf (stderr,"Error: Cannot read LZTrie from file\n");
128 T->Node = loadNodemap(f);
129 fread(&T->text_length, sizeof(uint), 1, f);
130 T->TPos = loadPosition(f, T->text_length);
132 // boost first access
133 T->boost = malloc(256*sizeof(trieNode));
134 for (i=0;i<256;i++) T->boost[i] = NULLT;
135 i = 1; // shortcut for first child of root
136 while (i != 2*T->n-1) { // shortcut for its closing parenthesis
137 T->boost[T->letters[i-rank(T->pdata->bdata,i)]] = i;
138 // shortcut for leftrankLZTrie
139 i = findclose(T->pdata,i)+1;
146 // letter by which node i descends
148 byte letterLZTrie (lztrie T, trieNode i)
150 return T->letters[i-rank(T->pdata->bdata,i)]; // shortcut leftrankLZTrie
153 // go down by letter c, if possible
156 trieNode childLZTrie (lztrie T, trieNode i, byte c)
161 while (!bitget1(T->data,j)) { // there is a child here
162 tc = T->letters[j-rank(T->pdata->bdata,j)];
163 // shortcut for leftrankLZTrie: is a child by letter c?
165 if (tc == c) return j;
166 j = findclose(T->pdata,j)+1;
168 return NULLT; // no child to go down by c
171 // go up, if possible
173 trieNode parentLZTrie (lztrie T, trieNode i)
175 if (i == ROOT) return NULLT; // parent of root
176 if (T->boost[T->letters[i-rank(T->pdata->bdata,i)]] == i) return ROOT;
177 return enclose (T->pdata,i);
182 uint subtreesizeLZTrie (lztrie T, trieNode i)
185 j = findclose (T->pdata,i);
191 uint depthLZTrie (lztrie T, trieNode i)
193 return excess (T->pdata,i);
196 // smallest rank in subtree
198 uint leftrankLZTrie (lztrie T, trieNode i)
200 { return i-rank(T->pdata->bdata,i);
203 // largest rank in subtree
205 uint rightrankLZTrie (lztrie T, trieNode i)
208 j = findclose (T->pdata,i);
209 return j-rank(T->pdata->bdata,j)-1;
212 // is node i ancestor of node j?
214 bool ancestorLZTrie (lztrie T, trieNode i, trieNode j)
216 return (i <= j) && (j < findclose (T->pdata,i));
219 // next node from i, in preorder, adds/decrements depth accordingly
220 // assumes it *can* go on!
222 trieNode nextLZTrie (lztrie T, trieNode i, uint *depth)
224 uint *data = T->data;
226 while (bitget1(data,i)) { i++; (*depth)--; }
232 // extracts text from position "from" up to position "to".
233 // Parameter "snippet_length" is the actual length of the snippet extracted.
234 // The extracted text is stored in array "snippet".
236 void extract(lztrie T,
240 ulong *snippet_length)
242 ulong snpp_pos, posaux, idaux;
246 nodemap Node = T->Node;
248 if (to > (T->text_length-1)) to = T->text_length-1;
249 if (from > (T->text_length-1)) from = T->text_length-1;
251 *snippet_length = to-from+1;
252 *snippet = malloc(*snippet_length);
254 idfrom = getphrasePosition(T->TPos, from);
255 idto = getphrasePosition(T->TPos, to+1);
256 // *snippet_length = to-from+1;
258 posaux = getPosition(T->TPos, idto+1)-1;
259 p = mapto(Node, idto);
261 while (p&&(posaux > to)) {
262 p = parentLZTrie(T, p);
266 snpp_pos = (*snippet_length)-1;
269 for (idaux = idto; idaux != idfrom;) {
271 snppt[snpp_pos--] = letterLZTrie(T, p);
272 p = parentLZTrie(T, p);
274 p = mapto(Node, --idaux);
276 if (idfrom != idto) posaux = getPosition(T->TPos, idfrom+1)-(from!=0);
277 while (p && (posaux >= from)) {
278 snppt[snpp_pos--] = letterLZTrie(T, p);
279 p = parentLZTrie(T, p);
284 uint sizeofLZTrie(lztrie T)
286 return sizeof(struct slztrie) +
287 sizeofParentheses(T->pdata) +
289 sizeofNodemap(T->Node) +
290 sizeofPosition(T->TPos);