--- /dev/null
+
+
+// Implements the LZtrie data structure
+
+#include "lztrie.h"
+
+ // creates a lztrie structure from a parentheses bitstring,
+ // a letter array in preorder, and an id array in preorder,
+ // all of which except the latter become owned
+ // n is the total number of nodes (n letters/ids, 2n parentheses)
+
+lztrie createLZTrie(uint *string, byte *letters, uint *Node, uint n, uint text_length)
+ {
+ lztrie T;
+ int i;
+ T = malloc(sizeof(struct slztrie));
+ T->data = string;
+ T->pdata = createParentheses(string,2*n,true);
+ T->n = n;
+ T->nbits = bits(n-1);
+ T->letters = letters;
+ T->Node = createNodemap(Node, n, n);
+ T->TPos = createPosition(T, text_length);
+ T->text_length = text_length;
+ // boost first access
+ T->boost = malloc(256*sizeof(trieNode));
+ for (i=0;i<256;i++) T->boost[i] = NULLT;
+ i = 1; // shortcut for first child of root
+ while (i != 2*n-1) { // shortcut for its closing parenthesis
+ T->boost[T->letters[i-rank(T->pdata->bdata,i)]] = i;
+ // shortcut for leftrankLZTrie
+ i = findclose(T->pdata,i)+1;
+ }
+ return T;
+ }
+
+// builds an lztrie from a text. "s" is the text terminator.
+lztrie buildLZTrie(byte *text, byte s, uint text_length)
+ {
+ trie T;
+ uint n;
+ uint *parent;
+ byte *letters;
+ lztrie LZT;
+ uint *Node;
+ // first creates a full trie T
+ T = createTrie();
+ do {
+ text = insertTrie(T,text);
+ }
+ while (text[-1] != s);
+ // now compresses it
+ n = T->nid;
+ parent = malloc(((2*n+W-1)/W)*sizeof(uint));
+ letters = malloc(n*sizeof(byte));
+ // Aqui pedir espacio para Node, ver como lo voy a construir en vez de sacar solo los ids
+ Node = malloc(n*sizeof(uint));
+ representTrie(T,parent,letters,Node);
+
+ destroyTrie(T);
+ LZT = createLZTrie(parent,letters,Node,n,text_length);
+ return LZT;
+ }
+
+
+
+ // frees LZTrie structure, including the owned data
+
+void destroyLZTrie(lztrie T)
+ {
+ destroyParentheses(T->pdata);
+ free(T->letters);
+ destroyNodemap(T->Node);
+ destroyPosition(T->TPos);
+ free(T->boost);
+ free(T);
+ }
+
+ // stores lztrie T on file f
+
+void saveLZTrie (lztrie T, FILE *f)
+ {
+ if (fwrite (&T->n,sizeof(uint),1,f) != 1) {
+ fprintf (stderr,"Error: Cannot write LZTrie on file\n");
+ exit(1);
+ }
+ if (fwrite (T->data,sizeof(uint),(2*T->n+W-1)/W,f) != (2*T->n+W-1)/W) {
+ fprintf (stderr,"Error: Cannot write LZTrie on file\n");
+ exit(1);
+ }
+ if (fwrite (T->letters,sizeof(byte),T->n,f) != T->n) {
+ fprintf (stderr,"Error: Cannot write LZTrie on file\n");
+ exit(1);
+ }
+
+ saveNodemap(f, T->Node);
+ fwrite(&T->text_length, sizeof(uint), 1, f);
+ savePosition(f, T->TPos);
+ }
+
+ // loads lztrie T from file f
+
+lztrie loadLZTrie (FILE *f)
+ {
+ lztrie T;
+ int i;
+ T = malloc (sizeof(struct slztrie));
+ if (fread (&T->n,sizeof(uint),1,f) != 1) {
+ fprintf (stderr,"Error: Cannot read LZTrie from file\n");
+ exit(1);
+ }
+ T->data = malloc (((2*T->n+W-1)/W)*sizeof(uint));
+ if (fread (T->data,sizeof(uint),(2*T->n+W-1)/W,f) != (2*T->n+W-1)/W) {
+ fprintf (stderr,"Error: Cannot read LZTrie from file\n");
+ exit(1);
+ }
+ T->pdata = createParentheses (T->data,2*T->n,true);
+ T->nbits = bits(T->n-1);
+ T->letters = malloc (T->n*sizeof(byte));
+ if (fread (T->letters,sizeof(byte),T->n,f) != T->n) {
+ fprintf (stderr,"Error: Cannot read LZTrie from file\n");
+ exit(1);
+ }
+
+ T->Node = loadNodemap(f);
+ fread(&T->text_length, sizeof(uint), 1, f);
+ T->TPos = loadPosition(f, T->text_length);
+
+ // boost first access
+ T->boost = malloc(256*sizeof(trieNode));
+ for (i=0;i<256;i++) T->boost[i] = NULLT;
+ i = 1; // shortcut for first child of root
+ while (i != 2*T->n-1) { // shortcut for its closing parenthesis
+ T->boost[T->letters[i-rank(T->pdata->bdata,i)]] = i;
+ // shortcut for leftrankLZTrie
+ i = findclose(T->pdata,i)+1;
+ }
+
+ return T;
+ }
+
+
+ // letter by which node i descends
+
+byte letterLZTrie (lztrie T, trieNode i)
+ {
+ return T->letters[i-rank(T->pdata->bdata,i)]; // shortcut leftrankLZTrie
+ }
+
+ // go down by letter c, if possible
+
+
+trieNode childLZTrie (lztrie T, trieNode i, byte c)
+ {
+ trieNode j;
+ byte tc;
+ j = i+1;
+ while (!bitget1(T->data,j)) { // there is a child here
+ tc = T->letters[j-rank(T->pdata->bdata,j)];
+ // shortcut for leftrankLZTrie: is a child by letter c?
+ if (tc > c) break;
+ if (tc == c) return j;
+ j = findclose(T->pdata,j)+1;
+ }
+ return NULLT; // no child to go down by c
+ }
+
+ // go up, if possible
+
+trieNode parentLZTrie (lztrie T, trieNode i)
+ {
+ if (i == ROOT) return NULLT; // parent of root
+ if (T->boost[T->letters[i-rank(T->pdata->bdata,i)]] == i) return ROOT;
+ return enclose (T->pdata,i);
+ }
+
+ // subtree size
+
+uint subtreesizeLZTrie (lztrie T, trieNode i)
+ {
+ trieNode j;
+ j = findclose (T->pdata,i);
+ return (j-i+1)/2;
+ }
+
+ // depth
+
+uint depthLZTrie (lztrie T, trieNode i)
+ {
+ return excess (T->pdata,i);
+ }
+
+ // smallest rank in subtree
+
+uint leftrankLZTrie (lztrie T, trieNode i)
+
+ { return i-rank(T->pdata->bdata,i);
+ }
+
+ // largest rank in subtree
+
+uint rightrankLZTrie (lztrie T, trieNode i)
+ {
+ trieNode j;
+ j = findclose (T->pdata,i);
+ return j-rank(T->pdata->bdata,j)-1;
+ }
+
+ // is node i ancestor of node j?
+
+bool ancestorLZTrie (lztrie T, trieNode i, trieNode j)
+ {
+ return (i <= j) && (j < findclose (T->pdata,i));
+ }
+
+ // next node from i, in preorder, adds/decrements depth accordingly
+ // assumes it *can* go on!
+
+trieNode nextLZTrie (lztrie T, trieNode i, uint *depth)
+ {
+ uint *data = T->data;
+ i++;
+ while (bitget1(data,i)) { i++; (*depth)--; }
+ (*depth)++;
+ return i;
+ }
+
+
+// extracts text from position "from" up to position "to".
+// Parameter "snippet_length" is the actual length of the snippet extracted.
+// The extracted text is stored in array "snippet".
+//
+void extract(lztrie T,
+ ulong from,
+ ulong to,
+ byte **snippet,
+ ulong *snippet_length)
+ {
+ ulong snpp_pos, posaux, idaux;
+ uint idfrom,idto;
+ trieNode p;
+ byte *snppt;
+ nodemap Node = T->Node;
+
+ if (to > (T->text_length-1)) to = T->text_length-1;
+ if (from > (T->text_length-1)) from = T->text_length-1;
+
+ *snippet_length = to-from+1;
+ *snippet = malloc(*snippet_length);
+
+ idfrom = getphrasePosition(T->TPos, from);
+ idto = getphrasePosition(T->TPos, to+1);
+ // *snippet_length = to-from+1;
+
+ posaux = getPosition(T->TPos, idto+1)-1;
+ p = mapto(Node, idto);
+
+ while (p&&(posaux > to)) {
+ p = parentLZTrie(T, p);
+ posaux--;
+ }
+
+ snpp_pos = (*snippet_length)-1;
+ snppt = *snippet;
+
+ for (idaux = idto; idaux != idfrom;) {
+ while (p) {
+ snppt[snpp_pos--] = letterLZTrie(T, p);
+ p = parentLZTrie(T, p);
+ }
+ p = mapto(Node, --idaux);
+ }
+ if (idfrom != idto) posaux = getPosition(T->TPos, idfrom+1)-(from!=0);
+ while (p && (posaux >= from)) {
+ snppt[snpp_pos--] = letterLZTrie(T, p);
+ p = parentLZTrie(T, p);
+ posaux--;
+ }
+ }
+
+uint sizeofLZTrie(lztrie T)
+ {
+ return sizeof(struct slztrie) +
+ sizeofParentheses(T->pdata) +
+ T->n*sizeof(byte) +
+ sizeofNodemap(T->Node) +
+ sizeofPosition(T->TPos);
+ }