s_tree+=2*sizeof(uint)+sizeof(uint)*uint_len(XML_Tree->tags_blen,XML_Tree->tags_len);\r
s_tree+= XML_Tree->Tags->size();\r
\r
+ /// FIXME:UGLY tests!\r
+ /*uint * seq = new uint[XML_Tree->tags_len];\r
+ for(uint i=0;i<XML_Tree->tags_len;i++)\r
+ seq[i] = get_field(XML_Tree->tags_fix,XML_Tree->tags_blen,i);\r
+ cout << "Tags test: " << XML_Tree->Tags->test(seq,XML_Tree->tags_len) << endl;\r
+ delete [] seq;*/\r
+ /// End ugly tests\r
+\r
s_text = ftell(fp);\r
\r
// loads the texts\r
if (!XML_Tree->disable_tc){\r
- XML_Tree->Text = TextCollection::InitTextCollection(sample_rate_text);\r
- XML_Tree->Text->Load(fp,sample_rate_text);\r
+ XML_Tree->Text = TextCollection::Load(fp,sample_rate_text);\r
int sst;\r
int st;\r
ufread(&sst, sizeof(int),1,fp);\r
Tags = NULL;\r
\r
//Text->~TextCollection();\r
- delete Text;\r
+ delete TextBuilder; \r
+ TextBuilder = NULL;\r
+ delete Text; \r
Text = NULL;\r
\r
initialized = false;\r
else return tagpos2node(s);\r
} \r
\r
+// TaggedFollBelow(x,tag,root): returns the first node tagged tag that appears after the closing\r
+// parenthesis of x (larger preorder than x and not in the subtree of x) and whose tag position\r
+// is strictly before that of the closing parenthesis of root. Returns NULLT if there is none,\r
+// or if x is NULLT or the root of the tree.\r
+treeNode XMLTree::TaggedFollBelow(treeNode x, TagType tag, treeNode root)\r
+ {\r
+\r
+ // guard first: no candidate can exist for these inputs, so skip the find_close on root\r
+ if (x == NULLT || x == Root())\r
+ return NULLT;\r
+\r
+ // tag position of root's closing parenthesis; candidates at or beyond it are rejected\r
+ int lim = node2tagpos(find_close(Par,root));\r
+\r
+ // occurrences of tag up to the closing parenthesis of x\r
+ int r = (int) Tags->rank(tag,find_close(Par,x));\r
+ // next occurrence after them; select returns -1 in case that there is no r+1-th tag.\r
+ int s = (int) Tags->select(tag, r+1);\r
+ if (s==-1 || s >= lim)\r
+ return NULLT;\r
+\r
+ return tagpos2node(s);\r
+ } \r
+\r
\r
// TaggedFollowingSibling(x,tag): returns the first node tagged tag with larger preorder than x and not in\r
// the subtree of x. Returns NULLT if there is none.\r
if (!indexing_empty_texts) \r
empty_texts_aux = (unsigned int *)umalloc(sizeof(unsigned int));\r
\r
- \r
- \r
- Text = TextCollection::InitTextCollection((unsigned)sample_rate_text);\r
+ if (disable_tc)\r
+ TextBuilder = 0;\r
+ else \r
+ TextBuilder = new TextCollectionBuilder((unsigned)sample_rate_text);\r
+ Text = 0;\r
\r
return 1; // indicates success in the initialization of the data structure\r
}\r
Par = (bp *)umalloc(sizeof(bp));\r
bp_construct(Par, npar, par_aux, OPT_DEGREE|0); \r
// creates structure for tags\r
- //static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(20);\r
- //static_permutation_builder * pmb = new static_permutation_builder_mrrr(PERM_SAMPLE, bmb);\r
- //static_sequence_builder * ssb = new static_sequence_builder_gmr_chunk(bmb, pmb);\r
-\r
\r
// If we found an attribute then "<@>" is present in the tree\r
// if we didn't then it is not. "<$>" is never present in the tree\r
uint max_tag = 0;\r
for(uint i=0;i<(uint)npar-1;i++)\r
max_tag = max(max_tag,tags_aux[i]);\r
- max_tag++;\r
- int ntagsize = found_attributes ? 2*ntagnames-1 : 2*ntagnames - 2;\r
- tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1));\r
- tags_aux[npar++] = max_tag;\r
+ //max_tag++;\r
+ //tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1));\r
+ //tags_aux[npar++] = max_tag;\r
+ //int ntagsize = found_attributes ? 2*ntagnames-1 : 2*ntagnames - 2;\r
+ int ntagsize = 2*ntagnames + 2;\r
\r
- static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(20);\r
+ //static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(20);\r
+ //static_permutation_builder * pmb = new static_permutation_builder_mrrr(PERM_SAMPLE, bmb);\r
+ //static_sequence_builder * ssb = new static_sequence_builder_gmr_chunk(bmb, pmb);\r
+ static_bitsequence_builder * bmb = new static_bitsequence_builder_sdarray();\r
alphabet_mapper *am = new alphabet_mapper_none();\r
- wt_coder * wc = new wt_coder_huff((uint*)tags_aux,npar,am);\r
- Tags = new static_sequence_wvtree((uint*)tags_aux,npar,wc ,bmb, am);\r
- //Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar-1,ntagsize, bmb, ssb);\r
+ //wt_coder * wc = new wt_coder_huff((uint*)tags_aux,npar,am);\r
+ //Tags = new static_sequence_wvtree((uint*)tags_aux,npar,wc ,bmb, am);\r
+ //Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar,ntagsize, bmb, ssb);\r
+ Tags = new static_sequence_bs((uint*)tags_aux,npar,am,bmb);\r
+ \r
+ cout << "Tags test: " << Tags->test((uint*)tags_aux,npar) << endl;\r
+\r
tags_blen = bits(max_tag);\r
tags_len = (uint)npar;\r
tags_fix = new uint[uint_len(tags_blen,tags_len)];\r
- for(uint i=0;i<(uint)npar-1;i++)\r
+ for(uint i=0;i<(uint)npar;i++)\r
set_field(tags_fix,tags_blen,i,tags_aux[i]);\r
\r
delete bmb;\r
//delete pmb;\r
//delete ssb;\r
+\r
+ \r
// makes the text collection static\r
if (!disable_tc)\r
- Text->MakeStatic();\r
- \r
+ {\r
+ // Text is only created here from the builder, so it must still be unset.\r
+ // (Must be a comparison: "Text = 0" assigns and makes the assertion fail unconditionally.)\r
+ assert(Text == 0);\r
+ assert(TextBuilder != 0);\r
+ // build the static collection from the accumulated texts, then drop the builder\r
+ Text = TextBuilder->InitTextCollection();\r
+ delete TextBuilder;\r
+ TextBuilder = 0;\r
+ }\r
+\r
// creates the data structure marking the non-empty texts (just in the case it is necessary)\r
if (!indexing_empty_texts) {\r
EBVector = new static_bitsequence_rrr02((uint *)empty_texts_aux,(ulong)npar,(uint)32);\r
bitset(empty_texts_aux, npar-1); // marks the non-empty text with a 1 in the bit vector\r
}\r
\r
- Text->InsertText(s);\r
+ TextBuilder->InsertText(s);\r
string cpps = (char*) s;\r
CachedText.push_back(cpps); \r
\r
\r
bitclean(empty_texts_aux, npar-1); // marks the empty text with a 0 in the bit vector\r
}\r
- else Text->InsertText(&c); // we insert the empty text just in case we index all the texts\r
+ else TextBuilder->InsertText(&c); // we insert the empty text just in case we index all the texts\r
\r
return 1; // success \r
}\r
// this should be changed for more efficient processing\r
for (i=0; i<ntagnames; i++)\r
if (strcmp((const char *)tagname,(const char *)TagName[i])==0) break; \r
- if (i==ntagnames) return ntagnames; //(TagType)NULLT; // tagname does not exists in the table\r
+ if (i==ntagnames) return (TagType)-1; //ntagnames; //(TagType)NULLT; // tagname does not exists in the table\r
else return i;\r
}\r
\r
unsigned char *XMLTree::GetTagName(TagType tagid)\r
{\r
unsigned char *s;\r
-\r
+ if(tagid==(uint)-1) return NULL;\r
if (tagid >= ntagnames) return NULL; // invalid tag identifier\r
s = (unsigned char *)umalloc((strlen((const char *)TagName[tagid])+1)*sizeof(unsigned char));\r
strcpy((char *)s, (const char *)TagName[tagid]);\r
\r
+// GetTagNameByRef(tagid): returns a pointer to the stored name of tag tagid (no copy is made).\r
+// Returns NULL if tagid is the (TagType)-1 "not found" value or is >= ntagnames.\r
const unsigned char *XMLTree::GetTagNameByRef(TagType tagid)\r
{\r
+ if(tagid==(TagType)-1) return NULL; // same sentinel type as returned when a tag name is not found\r
if (tagid >= ntagnames) return NULL; // invalid tag identifier\r
return ((const unsigned char*) TagName[tagid]);\r
}\r