s_tree+=2*sizeof(uint)+sizeof(uint)*uint_len(XML_Tree->tags_blen,XML_Tree->tags_len);\r
s_tree+= XML_Tree->Tags->size();\r
\r
+ /// FIXME:UGLY tests!\r
+ /*uint * seq = new uint[XML_Tree->tags_len];\r
+ for(uint i=0;i<XML_Tree->tags_len;i++)\r
+ seq[i] = get_field(XML_Tree->tags_fix,XML_Tree->tags_blen,i);\r
+ cout << "Tags test: " << XML_Tree->Tags->test(seq,XML_Tree->tags_len) << endl;\r
+ delete [] seq;*/\r
+ /// End ugly tests\r
+\r
s_text = ftell(fp);\r
\r
// loads the texts\r
Par = (bp *)umalloc(sizeof(bp));\r
bp_construct(Par, npar, par_aux, OPT_DEGREE|0); \r
// creates structure for tags\r
- //static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(20);\r
- //static_permutation_builder * pmb = new static_permutation_builder_mrrr(PERM_SAMPLE, bmb);\r
- //static_sequence_builder * ssb = new static_sequence_builder_gmr_chunk(bmb, pmb);\r
-\r
\r
// If we found an attribute then "<@>" is present in the tree\r
// if we didn't then it is not. "<$>" is never present in the tree\r
- uint max_tag = 0;\r
- for(uint i=0;i<(uint)npar-1;i++)\r
- max_tag = max(max_tag,tags_aux[i]);\r
- max_tag++;\r
- int ntagsize = found_attributes ? 2*ntagnames-1 : 2*ntagnames - 2;\r
- tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1));\r
- tags_aux[npar++] = max_tag;\r
-\r
- static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(20);\r
+ //uint max_tag = 0;\r
+ //for(uint i=0;i<(uint)npar-1;i++)\r
+ // max_tag = max(max_tag,tags_aux[i]);\r
+ //max_tag++;\r
+ //tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1));\r
+ //tags_aux[npar++] = max_tag;\r
+ //int ntagsize = found_attributes ? 2*ntagnames-1 : 2*ntagnames - 2;\r
+ int ntagsize = 2*ntagnames + 2;\r
+\r
+ //static_bitsequence_builder * bmb = new static_bitsequence_builder_brw32(20);\r
+ //static_permutation_builder * pmb = new static_permutation_builder_mrrr(PERM_SAMPLE, bmb);\r
+ //static_sequence_builder * ssb = new static_sequence_builder_gmr_chunk(bmb, pmb);\r
+ static_bitsequence_builder * bmb = new static_bitsequence_builder_sdarray();\r
alphabet_mapper *am = new alphabet_mapper_none();\r
- wt_coder * wc = new wt_coder_huff((uint*)tags_aux,npar,am);\r
- Tags = new static_sequence_wvtree((uint*)tags_aux,npar,wc ,bmb, am);\r
- //Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar-1,ntagsize, bmb, ssb);\r
+ //wt_coder * wc = new wt_coder_huff((uint*)tags_aux,npar,am);\r
+ //Tags = new static_sequence_wvtree((uint*)tags_aux,npar,wc ,bmb, am);\r
+ //Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar,ntagsize, bmb, ssb);\r
+ Tags = new static_sequence_bs((uint*)tags_aux,npar,am,bmb);\r
+ \r
+ cout << "Tags test: " << Tags->test((uint*)tags_aux,npar) << endl;\r
+\r
tags_blen = bits(max_tag);\r
tags_len = (uint)npar;\r
tags_fix = new uint[uint_len(tags_blen,tags_len)];\r
- for(uint i=0;i<(uint)npar-1;i++)\r
+ for(uint i=0;i<(uint)npar;i++)\r
set_field(tags_fix,tags_blen,i,tags_aux[i]);\r
\r
delete bmb;\r
//delete pmb;\r
//delete ssb;\r
- // makes the text collection static\r
+\r
+ \r
+ // makes the text collection static\r
if (!disable_tc)\r
Text->MakeStatic();\r
\r
// this should be changed for more efficient processing\r
for (i=0; i<ntagnames; i++)\r
if (strcmp((const char *)tagname,(const char *)TagName[i])==0) break; \r
- if (i==ntagnames) return ntagnames; //(TagType)NULLT; // tagname does not exists in the table\r
+ if (i==ntagnames) return (TagType)-1; //ntagnames; //(TagType)NULLT; // tagname does not exists in the table\r
else return i;\r
}\r
\r
unsigned char *XMLTree::GetTagName(TagType tagid)\r
{\r
unsigned char *s;\r
-\r
+ if(tagid==(uint)-1) return NULL;\r
if (tagid >= ntagnames) return NULL; // invalid tag identifier\r
s = (unsigned char *)umalloc((strlen((const char *)TagName[tagid])+1)*sizeof(unsigned char));\r
strcpy((char *)s, (const char *)TagName[tagid]);\r
\r
// GetTagNameByRef: looks up the name stored for tag identifier tagid.\r
// Returns TagName[tagid] cast to const unsigned char*; no copy is made, so\r
// the returned pointer refers to the table entry itself.\r
// Returns NULL when tagid equals (uint)-1 or when tagid >= ntagnames.\r
// NOTE(review): (uint)-1 looks like the "tag not found" sentinel, matching\r
// the (TagType)-1 returned by the name lookup earlier in this file - confirm.\r
const unsigned char *XMLTree::GetTagNameByRef(TagType tagid)\r
{\r
+ if(tagid==(uint)-1) return NULL;\r
if (tagid >= ntagnames) return NULL; // invalid tag identifier\r
return ((const unsigned char*) TagName[tagid]);\r
}\r