fprintf(stderr, "Error: data structure has not been constructed properly\n");\r
exit(1);\r
}\r
-\r
- return parent(Par, x);\r
+ if (x == Root())\r
+ return NULLT;\r
+ else\r
+ return parent(Par, x);\r
}\r
\r
// Child(x,i): returns the i-th child of node x, assuming it exists.\r
fprintf(stderr, "Error: data structure has not been constructed properly\n");\r
exit(1);\r
}\r
-\r
+ if (x == Root())\r
+ return NULLT;\r
+ \r
return next_sibling(Par, x);\r
}\r
\r
// OpenDocument(empty_texts): it starts the construction of the data structure for\r
// the XML document. Parameter empty_texts indicates whether we index empty texts\r
// in document or not. Returns a non-zero value upon success, NULLT in case of error.\r
-int XMLTree::OpenDocument(bool empty_texts, int sample_rate_text)\r
+int XMLTree::OpenDocument(bool empty_texts, int sample_rate_text,bool dtc)\r
{\r
initialized = true;\r
finished = false;\r
+ found_attributes = false;\r
npar = 0;\r
parArraySize = 1;\r
- ntagnames = 0; \r
- \r
+ ntagnames = 2; \r
+ disable_tc = dtc;\r
+ \r
indexing_empty_texts = empty_texts;\r
\r
par_aux = (pb *)malloc(sizeof(pb)*parArraySize);\r
return NULLT;\r
}\r
\r
- TagName = NULL;\r
+ TagName = (unsigned char **) malloc(2*sizeof(unsigned char*));\r
+ if (!TagName){\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT;\r
+ }\r
+\r
+ TagName[0] = (unsigned char *) malloc(4*sizeof(unsigned char));\r
+ strcpy((char *) TagName[0], "<@>");\r
+\r
+ if (!TagName[0]){\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT;\r
+ }\r
+\r
+ TagName[1] = (unsigned char *) malloc(4*sizeof(unsigned char));\r
+ if (!TagName[1]){\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT;\r
+ }\r
+\r
+ strcpy((char *) TagName[1], "<$>");\r
+\r
\r
if (!indexing_empty_texts) {\r
empty_texts_aux = (unsigned int *)malloc(sizeof(unsigned int));\r
static_permutation_builder * pmb = new static_permutation_builder_mrrr(PERM_SAMPLE, bmb);\r
static_sequence_builder * ssb = new static_sequence_builder_gmr_chunk(bmb, pmb);\r
\r
- Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar-1,2*ntagnames, bmb, ssb);\r
+\r
+ // If an attribute was found, then "<@>" is present in the tree;\r
+ // otherwise it is not. "<$>" is never present in the tree.\r
+ int ntagsize = found_attributes ? 2*ntagnames-1 : 2*ntagnames - 2;\r
+\r
+ Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar-1,ntagsize, bmb, ssb);\r
\r
delete bmb;\r
delete pmb;\r
delete ssb;\r
// makes the text collection static\r
- Text->MakeStatic();\r
+ if (!disable_tc)\r
+ Text->MakeStatic();\r
\r
// creates the data structure marking the non-empty texts (just in the case it is necessary)\r
- if (!indexing_empty_texts) \r
+ if (!indexing_empty_texts) {\r
EBVector = new static_bitsequence_rrr02((uint *)empty_texts_aux,(ulong)npar,(uint)32);\r
+ free (empty_texts_aux);\r
+ empty_texts_aux = NULL;\r
+ }\r
+ \r
+ free(tags_aux);\r
+ tags_aux = NULL;\r
\r
finished = true;\r
\r
// transforms the tagname into a tag identifier. If the tag is new, we insert\r
// it in the table.\r
for (i=0; i<ntagnames; i++)\r
- if (strcmp((const char *)tagname,(const char *)TagName[i])==0) break;\r
+ if (strcmp((const char *)tagname,(const char *)TagName[i])==0) break;\r
\r
+\r
+ // NewOpenTag("<@>") was called\r
+ if (i==0) \r
+ found_attributes=true;\r
+\r
if (i==ntagnames) { // the tag is a new one, then we insert it\r
TagName = (unsigned char **)realloc(TagName, sizeof(char *)*(ntagnames+1));\r
\r
return NULLT;\r
}\r
\r
+ if (disable_tc) {\r
+ XMLTree::NewEmptyText();\r
+ return 1;\r
+ };\r
+\r
if (!indexing_empty_texts) {\r
empty_texts_aux = (unsigned int *)realloc(empty_texts_aux, sizeof(pb)*(1+(npar-1)/(8*sizeof(pb))));\r
if (!empty_texts_aux) {\r
}\r
\r
\r
-\r
+// RegisterTag(tagname): returns the identifier of tagname, appending it to the\r
+// TagName table when GetTagId does not find it. Only usable once the document\r
+// has been finished; returns NULLT before that, or when memory runs out.\r
+TagType XMLTree::RegisterTag(unsigned char *tagname)\r
+{\r
+   if (!finished)\r
+      return NULLT;\r
+\r
+   TagType id = XMLTree::GetTagId(tagname);\r
+   if (id != NULLT)\r
+      return id;   // already registered\r
+\r
+   // Grow the table through a temporary so the existing table is not lost\r
+   // (and ntagnames stays consistent) if realloc fails.\r
+   unsigned char **grown = (unsigned char **) realloc(TagName, (ntagnames+1)*sizeof(unsigned char *));\r
+   if (!grown) {\r
+      fprintf(stderr, "Error: not enough memory\n");\r
+      return NULLT;\r
+   }\r
+   TagName = grown;\r
+\r
+   // realloc leaves the new slot uninitialised: allocate storage for the\r
+   // name before copying into it (the original copied through a wild pointer).\r
+   unsigned char *name_copy = (unsigned char *) malloc(strlen((const char *)tagname)+1);\r
+   if (!name_copy) {\r
+      fprintf(stderr, "Error: not enough memory\n");\r
+      return NULLT;\r
+   }\r
+   strcpy((char *)name_copy, (const char *)tagname);\r
+\r
+   // Publish the new entry only after every allocation has succeeded.\r
+   id = ntagnames;\r
+   TagName[id] = name_copy;\r
+   ntagnames = ntagnames + 1;\r
+\r
+   return id;\r
+}\r