#include "XMLTree.h"\r
#include <cstring>\r
-// functions to convert tag positions to the corresponding tree node and viceversa. \r
+\r
+ // functions to convert tag positions to the corresponding tree node and vice versa. \r
// These are implemented in order to be able to change the tree and Tags representations, \r
// without affecting the code so much.\r
// Current implementation corresponds to balanced-parentheses representation for\r
return (int)x;\r
}\r
\r
+// to prevent spurious "unused result" warnings\r
+\r
+// ufread: calls fread(ptr, size, nmemb, stream) and checks its result.\r
+// Throws the C string "ufread I/O error" when fewer than nmemb items were read.\r
+inline void ufread(void *ptr, size_t size, size_t nmemb, FILE *stream){\r
+  const size_t items_read = fread(ptr, size, nmemb, stream);\r
+  if (items_read >= nmemb)\r
+    return;\r
+\r
+  throw "ufread I/O error";\r
+}\r
+// ufwrite: calls fwrite(ptr, size, nmemb, stream) and checks its result.\r
+// Throws the C string "ufwrite I/O error" when fewer than nmemb items were written.\r
+inline void ufwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream){\r
+  const size_t items_written = fwrite(ptr, size, nmemb, stream);\r
+  if (items_written >= nmemb)\r
+    return;\r
+\r
+  throw "ufwrite I/O error";\r
+}\r
+\r
// Save: saves XML tree data structure to file. \r
void XMLTree::Save(unsigned char *filename) \r
{\r
saveTree(Par, fp);\r
\r
// stores the table with tag names\r
- fwrite(&ntagnames, sizeof(int), 1, fp);\r
+ ufwrite(&ntagnames, sizeof(int), 1, fp);\r
for (i=0; i<ntagnames;i++)\r
- fprintf(fp, "%s\n",TagName[i]);\r
+ fprintf(fp, "%s\n",TagName[i]);\r
+ \r
\r
// stores the flags\r
- fwrite(&indexing_empty_texts, sizeof(bool), 1, fp);\r
- fwrite(&initialized, sizeof(bool), 1, fp);\r
- fwrite(&finished, sizeof(bool), 1, fp);\r
+ ufwrite(&indexing_empty_texts, sizeof(bool), 1, fp);\r
+ ufwrite(&initialized, sizeof(bool), 1, fp);\r
+ ufwrite(&finished, sizeof(bool), 1, fp);\r
+ ufwrite(&disable_tc, sizeof(bool),1,fp);\r
\r
if (!indexing_empty_texts) EBVector->save(fp);\r
\r
Tags->save(fp);\r
\r
// stores the texts \r
- Text->Save(fp);\r
+ if (!disable_tc)\r
+ Text->Save(fp);\r
\r
fclose(fp);\r
\r
loadTree(XML_Tree->Par, fp); \r
\r
// stores the table with tag names\r
- fread(&XML_Tree->ntagnames, sizeof(int), 1, fp);\r
-\r
+ ufread(&XML_Tree->ntagnames, sizeof(int), 1, fp);\r
XML_Tree->TagName = (unsigned char **)malloc(XML_Tree->ntagnames*sizeof(unsigned char *));\r
\r
for (i=0; i<XML_Tree->ntagnames;i++) {\r
- int k = feof(fp);\r
- fscanf(fp, "%s\n",filenameaux);\r
- XML_Tree->TagName[i] = (unsigned char *)malloc(sizeof(unsigned char)*(strlen((const char *)filenameaux)+1));\r
- strcpy((char *)XML_Tree->TagName[i], (const char *)filenameaux);\r
+ \r
+ // Kim is it needed ?\r
+ int k = feof(fp);\r
+ // fscanf chokes on "\n" which is the case for the root element\r
+ char * r = fgets(filenameaux,1023,fp);\r
+ // int r = fscanf(fp, "<%s>\n",filenameaux);\r
+ if (r==NULL)\r
+ throw "Cannot read tag list";\r
+\r
+\r
+ int len = strlen((const char*)filenameaux);\r
+ XML_Tree->TagName[i] = (unsigned char *)calloc(len,sizeof(char));\r
+\r
+ //XML_Tree->TagName[i] = (unsigned char *)malloc(sizeof(unsigned char)*(strlen((const char *)filenameaux)+1));\r
+ //the - 1 removes the trailing \n\r
+ strncpy((char *)XML_Tree->TagName[i], (const char *)filenameaux,len - 1);\r
}\r
\r
// loads the flags\r
- fread(&(XML_Tree->indexing_empty_texts), sizeof(bool), 1, fp);\r
- fread(&(XML_Tree->initialized), sizeof(bool), 1, fp);\r
- fread(&(XML_Tree->finished), sizeof(bool), 1, fp);\r
- \r
+ ufread(&(XML_Tree->indexing_empty_texts), sizeof(bool), 1, fp);\r
+ ufread(&(XML_Tree->initialized), sizeof(bool), 1, fp);\r
+ ufread(&(XML_Tree->finished), sizeof(bool), 1, fp);\r
+ ufread(&(XML_Tree->disable_tc), sizeof(bool), 1, fp);\r
if (!(XML_Tree->indexing_empty_texts)) XML_Tree->EBVector = static_bitsequence_rrr02::load(fp);\r
\r
// loads the tags\r
XML_Tree->Tags = static_sequence::load(fp);\r
\r
- // loads the texts \r
- XML_Tree->Text->Load(fp,sample_rate_text);\r
+ // loads the texts\r
+ if (!XML_Tree->disable_tc){\r
+ XML_Tree->Text = TextCollection::InitTextCollection(sample_rate_text);\r
+ XML_Tree->Text->Load(fp,sample_rate_text);\r
+ }\r
+ else\r
+ XML_Tree->Text = NULL;\r
\r
fclose(fp);\r
- \r
return XML_Tree;\r
}\r
\r
treeNode XMLTree::Root() \r
{\r
if (!finished) {\r
- fprintf(stderr, "Error: data structure has not been constructed properly\n");\r
+ fprintf(stderr, "Root() : Error: data structure has not been constructed properly\n");\r
exit(1);\r
}\r
return root_node(Par);\r
fprintf(stderr, "Error: data structure has not been constructed properly\n");\r
exit(1);\r
}\r
-\r
- return parent(Par, x);\r
+ if (x == Root())\r
+ return NULLT;\r
+ else\r
+ return parent(Par, x);\r
}\r
\r
// Child(x,i): returns the i-th child of node x, assuming it exists.\r
// OpenDocument(empty_texts): it starts the construction of the data structure for\r
// the XML document. Parameter empty_texts indicates whether we index empty texts\r
// in document or not. Returns a non-zero value upon success, NULLT in case of error.\r
-int XMLTree::OpenDocument(bool empty_texts, int sample_rate_text)\r
+int XMLTree::OpenDocument(bool empty_texts, int sample_rate_text,bool dtc)\r
{\r
initialized = true;\r
finished = false;\r
+ found_attributes = false;\r
npar = 0;\r
parArraySize = 1;\r
- ntagnames = 0; \r
- \r
+ ntagnames = 2; \r
+ disable_tc = dtc;\r
+ \r
indexing_empty_texts = empty_texts;\r
\r
par_aux = (pb *)malloc(sizeof(pb)*parArraySize);\r
return NULLT;\r
}\r
\r
- TagName = NULL;\r
+ TagName = (unsigned char **) malloc(2*sizeof(unsigned char*));\r
+ if (!TagName){\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT;\r
+ }\r
+\r
+ TagName[0] = (unsigned char *) malloc(4*sizeof(unsigned char));\r
+ strcpy((char *) TagName[0], "<@>");\r
+\r
+ if (!TagName[0]){\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT;\r
+ }\r
+\r
+ TagName[1] = (unsigned char *) malloc(4*sizeof(unsigned char));\r
+ if (!TagName[1]){\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT;\r
+ }\r
+\r
+ strcpy((char *) TagName[1], "<$>");\r
+\r
\r
if (!indexing_empty_texts) {\r
empty_texts_aux = (unsigned int *)malloc(sizeof(unsigned int));\r
static_permutation_builder * pmb = new static_permutation_builder_mrrr(PERM_SAMPLE, bmb);\r
static_sequence_builder * ssb = new static_sequence_builder_gmr_chunk(bmb, pmb);\r
\r
- Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar-1,2*ntagnames, bmb, ssb);\r
+\r
+ // If we found an attribute then "<@>" is present in the tree\r
+ // if we didn't then it is not. "<$>" is never present in the tree\r
+ int ntagsize = found_attributes ? 2*ntagnames-1 : 2*ntagnames - 2;\r
+\r
+ Tags = new static_sequence_gmr((uint *) tags_aux, (uint) npar-1,ntagsize, bmb, ssb);\r
\r
delete bmb;\r
delete pmb;\r
delete ssb;\r
// makes the text collection static\r
- Text->MakeStatic();\r
+ if (!disable_tc)\r
+ Text->MakeStatic();\r
\r
// creates the data structure marking the non-empty texts (just in the case it is necessary)\r
- if (!indexing_empty_texts) \r
+ if (!indexing_empty_texts) {\r
EBVector = new static_bitsequence_rrr02((uint *)empty_texts_aux,(ulong)npar,(uint)32);\r
+ free (empty_texts_aux);\r
+ empty_texts_aux = NULL;\r
+ }\r
+ \r
+ free(tags_aux);\r
+ tags_aux = NULL;\r
\r
finished = true;\r
\r
// transforms the tagname into a tag identifier. If the tag is new, we insert\r
// it in the table.\r
for (i=0; i<ntagnames; i++)\r
- if (strcmp((const char *)tagname,(const char *)TagName[i])==0) break;\r
+ if (strcmp((const char *)tagname,(const char *)TagName[i])==0) break;\r
\r
+\r
+ // NewOpenTag("<@>") was called\r
+ if (i==0) \r
+ found_attributes=true;\r
+\r
if (i==ntagnames) { // the tag is a new one, then we insert it\r
TagName = (unsigned char **)realloc(TagName, sizeof(char *)*(ntagnames+1));\r
\r
return NULLT;\r
}\r
\r
+ if (disable_tc) {\r
+ XMLTree::NewEmptyText();\r
+ return 1;\r
+ };\r
+\r
if (!indexing_empty_texts) {\r
empty_texts_aux = (unsigned int *)realloc(empty_texts_aux, sizeof(pb)*(1+(npar-1)/(8*sizeof(pb))));\r
if (!empty_texts_aux) {\r
}\r
\r
\r
-\r
+// RegisterTag(tagname): returns the identifier of tagname. If GetTagId reports\r
+// NULLT for it, a copy of the name is appended to the TagName table and the new\r
+// identifier (the previous value of ntagnames) is returned.\r
+// Returns NULLT if the data structure is not finished yet, or if there is not\r
+// enough memory to store the new name; in the latter case TagName and ntagnames\r
+// are left unchanged.\r
+TagType XMLTree::RegisterTag(unsigned char *tagname)\r
+{\r
+  if (!finished)\r
+    return NULLT;\r
+\r
+  TagType id = XMLTree::GetTagId(tagname);\r
+  if (id != NULLT)\r
+    return id;\r
+\r
+  // The new table slot needs storage of its own: copy the name (including the\r
+  // terminating NUL) before touching the table, so a failure changes nothing.\r
+  size_t len = strlen((const char *) tagname);\r
+  unsigned char *name = (unsigned char *) malloc(len + 1);\r
+  if (!name) {\r
+    fprintf(stderr, "Error: not enough memory\n");\r
+    return NULLT;\r
+  }\r
+  memcpy(name, tagname, len + 1);\r
+\r
+  // Grow through a temporary: on failure realloc returns NULL and the old\r
+  // table is still valid, so TagName must not be overwritten.\r
+  unsigned char **table = (unsigned char **) realloc(TagName,\r
+      (ntagnames + 1) * sizeof(unsigned char *));\r
+  if (!table) {\r
+    free(name);\r
+    fprintf(stderr, "Error: not enough memory\n");\r
+    return NULLT;\r
+  }\r
+  TagName = table;\r
+\r
+  id = ntagnames;\r
+  TagName[id] = name;\r
+  ntagnames = ntagnames + 1;\r
+\r
+  return id;\r
+}\r