--- /dev/null
+\r
+#include "XMLTreeBuilder.h"\r
+#include "basics.h"\r
+\r
+// OpenDocument(empty_texts): it starts the construction of the data structure for\r
+// the XML document. Parameter empty_texts indicates whether we index empty texts\r
+// in document or not. Returns a non-zero value upon success, NULLT in case of error.\r
+int XMLTreeBuilder::OpenDocument(bool empty_texts, int sample_rate_text, bool dtc)\r
+ {\r
+ found_attributes = false;\r
+ npar = 0;\r
+ parArraySize = 1;\r
+ ntagnames = 4; \r
+ disable_tc = dtc;\r
+ \r
+ indexing_empty_texts = empty_texts;\r
+ \r
+ par_aux = (pb *)umalloc(sizeof(pb)*parArraySize);\r
+ \r
+ tags_aux = (TagType *) umalloc(sizeof(TagType));\r
+ \r
+ TagName = (unsigned char **) umalloc(4*sizeof(unsigned char*));\r
+ TagName[0] = (unsigned char *) umalloc(4*sizeof(unsigned char));\r
+ strcpy((char *) TagName[0], "<@>");\r
+ TagName[1] = (unsigned char *) umalloc(4*sizeof(unsigned char));\r
+ strcpy((char *) TagName[1], "<$>");\r
+ TagName[2] = (unsigned char *) umalloc(5*sizeof(unsigned char));\r
+ strcpy((char *) TagName[2], "/<@>");\r
+ TagName[3] = (unsigned char *) umalloc(5*sizeof(unsigned char));\r
+ strcpy((char *) TagName[3], "/<$>");\r
+\r
+ if (!indexing_empty_texts) \r
+ empty_texts_aux = (unsigned int *)umalloc(sizeof(unsigned int));\r
+ \r
+ if (disable_tc)\r
+ TextBuilder = 0;\r
+ else \r
+ TextBuilder = new TextCollectionBuilder((unsigned)sample_rate_text);\r
+ Text = 0;\r
+ \r
+ return 1; // indicates success in the initialization of the data structure\r
+ }\r
+\r
+// CloseDocument(): it finishes the construction of the data structure for the XML\r
+// document. Tree and tags are represented in the final form, dynamic data \r
+// structures are made static, and the flag "finished" is set to true. After that, \r
+// the data structure can be queried.\r
+XMLTree *XMLTreeBuilder::CloseDocument()\r
+ { \r
+ // closing parenthesis for the tree root\r
+ par_aux = (pb *)urealloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb))));\r
+ setbit(par_aux, npar, CP);\r
+ npar++;\r
+ \r
+ // makes the text collection static\r
+ if (!disable_tc) {\r
+ assert(Text == 0);\r
+ assert(TextBuilder != 0);\r
+ Text = TextBuilder->InitTextCollection();\r
+ delete TextBuilder;\r
+ TextBuilder = 0;\r
+ }\r
+\r
+ XMLTree *T = new XMLTree(par_aux, npar, TagName, ntagnames, empty_texts_aux, tags_aux, \r
+ Text, CachedText, indexing_empty_texts, disable_tc);\r
+ return T; \r
+ }\r
+\r
+\r
+// NewOpenTag(tagname): indicates the event of finding a new opening tag in the document.\r
+// Tag name is given. Returns a non-zero value upon success, and returns NULLT\r
+// in case of failing when trying to insert the new tag.\r
+int XMLTreeBuilder::NewOpenTag(unsigned char *tagname)\r
+ {\r
+ int i;\r
+ \r
+ // inserts a new opening parentheses in the bit sequence\r
+ if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis\r
+ par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize);\r
+ parArraySize *= 2;\r
+ }\r
+ \r
+ setbit(par_aux,npar,OP); // marks a new opening parenthesis\r
+\r
+ // transforms the tagname into a tag identifier. If the tag is new, we insert\r
+ // it in the table.\r
+ for (i=0; i<ntagnames; i++)\r
+ if (strcmp((const char *)tagname,(const char *)TagName[i])==0) break;\r
+ \r
+\r
+ // NewOpenTag("<@>") was called\r
+ if (i==0) \r
+ found_attributes=true;\r
+\r
+ if (i==ntagnames) { // the tag is a new one, then we insert it\r
+ TagName = (unsigned char **)urealloc(TagName, sizeof(char *)*(ntagnames+1));\r
+ \r
+ if (!TagName) {\r
+ fprintf(stderr, "Error: not enough memory\n");\r
+ return NULLT;\r
+ }\r
+ \r
+ ntagnames++;\r
+ TagName[i] = (unsigned char *)umalloc(sizeof(unsigned char)*(strlen((const char *)tagname)+1));\r
+ strcpy((char *)TagName[i], (const char *)tagname);\r
+ } \r
+ tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1));\r
+\r
+ tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags\r
+ \r
+ npar++;\r
+ \r
+ return 1; // success \r
+ }\r
+\r
+\r
+// NewClosingTag(tagname): indicates the event of finding a new closing tag in the document.\r
+// Tag name is given. Returns a non-zero value upon success, and returns NULLT\r
+// in case of failing when trying to insert the new tag.\r
+int XMLTreeBuilder::NewClosingTag(unsigned char *tagname)\r
+ {\r
+ int i;\r
+ \r
+ // inserts a new closing parentheses in the bit sequence\r
+ if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis\r
+ par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize);\r
+ parArraySize *= 2;\r
+ }\r
+ \r
+ setbit(par_aux,npar,CP); // marks a new closing parenthesis\r
+\r
+ // transforms the tagname into a tag identifier. If the tag is new, we insert\r
+ // it in the table.\r
+ for (i=0; i<ntagnames; i++)\r
+ if ((strcmp((const char *)tagname,(const char *)(TagName[i]+1))==0) && (TagName[i][0]=='/')) break;\r
+ \r
+ if (i==ntagnames) { // the tag is a new one, then we insert it\r
+ TagName = (unsigned char **)urealloc(TagName, sizeof(char *)*(ntagnames+1));\r
+ \r
+ ntagnames++;\r
+ TagName[i] = (unsigned char *)umalloc(sizeof(char)*(strlen((const char *)tagname)+2));\r
+ TagName[i][0] = '/';\r
+ strcpy((char *)&(TagName[i][1]), (const char *)tagname);\r
+ } \r
+\r
+ tags_aux = (TagType *)urealloc(tags_aux, sizeof(TagType)*(npar + 1));\r
+\r
+ tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags\r
+ \r
+ npar++;\r
+\r
+ return 1; // success\r
+ }\r
+\r
+\r
+// NewText(s): indicates the event of finding a new (non-empty) text s in the document.\r
+// The new text is inserted within the text collection. Returns a non-zero value upon\r
+// success, NULLT in case of error.\r
+int XMLTreeBuilder::NewText(unsigned char *s)\r
+ {\r
+ if (disable_tc) {\r
+ XMLTreeBuilder::NewEmptyText();\r
+ return 1;\r
+ }\r
+\r
+ if (!indexing_empty_texts) {\r
+ empty_texts_aux = (unsigned int *)urealloc(empty_texts_aux, sizeof(pb)*(1+(npar-1)/(8*sizeof(pb))));\r
+ bitset(empty_texts_aux, npar-1); // marks the non-empty text with a 1 in the bit vector\r
+ }\r
+ \r
+ TextBuilder->InsertText(s);\r
+ string cpps = (char*) s;\r
+ CachedText.push_back(cpps); \r
+ \r
+ return 1; // success\r
+ }\r
+\r
+// NewEmptyText(): indicates the event of finding a new empty text in the document.\r
+// In case of indexing empty and non-empty texts, we insert the empty texts into the\r
+// text collection. In case of indexing only non-empty texts, it just indicates an\r
+// empty text in the bit vector of empty texts. Returns a non-zero value upon\r
+// success, NULLT in case of error.\r
+int XMLTreeBuilder::NewEmptyText() \r
+ {\r
+ unsigned char c = 0;\r
+\r
+ if (!indexing_empty_texts) {\r
+ empty_texts_aux = (unsigned int *)urealloc(empty_texts_aux, sizeof(pb)*(1+(npar-1)/(8*sizeof(pb))));\r
+ \r
+ bitclean(empty_texts_aux, npar-1); // marks the empty text with a 0 in the bit vector\r
+ }\r
+ else TextBuilder->InsertText(&c); // we insert the empty text just in case we index all the texts\r
+ \r
+ return 1; // success \r
+ }\r
+\r