using std::string;\r
\r
XMLTreeBuilder::~XMLTreeBuilder(){
- 
+ //free(par_aux); NOTE(review): left disabled -- CloseDocument() passes par_aux to the XMLTree constructor and does not reset it; confirm who owns the buffer before enabling.
+ free(tags_aux); // CloseDocument() resets tags_aux to 0 after handing it to XMLTree, so this releases the array only when the document was never closed (free(0) is a no-op).
+ //delete other stuff. NOTE(review): the inline constructor does not initialize tags_aux; confirm the destructor never runs before OpenDocument() has assigned it.
+
}
\r
// OpenDocument(empty_texts): it starts the construction of the data structure for\r
// the XML document. Parameter empty_texts indicates whether we index empty texts\r
// in document or not. Returns a non-zero value upon success, NULLT in case of error.\r
-int XMLTreeBuilder::OpenDocument(bool empty_texts, \r
+int XMLTreeBuilder::OpenDocument(bool empty_texts,\r
int sample_rate_text,\r
bool dtc,\r
TextCollectionBuilder::index_type_t index_type)\r
disable_tc = dtc;\r
text_index_type = index_type;\r
STARTTIMER();\r
- \r
+\r
par_aux = (pb *)umalloc(sizeof(pb)*parArraySize);\r
- \r
+\r
tags_aux = (TagType *) umalloc(sizeof(TagType));\r
- \r
+\r
TagName = new vector<string>();\r
tIdMap = new std::unordered_map<string,int>();\r
\r
\r
if (disable_tc)\r
TextBuilder = 0;\r
- else \r
+ else\r
TextBuilder = TextCollectionBuilder::create((unsigned)sample_rate_text, index_type);\r
\r
Text = 0;\r
}\r
\r
// CloseDocument(): it finishes the construction of the data structure for the XML\r
-// document. Tree and tags are represented in the final form, dynamic data \r
-// structures are made static, and the flag "finished" is set to true. After that, \r
+// document. Tree and tags are represented in the final form, dynamic data\r
+// structures are made static, and the flag "finished" is set to true. After that,\r
// the data structure can be queried.\r
XMLTree *XMLTreeBuilder::CloseDocument()\r
- { \r
+ {\r
//closing parenthesis for the tree root\r
//par_aux = (pb *)urealloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb))));\r
//setbit(par_aux, npar, CP);\r
//npar++;\r
- \r
+\r
// makes the text collection static\r
STOPTIMER(Parsing);\r
PRINTTIME("Parsing XML Document", Parsing);\r
PRINTTIME("Building TextCollection", Building);\r
\r
}\r
- \r
+\r
XMLTree *T = new XMLTree(par_aux,\r
- npar, \r
+ npar,\r
TagName,\r
tIdMap,\r
- empty_texts_aux, // freed by the constructor\r
- tags_aux, //freed by the constructor\r
+ empty_texts_aux, // freed by the constructor\r
+ tags_aux, // freed by the constructor\r
Text,\r
disable_tc,\r
text_index_type);\r
- return T; \r
+ tags_aux = 0;\r
+ empty_texts_aux = 0;\r
+ return T;\r
}\r
\r
\r
int XMLTreeBuilder::NewOpenTag(string tagname)\r
{\r
int i;\r
- \r
+\r
// inserts a new opening parentheses in the bit sequence\r
if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis\r
par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize);\r
parArraySize *= 2;\r
}\r
- \r
+\r
setbit(par_aux,npar,OP); // marks a new opening parenthesis\r
- \r
+\r
TagIdMapIT tag_id = tIdMap->find(tagname);\r
- \r
+\r
if (tag_id == tIdMap->end()){\r
REGISTER_TAG(TagName,tIdMap,tagname);\r
i = TagName->size() - 1;\r
if (tagname.compare(PCDATA_OPEN_TAG) == 0 ||\r
tagname.compare(ATTRIBUTE_DATA_OPEN_TAG) == 0){\r
};\r
- \r
+\r
tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1));\r
- \r
+\r
tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags\r
- \r
+\r
npar++;\r
- \r
- return 1; // success \r
+\r
+ return 1; // success\r
}\r
\r
\r
int XMLTreeBuilder::NewClosingTag(string tagname)\r
{\r
int i;\r
- \r
+\r
// inserts a new closing parentheses in the bit sequence\r
if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis\r
par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize);\r
parArraySize *= 2;\r
}\r
- \r
+\r
setbit(par_aux,npar,CP); // marks a new closing parenthesis\r
- \r
+\r
//tagname.insert(0,"/");\r
\r
- //TagIdMapIT tag_id = tIdMap->find(tagname); \r
+ //TagIdMapIT tag_id = tIdMap->find(tagname);\r
\r
// if (tag_id == tIdMap->end()){\r
// REGISTER_TAG(TagName,tIdMap,tagname);\r
tags_aux = (TagType *)urealloc(tags_aux, sizeof(TagType)*(npar + 1));\r
\r
tags_aux[npar] = CLOSING_TAG_ID; // inserts the new tag id within the preorder sequence of tags\r
- \r
+\r
npar++;\r
\r
return 1; // success\r
\r
int n_eta_size = sizeof(uint)*(1+(npar-1)/(8*sizeof(uint)));\r
//see basics.h, recalloc resizes and sets the new area to 0.\r
- \r
+\r
empty_texts_aux = (uint *)urecalloc(empty_texts_aux,eta_size,n_eta_size);\r
eta_size = n_eta_size;\r
bitset(empty_texts_aux, npar-1); // marks the non-empty text with a 1 in the bit vector\r
* along with this program; if not, write to the *\r
* Free Software Foundation, Inc., *\r
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *\r
- ******************************************************************************/ \r
+ ******************************************************************************/\r
\r
#ifndef XMLTREEBUILDER_H_\r
#define XMLTREEBUILDER_H_\r
\r
\r
class XMLTreeBuilder {\r
- \r
+\r
/** Array containing the balanced parentheses sequence */\r
pb *par_aux;\r
int parArraySize;\r
int npar;\r
\r
- /** Mapping from tag identifer to tag name */ \r
+ /** Mapping from tag identifier to tag name */
std::vector<std::string> *TagName;\r
TagIdMap * tIdMap;\r
/** Array containing the sequence of tags */\r
TagType *tags_aux;\r
- \r
+\r
/** The texts in the XML document */\r
TextCollectionBuilder *TextBuilder;\r
TextCollection *Text;\r
- \r
+\r
/** The texts in the XML document (cached for faster display) */\r
\r
std::vector<std::string> *CachedText;\r
XMLTreeBuilder() {;};\r
\r
~XMLTreeBuilder();\r
- \r
+\r
/** OpenDocument(sample_rate_text,dtc): initilizes the construction\r
- * of the data structure for the XML document. Parameter \r
+ * of the data structure for the XML document. Parameter\r
* sample_rate_text indicates the sampling rate for the text searching data\r
- * structures (small values get faster searching but a bigger space \r
+ * structures (small values get faster searching but a bigger space\r
* requirement). dtc disable the use of the TextCollection\r
* (i.e. everything is considered an empty text *)\r
- * Returns a non-zero value upon success, NULLT in case of \r
+ * Returns a non-zero value upon success, NULLT in case of\r
* error. */\r
int OpenDocument(bool empty_texts, int sample_rate_text, bool dtc,\r
TextCollectionBuilder::index_type_t index_type);\r
\r
- /** CloseDocument(): finishes the construction of the data structure for \r
- * the XML document. Tree and tags are represented in the final form, \r
+ /** CloseDocument(): finishes the construction of the data structure for\r
+ * the XML document. Tree and tags are represented in the final form,\r
* dynamic data structures are made static, returning the resulting\r
* XMLTree. After that, the XMLTree data structure can be queried. */\r
XMLTree *CloseDocument();\r
\r
- /** NewOpenTag(tagname): indicates the event of finding a new opening tag \r
- * in the document. Tag name is given. Returns a non-zero value upon \r
+ /** NewOpenTag(tagname): indicates the event of finding a new opening tag\r
+ * in the document. Tag name is given. Returns a non-zero value upon\r
* success, and returns NULLT in case of error. */\r
int NewOpenTag(std::string tagname);\r
- \r
+\r
/** NewClosingTag(tagname): indicates the event of finding a new closing tag\r
- * in the document. Tag name is given. Returns a non-zero value upon \r
+ * in the document. Tag name is given. Returns a non-zero value upon\r
* success, and returns NULLT in case of error. */\r
int NewClosingTag(std::string tagname);\r
- \r
- /** NewText(s): indicates the event of finding a new text s in \r
- * the document. The new text is inserted within the text collection. \r
- * Returns a non-zero value upon success, NULLT in case of error. \r
+\r
+ /** NewText(s): indicates the event of finding a new text s in\r
+ * the document. The new text is inserted within the text collection.\r
+ * Returns a non-zero value upon success, NULLT in case of error.\r
* If the string is empty, which is legal in attributes, then\r
* the string the sequence '\0x01\0x00' is inserted in the TextCollection\r
* It is ok to do so since a non printable character cannot occur in an XML document\r