+++ /dev/null
-#include "common.h"\r
-#include "XMLTreeBuilder.h"\r
-#include "timings.h"\r
-using std::string;\r
-\r
-XMLTreeBuilder::~XMLTreeBuilder(){\r
- //free(par_aux);\r
- free(tags_aux);\r
- //delete other stuff.\r
-\r
-}\r
-\r
-// OpenDocument(empty_texts): it starts the construction of the data structure for\r
-// the XML document. Parameter empty_texts indicates whether we index empty texts\r
-// in document or not. Returns a non-zero value upon success, NULLT in case of error.\r
-int XMLTreeBuilder::OpenDocument(bool empty_texts,\r
- int sample_rate_text,\r
- bool dtc,\r
- TextCollectionBuilder::index_type_t index_type)\r
- {\r
- npar = 0;\r
- parArraySize = 1;\r
- disable_tc = dtc;\r
- text_index_type = index_type;\r
- STARTTIMER();\r
-\r
- par_aux = (pb *)umalloc(sizeof(pb)*parArraySize);\r
-\r
- tags_aux = (TagType *) umalloc(sizeof(TagType));\r
-\r
- TagName = new vector<string>();\r
- tIdMap = new std::unordered_map<string,int>();\r
-\r
- REGISTER_TAG(TagName,tIdMap,DOCUMENT_OPEN_TAG);\r
- REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_OPEN_TAG);\r
- REGISTER_TAG(TagName,tIdMap,PCDATA_OPEN_TAG);\r
- REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_DATA_OPEN_TAG);\r
- REGISTER_TAG(TagName,tIdMap,CLOSING_TAG);\r
- REGISTER_TAG(TagName,tIdMap,DOCUMENT_CLOSE_TAG);\r
- REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_CLOSE_TAG);\r
- REGISTER_TAG(TagName,tIdMap,PCDATA_CLOSE_TAG);\r
- REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_DATA_CLOSE_TAG);\r
-\r
-\r
- if (disable_tc)\r
- TextBuilder = 0;\r
- else\r
- TextBuilder = TextCollectionBuilder::create((unsigned)sample_rate_text, index_type);\r
-\r
- Text = 0;\r
- empty_texts_aux = (unsigned int *)ucalloc(sizeof(unsigned int),1);\r
- eta_size = sizeof(unsigned int);\r
- return 1; // indicates success in the initialization of the data structure\r
- }\r
-\r
-// CloseDocument(): it finishes the construction of the data structure for the XML\r
-// document. Tree and tags are represented in the final form, dynamic data\r
-// structures are made static, and the flag "finished" is set to true. After that,\r
-// the data structure can be queried.\r
-XMLTree *XMLTreeBuilder::CloseDocument()\r
- {\r
- //closing parenthesis for the tree root\r
- //par_aux = (pb *)urealloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb))));\r
- //setbit(par_aux, npar, CP);\r
- //npar++;\r
-\r
- // makes the text collection static\r
- STOPTIMER(Parsing);\r
- PRINTTIME("Parsing XML Document", Parsing);\r
-\r
- XMLTree *T = new XMLTree(par_aux,\r
- npar,\r
- TagName,\r
- tIdMap,\r
- empty_texts_aux, // freed by the constructor\r
- tags_aux, // freed by the constructor\r
- TextBuilder, // freed by the constructor\r
- disable_tc,\r
- text_index_type);\r
- tags_aux = 0;\r
- empty_texts_aux = 0;\r
- return T;\r
- }\r
-\r
-\r
-// NewOpenTag(tagname): indicates the event of finding a new opening tag in the document.\r
-// Tag name is given. Returns a non-zero value upon success, and returns NULLT\r
-// in case of failing when trying to insert the new tag.\r
-int XMLTreeBuilder::NewOpenTag(string tagname)\r
- {\r
- int i;\r
- // inserts a new opening parentheses in the bit sequence\r
- if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis\r
-\r
- // If array is already 1GB, be gentler when resizing:\r
- if (sizeof(pb)*parArraySize >= 1024*1024*1024)\r
- parArraySize += (128*1024*1024);\r
- else\r
- parArraySize *= 2;\r
- par_aux = (pb *) urealloc(par_aux, sizeof(pb)*parArraySize);\r
- };\r
-\r
- bp_setbit(par_aux,npar,OP); // marks a new opening parenthesis\r
-\r
- TagIdMapIT tag_id = tIdMap->find(tagname);\r
-\r
- if (tag_id == tIdMap->end()){\r
- REGISTER_TAG(TagName,tIdMap,tagname);\r
- i = TagName->size() - 1;\r
- }\r
- else\r
- i = tag_id->second;\r
-\r
- if (tagname.compare(PCDATA_OPEN_TAG) == 0 ||\r
- tagname.compare(ATTRIBUTE_DATA_OPEN_TAG) == 0){\r
- };\r
-\r
-\r
- tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1));\r
-\r
- tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags\r
-\r
- npar++;\r
-\r
- return 1; // success\r
- }\r
-\r
-\r
-// NewClosingTag(tagname): indicates the event of finding a new closing tag in the document.\r
-// Tag name is given. Returns a non-zero value upon success, and returns NULLT\r
-// in case of failing when trying to insert the new tag.\r
-int XMLTreeBuilder::NewClosingTag(string tagname)\r
- {\r
- int i;\r
- // inserts a new closing parentheses in the bit sequence\r
- if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis\r
- // If array is already 1GB, be gentler when resizing:\r
- if (sizeof(pb)*parArraySize >= 1024*1024*1024)\r
- parArraySize += (128*1024*1024);\r
- else\r
- parArraySize *= 2;\r
- par_aux = (pb *)urealloc(par_aux, sizeof(pb)*parArraySize);\r
- };\r
-\r
- bp_setbit(par_aux,npar,CP); // marks a new closing parenthesis\r
-\r
- //tagname.insert(0,"/");\r
-\r
- //TagIdMapIT tag_id = tIdMap->find(tagname);\r
-\r
- // if (tag_id == tIdMap->end()){\r
- // REGISTER_TAG(TagName,tIdMap,tagname);\r
- // i = TagName->size() - 1;\r
- // }\r
- // else\r
- // i = tag_id->second;\r
-\r
- tags_aux = (TagType *)urealloc(tags_aux, sizeof(TagType)*(npar + 1));\r
-\r
- tags_aux[npar] = CLOSING_TAG_ID; // inserts the new tag id within the preorder sequence of tags\r
-\r
- npar++;\r
-\r
- return 1; // success\r
- }\r
-\r
-\r
-// NewText(s): indicates the event of finding a new (non-empty) text s in the document.\r
-// The new text is inserted within the text collection. Returns a non-zero value upon\r
-// success, NULLT in case of error.\r
-int XMLTreeBuilder::NewText(string text)\r
- {\r
- if (!disable_tc) {\r
- if (text.empty())\r
- TextBuilder->InsertText((uchar *)"\001");\r
- else\r
- TextBuilder->InsertText((uchar *) text.c_str());\r
- };\r
-\r
- int n_eta_size = sizeof(uint)*(1+(npar-1)/(8*sizeof(uint)));\r
- //see basics.h, recalloc resizes and sets the new area to 0.\r
-\r
- empty_texts_aux = (uint *)urecalloc(empty_texts_aux,eta_size,n_eta_size);\r
- eta_size = n_eta_size;\r
- bitset(empty_texts_aux, npar-1); // marks the non-empty text with a 1 in the bit vector\r
-\r
- return 1; // success\r
- }\r
-\r
-\r