X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLTreeBuilder.cpp;h=d9b083248a7082d3f94e706e66e7677c2fef4f22;hb=d79d6498e2d585560d915592ef59f3ad6a57b3c7;hp=9c7e3f6c941a008be31182d233d76d46d96a00ee;hpb=8b92ac7e539c796ee3160078b5ca30537f26ea51;p=SXSI%2FXMLTree.git diff --git a/XMLTreeBuilder.cpp b/XMLTreeBuilder.cpp index 9c7e3f6..d9b0832 100644 --- a/XMLTreeBuilder.cpp +++ b/XMLTreeBuilder.cpp @@ -1,16 +1,19 @@ -#include "basics.h" +#include "common.h" #include "XMLTreeBuilder.h" #include "timings.h" using std::string; XMLTreeBuilder::~XMLTreeBuilder(){ - + //free(par_aux); + free(tags_aux); + //delete other stuff. + } // OpenDocument(empty_texts): it starts the construction of the data structure for // the XML document. Parameter empty_texts indicates whether we index empty texts // in document or not. Returns a non-zero value upon success, NULLT in case of error. -int XMLTreeBuilder::OpenDocument(bool empty_texts, +int XMLTreeBuilder::OpenDocument(bool empty_texts, int sample_rate_text, bool dtc, TextCollectionBuilder::index_type_t index_type) @@ -20,11 +23,11 @@ int XMLTreeBuilder::OpenDocument(bool empty_texts, disable_tc = dtc; text_index_type = index_type; STARTTIMER(); - + par_aux = (pb *)umalloc(sizeof(pb)*parArraySize); - + tags_aux = (TagType *) umalloc(sizeof(TagType)); - + TagName = new vector(); tIdMap = new std::unordered_map(); @@ -41,7 +44,7 @@ int XMLTreeBuilder::OpenDocument(bool empty_texts, if (disable_tc) TextBuilder = 0; - else + else TextBuilder = TextCollectionBuilder::create((unsigned)sample_rate_text, index_type); Text = 0; @@ -51,42 +54,32 @@ int XMLTreeBuilder::OpenDocument(bool empty_texts, } // CloseDocument(): it finishes the construction of the data structure for the XML -// document. Tree and tags are represented in the final form, dynamic data -// structures are made static, and the flag "finished" is set to true. After that, +// document. Tree and tags are represented in the final form, dynamic data +// structures are made static, and the flag "finished" is set to true. After that, // the data structure can be queried. XMLTree *XMLTreeBuilder::CloseDocument() - { + { //closing parenthesis for the tree root //par_aux = (pb *)urealloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb)))); //setbit(par_aux, npar, CP); //npar++; - + // makes the text collection static STOPTIMER(Parsing); PRINTTIME("Parsing XML Document", Parsing); - if (!disable_tc) { - assert(Text == 0); - assert(TextBuilder != 0); - STARTTIMER(); - Text = TextBuilder->InitTextCollection(); - delete TextBuilder; - TextBuilder = 0; - STOPTIMER(Building); - PRINTTIME("Building TextCollection", Building); - - } - XMLTree *T = new XMLTree(par_aux, - npar, + npar, TagName, tIdMap, - empty_texts_aux, // freed by the constructor - tags_aux, //freed by the constructor - Text, + empty_texts_aux, // freed by the constructor + tags_aux, // freed by the constructor + TextBuilder, // freed by the constructor disable_tc, text_index_type); - return T; + tags_aux = 0; + empty_texts_aux = 0; + return T; } @@ -96,17 +89,21 @@ XMLTree *XMLTreeBuilder::CloseDocument() int XMLTreeBuilder::NewOpenTag(string tagname) { int i; - // inserts a new opening parentheses in the bit sequence if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis - par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize); - parArraySize *= 2; - } - - setbit(par_aux,npar,OP); // marks a new opening parenthesis - + + // If array is already 1GB, be gentler when resizing: + if (sizeof(pb)*parArraySize >= 1024*1024*1024) + parArraySize += (128*1024*1024); + else + parArraySize *= 2; + par_aux = (pb *) urealloc(par_aux, sizeof(pb)*parArraySize); + }; + + bp_setbit(par_aux,npar,OP); // marks a new opening parenthesis + TagIdMapIT tag_id = tIdMap->find(tagname); - + if (tag_id == tIdMap->end()){ REGISTER_TAG(TagName,tIdMap,tagname); i = TagName->size() - 1; @@ -117,14 +114,15 @@ int XMLTreeBuilder::NewOpenTag(string tagname) if (tagname.compare(PCDATA_OPEN_TAG) == 0 || tagname.compare(ATTRIBUTE_DATA_OPEN_TAG) == 0){ }; - + + tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1)); - + tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags - + npar++; - - return 1; // success + + return 1; // success } @@ -134,18 +132,21 @@ int XMLTreeBuilder::NewOpenTag(string tagname) int XMLTreeBuilder::NewClosingTag(string tagname) { int i; - // inserts a new closing parentheses in the bit sequence if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis - par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize); - parArraySize *= 2; - } - - setbit(par_aux,npar,CP); // marks a new closing parenthesis - + // If array is already 1GB, be gentler when resizing: + if (sizeof(pb)*parArraySize >= 1024*1024*1024) + parArraySize += (128*1024*1024); + else + parArraySize *= 2; + par_aux = (pb *)urealloc(par_aux, sizeof(pb)*parArraySize); + }; + + bp_setbit(par_aux,npar,CP); // marks a new closing parenthesis + //tagname.insert(0,"/"); - //TagIdMapIT tag_id = tIdMap->find(tagname); + //TagIdMapIT tag_id = tIdMap->find(tagname); // if (tag_id == tIdMap->end()){ // REGISTER_TAG(TagName,tIdMap,tagname); @@ -154,10 +155,10 @@ int XMLTreeBuilder::NewClosingTag(string tagname) // else // i = tag_id->second; - tags_aux = (TagType *)urealloc(tags_aux, sizeof(TagType)*(npar + 1)); + tags_aux = (TagType *)urealloc(tags_aux, sizeof(TagType)*(npar + 1)); tags_aux[npar] = CLOSING_TAG_ID; // inserts the new tag id within the preorder sequence of tags - + npar++; return 1; // success @@ -178,7 +179,7 @@ int XMLTreeBuilder::NewText(string text) int n_eta_size = sizeof(uint)*(1+(npar-1)/(8*sizeof(uint))); //see basics.h, recalloc resizes and sets the new area to 0. - + empty_texts_aux = (uint *)urecalloc(empty_texts_aux,eta_size,n_eta_size); eta_size = n_eta_size; bitset(empty_texts_aux, npar-1); // marks the non-empty text with a 1 in the bit vector