X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLTreeBuilder.cpp;fp=XMLTreeBuilder.cpp;h=138db9bd7ca125c8b2065ac9cc384b354b359153;hb=235e3214904e390d2f101c5d5bf7def98745b132;hp=27bc862691948186b0b766f090c1e578219a179e;hpb=aa6692a9fd2badf8e8e686b92075f041dc03bbef;p=SXSI%2FXMLTree.git diff --git a/XMLTreeBuilder.cpp b/XMLTreeBuilder.cpp index 27bc862..138db9b 100644 --- a/XMLTreeBuilder.cpp +++ b/XMLTreeBuilder.cpp @@ -1,43 +1,46 @@ - -#include "XMLTreeBuilder.h" #include "basics.h" +#include "XMLTreeBuilder.h" + + +XMLTreeBuilder::~XMLTreeBuilder(){ + +} // OpenDocument(empty_texts): it starts the construction of the data structure for // the XML document. Parameter empty_texts indicates whether we index empty texts // in document or not. Returns a non-zero value upon success, NULLT in case of error. int XMLTreeBuilder::OpenDocument(bool empty_texts, int sample_rate_text, bool dtc) { - found_attributes = false; npar = 0; parArraySize = 1; - ntagnames = 4; disable_tc = dtc; - indexing_empty_texts = empty_texts; - + par_aux = (pb *)umalloc(sizeof(pb)*parArraySize); tags_aux = (TagType *) umalloc(sizeof(TagType)); - TagName = (unsigned char **) umalloc(4*sizeof(unsigned char*)); - TagName[0] = (unsigned char *) umalloc(4*sizeof(unsigned char)); - strcpy((char *) TagName[0], "<@>"); - TagName[1] = (unsigned char *) umalloc(4*sizeof(unsigned char)); - strcpy((char *) TagName[1], "<$>"); - TagName[2] = (unsigned char *) umalloc(5*sizeof(unsigned char)); - strcpy((char *) TagName[2], "/<@>"); - TagName[3] = (unsigned char *) umalloc(5*sizeof(unsigned char)); - strcpy((char *) TagName[3], "/<$>"); - - if (!indexing_empty_texts) - empty_texts_aux = (unsigned int *)umalloc(sizeof(unsigned int)); - + TagName = new vector(); + tIdMap = new std::unordered_map(); + + REGISTER_TAG(TagName,tIdMap,DOCUMENT_OPEN_TAG); + REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_OPEN_TAG); + REGISTER_TAG(TagName,tIdMap,PCDATA_OPEN_TAG); + REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_DATA_OPEN_TAG); + REGISTER_TAG(TagName,tIdMap,DOCUMENT_CLOSE_TAG); + REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_CLOSE_TAG); + REGISTER_TAG(TagName,tIdMap,PCDATA_CLOSE_TAG); + REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_DATA_CLOSE_TAG); + + + CachedText = new vector; if (disable_tc) TextBuilder = 0; else - TextBuilder = new TextCollectionBuilder((unsigned)sample_rate_text); + TextBuilder = new TextCollectionBuilder((unsigned)sample_rate_text); Text = 0; - + empty_texts_aux = (unsigned int *)ucalloc(sizeof(unsigned int),1); + eta_size = sizeof(unsigned int); return 1; // indicates success in the initialization of the data structure } @@ -47,10 +50,10 @@ int XMLTreeBuilder::OpenDocument(bool empty_texts, int sample_rate_text, bool dt // the data structure can be queried. XMLTree *XMLTreeBuilder::CloseDocument() { - // closing parenthesis for the tree root - par_aux = (pb *)urealloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb)))); - setbit(par_aux, npar, CP); - npar++; + //closing parenthesis for the tree root + //par_aux = (pb *)urealloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb)))); + //setbit(par_aux, npar, CP); + //npar++; // makes the text collection static if (!disable_tc) { @@ -60,9 +63,16 @@ XMLTree *XMLTreeBuilder::CloseDocument() delete TextBuilder; TextBuilder = 0; } - - XMLTree *T = new XMLTree(par_aux, npar, TagName, ntagnames, empty_texts_aux, tags_aux, - Text, CachedText, indexing_empty_texts, disable_tc); + + XMLTree *T = new XMLTree(par_aux, + npar, + TagName, + tIdMap, + empty_texts_aux, // freed by the constructor + tags_aux, //freed by the constructor + Text, + CachedText, + disable_tc); return T; } @@ -70,7 +80,7 @@ XMLTree *XMLTreeBuilder::CloseDocument() // NewOpenTag(tagname): indicates the event of finding a new opening tag in the document. // Tag name is given. Returns a non-zero value upon success, and returns NULLT // in case of failing when trying to insert the new tag. -int XMLTreeBuilder::NewOpenTag(unsigned char *tagname) +int XMLTreeBuilder::NewOpenTag(string tagname) { int i; @@ -81,31 +91,22 @@ int XMLTreeBuilder::NewOpenTag(unsigned char *tagname) } setbit(par_aux,npar,OP); // marks a new opening parenthesis + + TagIdMapIT tag_id = tIdMap->find(tagname); + + if (tag_id == tIdMap->end()){ + REGISTER_TAG(TagName,tIdMap,tagname); + i = TagName->size() - 1; + } + else + i = tag_id->second; - // transforms the tagname into a tag identifier. If the tag is new, we insert - // it in the table. - for (i=0; i") was called - if (i==0) - found_attributes=true; - - if (i==ntagnames) { // the tag is a new one, then we insert it - TagName = (unsigned char **)urealloc(TagName, sizeof(char *)*(ntagnames+1)); - - if (!TagName) { - fprintf(stderr, "Error: not enough memory\n"); - return NULLT; - } - - ntagnames++; - TagName[i] = (unsigned char *)umalloc(sizeof(unsigned char)*(strlen((const char *)tagname)+1)); - strcpy((char *)TagName[i], (const char *)tagname); - } + if (tagname.compare(PCDATA_OPEN_TAG) == 0 || + tagname.compare(ATTRIBUTE_DATA_OPEN_TAG) == 0){ + }; + tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1)); - + tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags npar++; @@ -117,7 +118,7 @@ int XMLTreeBuilder::NewOpenTag(unsigned char *tagname) // NewClosingTag(tagname): indicates the event of finding a new closing tag in the document. // Tag name is given. Returns a non-zero value upon success, and returns NULLT // in case of failing when trying to insert the new tag. -int XMLTreeBuilder::NewClosingTag(unsigned char *tagname) +int XMLTreeBuilder::NewClosingTag(string tagname) { int i; @@ -128,20 +129,17 @@ int XMLTreeBuilder::NewClosingTag(unsigned char *tagname) } setbit(par_aux,npar,CP); // marks a new closing parenthesis + + tagname.insert(0,"/"); - // transforms the tagname into a tag identifier. If the tag is new, we insert - // it in the table. - for (i=0; ifind(tagname); + + if (tag_id == tIdMap->end()){ + REGISTER_TAG(TagName,tIdMap,tagname); + i = TagName->size() - 1; + } + else + i = tag_id->second; tags_aux = (TagType *)urealloc(tags_aux, sizeof(TagType)*(npar + 1)); @@ -156,41 +154,24 @@ int XMLTreeBuilder::NewClosingTag(unsigned char *tagname) // NewText(s): indicates the event of finding a new (non-empty) text s in the document. // The new text is inserted within the text collection. Returns a non-zero value upon // success, NULLT in case of error. -int XMLTreeBuilder::NewText(unsigned char *s) +int XMLTreeBuilder::NewText(string text) { - if (disable_tc) { - XMLTreeBuilder::NewEmptyText(); - return 1; - } - - if (!indexing_empty_texts) { - empty_texts_aux = (unsigned int *)urealloc(empty_texts_aux, sizeof(pb)*(1+(npar-1)/(8*sizeof(pb)))); - bitset(empty_texts_aux, npar-1); // marks the non-empty text with a 1 in the bit vector - } - - TextBuilder->InsertText(s); - string cpps = (char*) s; - CachedText.push_back(cpps); - - return 1; // success + if (!disable_tc) { + if (text.empty()) + TextBuilder->InsertText((uchar *)"\001"); + else + TextBuilder->InsertText((uchar *) text.c_str()); + }; + + CachedText->push_back(text); + int n_eta_size = sizeof(uint)*(1+(npar-1)/(8*sizeof(uint))); + //see basics.h, recalloc resizes and sets the new area to 0. + + empty_texts_aux = (uint *)urecalloc(empty_texts_aux,eta_size,n_eta_size); + eta_size = n_eta_size; + bitset(empty_texts_aux, npar-1); // marks the non-empty text with a 1 in the bit vector + + return 1; // success } -// NewEmptyText(): indicates the event of finding a new empty text in the document. -// In case of indexing empty and non-empty texts, we insert the empty texts into the -// text collection. In case of indexing only non-empty texts, it just indicates an -// empty text in the bit vector of empty texts. Returns a non-zero value upon -// success, NULLT in case of error. -int XMLTreeBuilder::NewEmptyText() - { - unsigned char c = 0; - - if (!indexing_empty_texts) { - empty_texts_aux = (unsigned int *)urealloc(empty_texts_aux, sizeof(pb)*(1+(npar-1)/(8*sizeof(pb)))); - - bitclean(empty_texts_aux, npar-1); // marks the empty text with a 0 in the bit vector - } - else TextBuilder->InsertText(&c); // we insert the empty text just in case we index all the texts - - return 1; // success - }