X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLTreeBuilder.cpp;fp=XMLTreeBuilder.cpp;h=27bc862691948186b0b766f090c1e578219a179e;hb=aa6692a9fd2badf8e8e686b92075f041dc03bbef;hp=0000000000000000000000000000000000000000;hpb=5db16dd3e0bf609bc0fa84ee7d067f6bbc58013e;p=SXSI%2FXMLTree.git diff --git a/XMLTreeBuilder.cpp b/XMLTreeBuilder.cpp new file mode 100644 index 0000000..27bc862 --- /dev/null +++ b/XMLTreeBuilder.cpp @@ -0,0 +1,196 @@ + +#include "XMLTreeBuilder.h" +#include "basics.h" + +// OpenDocument(empty_texts): it starts the construction of the data structure for +// the XML document. Parameter empty_texts indicates whether we index empty texts +// in document or not. Returns a non-zero value upon success, NULLT in case of error. +int XMLTreeBuilder::OpenDocument(bool empty_texts, int sample_rate_text, bool dtc) + { + found_attributes = false; + npar = 0; + parArraySize = 1; + ntagnames = 4; + disable_tc = dtc; + + indexing_empty_texts = empty_texts; + + par_aux = (pb *)umalloc(sizeof(pb)*parArraySize); + + tags_aux = (TagType *) umalloc(sizeof(TagType)); + + TagName = (unsigned char **) umalloc(4*sizeof(unsigned char*)); + TagName[0] = (unsigned char *) umalloc(4*sizeof(unsigned char)); + strcpy((char *) TagName[0], "<@>"); + TagName[1] = (unsigned char *) umalloc(4*sizeof(unsigned char)); + strcpy((char *) TagName[1], "<$>"); + TagName[2] = (unsigned char *) umalloc(5*sizeof(unsigned char)); + strcpy((char *) TagName[2], "/<@>"); + TagName[3] = (unsigned char *) umalloc(5*sizeof(unsigned char)); + strcpy((char *) TagName[3], "/<$>"); + + if (!indexing_empty_texts) + empty_texts_aux = (unsigned int *)umalloc(sizeof(unsigned int)); + + if (disable_tc) + TextBuilder = 0; + else + TextBuilder = new TextCollectionBuilder((unsigned)sample_rate_text); + Text = 0; + + return 1; // indicates success in the initialization of the data structure + } + +// CloseDocument(): it finishes the construction of the data structure for the XML +// document. 
// NOTE(review): the two long lines below are part of a gitweb "blobdiff_plain"
// capture whose line breaks were lost; every " +" is the diff's added-line marker,
// not C++. Left byte-for-byte as captured. In order, they contain:
//   - the rest of the CloseDocument() header comment;
//   - CloseDocument(): grows par_aux so that bit npar is addressable, writes the
//     closing parenthesis (CP) for the tree root there and increments npar; unless
//     disable_tc is set, asserts Text == 0 and TextBuilder != 0, obtains Text from
//     TextBuilder->InitTextCollection(), deletes the builder and nulls it; finally
//     returns a newly allocated XMLTree constructed from the accumulated arrays;
//   - NewOpenTag(), NewClosingTag(), the tail of NewText(), and the header comment
//     of NewEmptyText().
Tree and tags are represented in the final form, dynamic data +// structures are made static, and the flag "finished" is set to true. After that, +// the data structure can be queried. +XMLTree *XMLTreeBuilder::CloseDocument() + { + // closing parenthesis for the tree root + par_aux = (pb *)urealloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb)))); + setbit(par_aux, npar, CP); + npar++; + + // makes the text collection static + if (!disable_tc) { + assert(Text == 0); + assert(TextBuilder != 0); + Text = TextBuilder->InitTextCollection(); + delete TextBuilder; + TextBuilder = 0; + } + + XMLTree *T = new XMLTree(par_aux, npar, TagName, ntagnames, empty_texts_aux, tags_aux, + Text, CachedText, indexing_empty_texts, disable_tc); + return T; + } + + +// NewOpenTag(tagname): indicates the event of finding a new opening tag in the document. +// Tag name is given. Returns a non-zero value upon success, and returns NULLT +// in case of failing when trying to insert the new tag. +int XMLTreeBuilder::NewOpenTag(unsigned char *tagname) + { + int i; + + // inserts a new opening parentheses in the bit sequence + if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis + par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize); + parArraySize *= 2; + } + + setbit(par_aux,npar,OP); // marks a new opening parenthesis + + // transforms the tagname into a tag identifier. If the tag is new, we insert + // it in the table. 
// NOTE(review): two spans on the next line look truncated -- apparently everything
// between a '<' and a following '>' was stripped when the page was captured:
//   1. In NewOpenTag(), `for (i=0; i") was called` is missing the loop condition,
//      the loop body, and the beginning of the comment that ends in `") was called`.
//   2. `for (i=0; iInsertText(s);` is missing the remainder of NewClosingTag() and
//      the whole beginning of NewText(), including its signature.
// The missing code cannot be determined from this capture; restore it from the
// upstream XMLTreeBuilder.cpp rather than reconstructing it by hand.
+ for (i=0; i") was called + if (i==0) + found_attributes=true; + + if (i==ntagnames) { // the tag is a new one, then we insert it + TagName = (unsigned char **)urealloc(TagName, sizeof(char *)*(ntagnames+1)); + + if (!TagName) { + fprintf(stderr, "Error: not enough memory\n"); + return NULLT; + } + + ntagnames++; + TagName[i] = (unsigned char *)umalloc(sizeof(unsigned char)*(strlen((const char *)tagname)+1)); + strcpy((char *)TagName[i], (const char *)tagname); + } + tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1)); + + tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags + + npar++; + + return 1; // success + } + + +// NewClosingTag(tagname): indicates the event of finding a new closing tag in the document. +// Tag name is given. Returns a non-zero value upon success, and returns NULLT +// in case of failing when trying to insert the new tag. +int XMLTreeBuilder::NewClosingTag(unsigned char *tagname) + { + int i; + + // inserts a new closing parentheses in the bit sequence + if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis + par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize); + parArraySize *= 2; + } + + setbit(par_aux,npar,CP); // marks a new closing parenthesis + + // transforms the tagname into a tag identifier. If the tag is new, we insert + // it in the table. + for (i=0; iInsertText(s); + string cpps = (char*) s; + CachedText.push_back(cpps); + + return 1; // success + } + +// NewEmptyText(): indicates the event of finding a new empty text in the document. +// In case of indexing empty and non-empty texts, we insert the empty texts into the +// text collection. In case of indexing only non-empty texts, it just indicates an +// empty text in the bit vector of empty texts. Returns a non-zero value upon +// success, NULLT in case of error. 
+int XMLTreeBuilder::NewEmptyText() + { + unsigned char c = 0; + + if (!indexing_empty_texts) { + empty_texts_aux = (unsigned int *)urealloc(empty_texts_aux, sizeof(pb)*(1+(npar-1)/(8*sizeof(pb)))); + + bitclean(empty_texts_aux, npar-1); // marks the empty text with a 0 in the bit vector + } + else TextBuilder->InsertText(&c); // we insert the empty text just in case we index all the texts + + return 1; // success + } +