2 #include "XMLTreeBuilder.h"
\r
5 // OpenDocument(empty_texts, sample_rate_text, dtc): starts the construction of the
\r
6 // data structure for the XML document. Parameter empty_texts indicates whether empty
\r
7 // texts are indexed in the document or not; it is stored in indexing_empty_texts.
\r
// sample_rate_text is cast to unsigned and handed to the TextCollectionBuilder constructor.
// dtc is not referenced in the lines shown here -- presumably it feeds disable_tc, which
// CloseDocument() passes on to XMLTree; TODO confirm.
// Returns a non-zero value upon success (the visible path returns 1); the stated contract
// names NULLT as the value returned in case of error.
8 int XMLTreeBuilder::OpenDocument(bool empty_texts, int sample_rate_text, bool dtc)
\r
10 found_attributes = false; // resets the flag that NewOpenTag() raises
\r
16 indexing_empty_texts = empty_texts; // consulted later by NewText(), NewEmptyText() and CloseDocument()
\r
// initial parenthesis bit array: parArraySize words of type pb
18 par_aux = (pb *)umalloc(sizeof(pb)*parArraySize);
\r
// the tag-id sequence starts with room for one TagType; tag events regrow it to npar+1 entries
20 tags_aux = (TagType *) umalloc(sizeof(TagType));
\r
// the tag-name table is seeded with four reserved names at indices 0..3; each buffer is
// sized for the literal plus its terminating NUL (3+1 bytes and 4+1 bytes respectively)
22 TagName = (unsigned char **) umalloc(4*sizeof(unsigned char*));
\r
23 TagName[0] = (unsigned char *) umalloc(4*sizeof(unsigned char));
\r
24 strcpy((char *) TagName[0], "<@>");
\r
25 TagName[1] = (unsigned char *) umalloc(4*sizeof(unsigned char));
\r
26 strcpy((char *) TagName[1], "<$>");
\r
27 TagName[2] = (unsigned char *) umalloc(5*sizeof(unsigned char));
\r
28 strcpy((char *) TagName[2], "/<@>");
\r
29 TagName[3] = (unsigned char *) umalloc(5*sizeof(unsigned char));
\r
30 strcpy((char *) TagName[3], "/<$>");
\r
// the empty-text bit vector is allocated only when empty texts are NOT indexed
32 if (!indexing_empty_texts)
\r
33 empty_texts_aux = (unsigned int *)umalloc(sizeof(unsigned int));
\r
// builder for the text collection; CloseDocument() calls InitTextCollection() on it
38 TextBuilder = new TextCollectionBuilder((unsigned)sample_rate_text);
\r
41 return 1; // indicates success in the initialization of the data structure
\r
44 // CloseDocument(): finishes the construction of the data structure for the XML
\r
45 // document. It appends the closing parenthesis of the tree root, turns the text
\r
46 // builder into a text collection, and builds an XMLTree from the parenthesis
\r
47 // sequence, tag names, tag ids, empty-text bit vector and texts gathered so far.
\r
// NOTE(review): the earlier wording mentioned a flag "finished" being set to true; no
// such flag is touched in the lines shown here -- confirm against the full file.
// NOTE(review): the return statement is not among the lines shown; presumably the
// newly created XMLTree pointer T is returned -- confirm.
48 XMLTree *XMLTreeBuilder::CloseDocument()
\r
50 // closing parenthesis for the tree root
\r
// resizes par_aux to 1 + npar/(8*sizeof(pb)) words so that bit position npar is addressable
51 par_aux = (pb *)urealloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb))));
\r
52 setbit(par_aux, npar, CP);
\r
55 // makes the text collection static
\r
58 assert(TextBuilder != 0); // a builder must exist before the collection can be finalized
\r
59 Text = TextBuilder->InitTextCollection();
\r
// hands all the accumulated arrays and the two flags to the XMLTree constructor
64 XMLTree *T = new XMLTree(par_aux, npar, TagName, ntagnames, empty_texts_aux, tags_aux,
\r
65 Text, CachedText, indexing_empty_texts, disable_tc);
\r
70 // NewOpenTag(tagname): indicates the event of finding a new opening tag in the document.
\r
71 // The tag name is given as a NUL-terminated string. Returns a non-zero value upon
\r
72 // success (the visible success path returns 1), and NULLT if inserting the new tag fails.
\r
// The tag is recorded as an opening parenthesis at bit position npar of par_aux and as a
// tag id (its index in TagName) at tags_aux[npar].
// NOTE(review): the increment of npar is not among the lines shown -- confirm it happens
// before the function returns.
73 int XMLTreeBuilder::NewOpenTag(unsigned char *tagname)
\r
77 // inserts a new opening parenthesis in the bit sequence
\r
78 if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis
\r
// requests twice the current size; NOTE(review): the matching update of parArraySize is
// not among the lines shown -- confirm
79 par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize);
\r
83 setbit(par_aux,npar,OP); // marks a new opening parenthesis
\r
85 // transforms the tagname into a tag identifier. If the tag is new, we insert it in the table.
\r
// linear search over the known names; i ends at the matching index, or at ntagnames if none
87 for (i=0; i<ntagnames; i++)
\r
88 if (strcmp((const char *)tagname,(const char *)TagName[i])==0) break;
\r
91 // NewOpenTag("<@>") was called
\r
// NOTE(review): the condition guarding this assignment is not among the lines shown;
// presumably it tests that the matched name is "<@>" -- confirm
93 found_attributes=true;
\r
95 if (i==ntagnames) { // the tag is a new one, then we insert it
\r
// grows the name table by one pointer slot
96 TagName = (unsigned char **)urealloc(TagName, sizeof(char *)*(ntagnames+1));
\r
// NOTE(review): the check that leads to this message is not among the lines shown
99 fprintf(stderr, "Error: not enough memory\n");
\r
// stores a private copy of the name: strlen(tagname) bytes plus the terminating NUL
104 TagName[i] = (unsigned char *)umalloc(sizeof(unsigned char)*(strlen((const char *)tagname)+1));
\r
105 strcpy((char *)TagName[i], (const char *)tagname);
\r
// grows the tag-id sequence to npar+1 entries so that index npar is valid
107 tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1));
\r
109 tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags
\r
113 return 1; // success
\r
117 // NewClosingTag(tagname): indicates the event of finding a new closing tag in the document.
\r
118 // The tag name is given as a NUL-terminated string, without any leading '/'. Returns a
\r
119 // non-zero value upon success (the visible success path returns 1), and NULLT on failure.
\r
// The tag is recorded as a closing parenthesis at bit position npar of par_aux and as a
// tag id at tags_aux[npar]. Closing tags are kept in TagName as '/' followed by the name.
// NOTE(review): the increment of npar is not among the lines shown -- confirm it happens
// before the function returns.
120 int XMLTreeBuilder::NewClosingTag(unsigned char *tagname)
\r
124 // inserts a new closing parenthesis in the bit sequence
\r
125 if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis
\r
// requests twice the current size; NOTE(review): the matching update of parArraySize is
// not among the lines shown -- confirm
126 par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize);
\r
130 setbit(par_aux,npar,CP); // marks a new closing parenthesis
\r
132 // transforms the tagname into a tag identifier. If the tag is new, we insert
\r
133 // it in the table.
\r
// linear search: an entry matches when it starts with '/' and the rest equals tagname;
// i ends at the matching index, or at ntagnames if there is none
134 for (i=0; i<ntagnames; i++)
\r
135 if ((strcmp((const char *)tagname,(const char *)(TagName[i]+1))==0) && (TagName[i][0]=='/')) break;
\r
137 if (i==ntagnames) { // the tag is a new one, then we insert it
\r
// grows the name table by one pointer slot
138 TagName = (unsigned char **)urealloc(TagName, sizeof(char *)*(ntagnames+1));
\r
// strlen(tagname)+2 bytes: one for the leading '/', one for the terminating NUL
141 TagName[i] = (unsigned char *)umalloc(sizeof(char)*(strlen((const char *)tagname)+2));
\r
142 TagName[i][0] = '/';
\r
143 strcpy((char *)&(TagName[i][1]), (const char *)tagname);
\r
// grows the tag-id sequence to npar+1 entries so that index npar is valid
146 tags_aux = (TagType *)urealloc(tags_aux, sizeof(TagType)*(npar + 1));
\r
148 tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags
\r
152 return 1; // success
\r
156 // NewText(s): indicates the event of finding a new (non-empty) text s in the document.
\r
157 // The new text is inserted within the text collection through TextBuilder and a copy
\r
158 // is appended to CachedText. Returns a non-zero value upon success (the visible path
\r
// returns 1); the stated contract names NULLT as the value returned in case of error.
// s is read as a NUL-terminated string (it is converted to a std::string below).
159 int XMLTreeBuilder::NewText(unsigned char *s)
\r
// NOTE(review): it is unclear from the lines shown whether this call is unconditional;
// presumably it sits behind a guard (with an early return) that is not visible -- confirm
162 XMLTreeBuilder::NewEmptyText();
\r
166 if (!indexing_empty_texts) {
\r
// resizes the bit vector so that bit npar-1 is addressable; the size is computed with
// sizeof(pb) although the pointer is unsigned int* -- assumes both have the same size, TODO confirm
167 empty_texts_aux = (unsigned int *)urealloc(empty_texts_aux, sizeof(pb)*(1+(npar-1)/(8*sizeof(pb))));
\r
168 bitset(empty_texts_aux, npar-1); // marks the non-empty text with a 1 in the bit vector
\r
171 TextBuilder->InsertText(s); // adds the text to the collection being built
\r
172 string cpps = (char*) s; // copies the bytes of s up to its first NUL
\r
173 CachedText.push_back(cpps); // CachedText is later passed to XMLTree by CloseDocument()
\r
175 return 1; // success
\r
178 // NewEmptyText(): indicates the event of finding a new empty text in the document.
\r
179 // When empty texts are indexed (indexing_empty_texts is true), an empty string is
\r
180 // inserted into the text collection. When only non-empty texts are indexed, the text
\r
181 // is just marked as empty with a 0 at position npar-1 of the empty-text bit vector.
\r
182 // Returns a non-zero value upon success (the visible path returns 1), NULLT on error.
\r
183 int XMLTreeBuilder::NewEmptyText()
\r
185 unsigned char c = 0; // a lone NUL byte, i.e. the empty string passed to InsertText below
\r
187 if (!indexing_empty_texts) {
\r
// resizes the bit vector so that bit npar-1 is addressable; the size is computed with
// sizeof(pb) although the pointer is unsigned int* -- assumes both have the same size, TODO confirm
188 empty_texts_aux = (unsigned int *)urealloc(empty_texts_aux, sizeof(pb)*(1+(npar-1)/(8*sizeof(pb))));
\r
190 bitclean(empty_texts_aux, npar-1); // marks the empty text with a 0 in the bit vector
\r
192 else TextBuilder->InsertText(&c); // we insert the empty text just in case we index all the texts
\r
194 return 1; // success
\r