2 #include "XMLTreeBuilder.h"
\r
// Destructor of XMLTreeBuilder.
// NOTE(review): the destructor body is not visible in this excerpt. CloseDocument()
// hands par_aux, empty_texts_aux and tags_aux to the XMLTree constructor, and the
// comments there mark at least the latter two as freed by that constructor — confirm
// that the destructor does not release those buffers a second time.
5 XMLTreeBuilder::~XMLTreeBuilder(){
\r
9 // OpenDocument(empty_texts): it starts the construction of the data structure for
\r
10 // the XML document. Parameter empty_texts indicates whether we index empty texts
\r
11 // in the document or not. Returns a non-zero value upon success, NULLT in case of error.
\r
// Parameters:
//   empty_texts      - whether empty texts are indexed (see above).
//   sample_rate_text - converted to unsigned and passed to the TextCollectionBuilder
//                      constructor.
//   dtc              - NOTE(review): its use is not visible in this excerpt; confirm.
// NOTE(review): the only return statement visible here returns 1; confirm whether any
// path actually returns NULLT as the header comment states.
12 int XMLTreeBuilder::OpenDocument(bool empty_texts, int sample_rate_text, bool dtc)
\r
// Initial buffer for the parentheses bit sequence: parArraySize elements of type pb.
// NOTE(review): the initialisation of parArraySize is not visible in this excerpt.
19 par_aux = (pb *)umalloc(sizeof(pb)*parArraySize);
\r
// The tag-id sequence starts with room for a single TagType; NewOpenTag and
// NewClosingTag grow it to npar + 1 entries before writing each new id.
21 tags_aux = (TagType *) umalloc(sizeof(TagType));
\r
// Table of tag names and the map from tag name to tag id.
23 TagName = new vector<string>();
\r
24 tIdMap = new std::unordered_map<string,int>();
\r
// Registers the eight predefined tags: the four opening tags first, then the four
// corresponding closing tags in the same order.
// NOTE(review): REGISTER_TAG is defined elsewhere; presumably it appends the name to
// TagName and records its index in tIdMap — confirm against the macro definition.
26 REGISTER_TAG(TagName,tIdMap,DOCUMENT_OPEN_TAG);
\r
27 REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_OPEN_TAG);
\r
28 REGISTER_TAG(TagName,tIdMap,PCDATA_OPEN_TAG);
\r
29 REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_DATA_OPEN_TAG);
\r
30 REGISTER_TAG(TagName,tIdMap,DOCUMENT_CLOSE_TAG);
\r
31 REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_CLOSE_TAG);
\r
32 REGISTER_TAG(TagName,tIdMap,PCDATA_CLOSE_TAG);
\r
33 REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_DATA_CLOSE_TAG);
\r
// Builder for the text collection; CloseDocument() later calls InitTextCollection() on it.
39 TextBuilder = new TextCollectionBuilder((unsigned)sample_rate_text);
\r
// Bit vector used by NewText to mark texts, starting with the size of one unsigned int.
// NOTE(review): presumably ucalloc zero-fills like calloc — confirm against its definition.
41 empty_texts_aux = (unsigned int *)ucalloc(sizeof(unsigned int),1);
\r
// eta_size tracks the current size of empty_texts_aux in bytes; NewText passes it to
// urecalloc as the old size when the bit vector grows.
42 eta_size = sizeof(unsigned int);
\r
43 return 1; // indicates success in the initialization of the data structure
\r
46 // CloseDocument(): it finishes the construction of the data structure for the XML
\r
47 // document. Tree and tags are represented in the final form, dynamic data
\r
48 // structures are made static, and the flag "finished" is set to true. After that,
\r
49 // the data structure can be queried.
\r
// NOTE(review): the tail of this function (remaining constructor arguments and the
// return statement) is not visible in this excerpt; presumably it returns the newly
// built XMLTree — confirm. The "finished" flag mentioned above is not set in any of
// the lines visible here.
50 XMLTree *XMLTreeBuilder::CloseDocument()
\r
// The next three lines are disabled code: no closing parenthesis for the root is
// appended here.
52 //closing parenthesis for the tree root
\r
53 //par_aux = (pb *)urealloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb))));
\r
54 //setbit(par_aux, npar, CP);
\r
57 // makes the text collection static
\r
// TextBuilder is created in OpenDocument(); the assert guards against calling
// CloseDocument() without it having been set.
60 assert(TextBuilder != 0);
\r
61 Text = TextBuilder->InitTextCollection();
\r
// Builds the final tree from the buffers accumulated by the builder. The arguments
// between par_aux and empty_texts_aux, and those after tags_aux, are not visible in
// this excerpt.
66 XMLTree *T = new XMLTree(par_aux,
\r
70 empty_texts_aux, // freed by the constructor
\r
71 tags_aux, //freed by the constructor
\r
78 // NewOpenTag(tagname): indicates the event of finding a new opening tag in the document.
\r
79 // Tag name is given. Returns a non-zero value upon success, and returns NULLT
\r
80 // in case of failing when trying to insert the new tag.
\r
// NOTE(review): the only return statement visible in this excerpt returns 1; confirm
// whether any path actually returns NULLT.
81 int XMLTreeBuilder::NewOpenTag(string tagname)
\r
85 // inserts a new opening parenthesis in the bit sequence
\r
// par_aux holds sizeof(pb)*8*parArraySize bits; once npar reaches that count the
// buffer is full and its size in bytes is doubled.
// NOTE(review): the matching update of parArraySize is not visible in this excerpt —
// confirm it is doubled inside this branch, otherwise the check never fires again.
86 if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis
\r
87 par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize);
\r
91 setbit(par_aux,npar,OP); // marks a new opening parenthesis at bit position npar
\r
// Looks the tag name up; an unknown name is registered and receives the index of the
// last TagName entry as its id, a known name reuses the id stored in the map.
93 TagIdMapIT tag_id = tIdMap->find(tagname);
\r
95 if (tag_id == tIdMap->end()){
\r
96 REGISTER_TAG(TagName,tIdMap,tagname);
\r
97 i = TagName->size() - 1;
\r
100 i = tag_id->second;
\r
// NOTE(review): the body of the following test for text-carrying tags is not visible
// in this excerpt.
102 if (tagname.compare(PCDATA_OPEN_TAG) == 0 ||
\r
103 tagname.compare(ATTRIBUTE_DATA_OPEN_TAG) == 0){
\r
// tags_aux is resized on every call so that index npar is valid.
106 tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1));
\r
108 tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags
\r
// NOTE(review): the increment of npar is not visible in this excerpt; presumably it
// happens just before returning — confirm.
112 return 1; // success
\r
116 // NewClosingTag(tagname): indicates the event of finding a new closing tag in the document.
\r
117 // Tag name is given. Returns a non-zero value upon success, and returns NULLT
\r
118 // in case of failing when trying to insert the new tag.
\r
// tagname is taken by value, so the "/" prefix added below does not affect the
// caller's string.
// NOTE(review): the only return statement visible in this excerpt returns 1; confirm
// whether any path actually returns NULLT.
119 int XMLTreeBuilder::NewClosingTag(string tagname)
\r
123 // inserts a new closing parenthesis in the bit sequence
\r
// par_aux holds sizeof(pb)*8*parArraySize bits; once npar reaches that count the
// buffer is full and its size in bytes is doubled.
// NOTE(review): the matching update of parArraySize is not visible in this excerpt —
// confirm it is doubled inside this branch.
124 if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis
\r
125 par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize);
\r
129 setbit(par_aux,npar,CP); // marks a new closing parenthesis at bit position npar
\r
// Closing tags are looked up and registered under the name "/" + tagname, so they get
// an id distinct from the opening tag of the same name.
131 tagname.insert(0,"/");
\r
133 TagIdMapIT tag_id = tIdMap->find(tagname);
\r
// An unknown name is registered and receives the index of the last TagName entry as
// its id; a known name reuses the id stored in the map.
135 if (tag_id == tIdMap->end()){
\r
136 REGISTER_TAG(TagName,tIdMap,tagname);
\r
137 i = TagName->size() - 1;
\r
140 i = tag_id->second;
\r
// tags_aux is resized on every call so that index npar is valid.
142 tags_aux = (TagType *)urealloc(tags_aux, sizeof(TagType)*(npar + 1));
\r
144 tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags
\r
// NOTE(review): the increment of npar is not visible in this excerpt; presumably it
// happens just before returning — confirm.
148 return 1; // success
\r
152 // NewText(s): indicates the event of finding a new (non-empty) text s in the document.
\r
153 // The new text is inserted within the text collection. Returns a non-zero value upon
\r
154 // success, NULLT in case of error.
\r
// NOTE(review): the only return statement visible in this excerpt returns 1; confirm
// whether any path actually returns NULLT.
155 int XMLTreeBuilder::NewText(string text)
\r
// NOTE(review): the control flow around the two InsertText calls is not visible in
// this excerpt. Presumably the one-byte "\001" placeholder is inserted in place of an
// empty text and the real contents otherwise — confirm against the full source.
159 TextBuilder->InsertText((uchar *)"\001");
\r
161 TextBuilder->InsertText((uchar *) text.c_str());
\r
// Size in bytes of the smallest whole number of uint words whose bits include bit
// index npar-1. NOTE(review): assumes npar >= 1 here, i.e. at least one tag was
// inserted before the text — confirm.
164 int n_eta_size = sizeof(uint)*(1+(npar-1)/(8*sizeof(uint)));
\r
165 //see basics.h, recalloc resizes and sets the new area to 0.
\r
// Grows the bit vector from eta_size bytes to n_eta_size bytes and records the new size.
167 empty_texts_aux = (uint *)urecalloc(empty_texts_aux,eta_size,n_eta_size);
\r
168 eta_size = n_eta_size;
\r
// Bit npar-1 is the position just before the next parenthesis to be written.
169 bitset(empty_texts_aux, npar-1); // marks the non-empty text with a 1 in the bit vector
\r
171 return 1; // success
\r