2 #include "XMLTreeBuilder.h"
\r
// Destructor of XMLTreeBuilder.
// NOTE(review): only a placeholder comment is visible in this listing; confirm which
// members (if any) are actually released here.
6 XMLTreeBuilder::~XMLTreeBuilder(){
\r
9 //delete other stuff.
\r
13 // OpenDocument(empty_texts, ...): starts the construction of the data structure for
\r
14 // the XML document. Parameter empty_texts indicates whether empty texts are indexed
\r
15 // in the document or not. Returns a non-zero value upon success, NULLT in case of error.
\r
// sample_rate_text (cast to unsigned) and index_type are forwarded to
// TextCollectionBuilder::create; index_type is also stored in text_index_type.
// NOTE(review): the visible code only ever returns 1; no error path is shown in this listing.
16 int XMLTreeBuilder::OpenDocument(bool empty_texts,
\r
17 int sample_rate_text,
\r
19 TextCollectionBuilder::index_type_t index_type)
\r
24 text_index_type = index_type;
\r
// initial buffer for the parentheses bit sequence: parArraySize words of type pb
27 par_aux = (pb *)umalloc(sizeof(pb)*parArraySize);
\r
// the tag-id sequence starts with room for a single entry; NewOpenTag/NewClosingTag grow it
29 tags_aux = (TagType *) umalloc(sizeof(TagType));
\r
// list of tag names and the map from tag name to tag id
31 TagName = new vector<string>();
\r
32 tIdMap = new std::unordered_map<string,int>();
\r
// registers the predefined tags before any tag of the document is seen
// NOTE(review): presumably the registration order determines their ids — confirm against REGISTER_TAG
34 REGISTER_TAG(TagName,tIdMap,DOCUMENT_OPEN_TAG);
\r
35 REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_OPEN_TAG);
\r
36 REGISTER_TAG(TagName,tIdMap,PCDATA_OPEN_TAG);
\r
37 REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_DATA_OPEN_TAG);
\r
38 REGISTER_TAG(TagName,tIdMap,CLOSING_TAG);
\r
39 REGISTER_TAG(TagName,tIdMap,DOCUMENT_CLOSE_TAG);
\r
40 REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_CLOSE_TAG);
\r
41 REGISTER_TAG(TagName,tIdMap,PCDATA_CLOSE_TAG);
\r
42 REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_DATA_CLOSE_TAG);
\r
// builder of the text collection, configured with sample_rate_text and index_type
48 TextBuilder = TextCollectionBuilder::create((unsigned)sample_rate_text, index_type);
\r
// bit vector of non-empty texts (see NewText): starts as a single unsigned int,
// and eta_size keeps its current size in bytes
51 empty_texts_aux = (unsigned int *)ucalloc(sizeof(unsigned int),1);
\r
52 eta_size = sizeof(unsigned int);
\r
53 return 1; // indicates success in the initialization of the data structure
\r
56 // CloseDocument(): finishes the construction of the data structure for the XML
\r
57 // document. Tree and tags are represented in the final form, dynamic data
\r
58 // structures are made static, and the flag "finished" is set to true. After that,
\r
59 // the data structure can be queried.
\r
// NOTE(review): neither the "finished" flag nor the return statement is visible in this
// listing; presumably the XMLTree built below is what gets returned — confirm.
60 XMLTree *XMLTreeBuilder::CloseDocument()
\r
62 //closing parenthesis for the tree root (disabled code kept for reference)
\r
63 //par_aux = (pb *)urealloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb))));
\r
64 //setbit(par_aux, npar, CP);
\r
67 // makes the text collection static
\r
69 PRINTTIME("Parsing XML Document", Parsing);
\r
// the text builder is created in OpenDocument; closing without it is a programming error
73 assert(TextBuilder != 0);
\r
75 Text = TextBuilder->InitTextCollection();
\r
78 STOPTIMER(Building);
\r
79 PRINTTIME("Building TextCollection", Building);
\r
// hands the auxiliary buffers over to the final XMLTree
// NOTE(review): some constructor arguments are not visible in this listing
83 XMLTree *T = new XMLTree(par_aux,
\r
87 empty_texts_aux, // freed by the constructor
\r
88 tags_aux, // freed by the constructor
\r
93 empty_texts_aux = 0; // the constructor freed this buffer, so the builder drops its pointer
\r
98 // NewOpenTag(tagname): indicates the event of finding a new opening tag in the document.
\r
99 // Tag name is given. Returns a non-zero value upon success, and returns NULLT
\r
100 // in case of failing when trying to insert the new tag.
\r
// A tag name not yet present in tIdMap is registered on the fly; a known one reuses its id.
// NOTE(review): the visible code only ever returns 1; no error path is shown in this listing.
101 int XMLTreeBuilder::NewOpenTag(string tagname)
\r
105 // inserts a new opening parenthesis in the bit sequence
\r
106 if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis
\r
// doubles the size in bytes of the parentheses buffer
// NOTE(review): the matching update of parArraySize is not visible in this listing — confirm
107 par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize);
\r
111 setbit(par_aux,npar,OP); // marks a new opening parenthesis
\r
// looks the tag name up in the name -> id map
113 TagIdMapIT tag_id = tIdMap->find(tagname);
\r
115 if (tag_id == tIdMap->end()){
\r
// unknown tag: registers it and uses the index of the last entry of TagName as its id
// (presumably REGISTER_TAG appends the name to TagName — confirm)
116 REGISTER_TAG(TagName,tIdMap,tagname);
\r
117 i = TagName->size() - 1;
\r
120 i = tag_id->second; // known tag: reuses the id stored in the map
\r
// NOTE(review): PCDATA_OPEN_TAG and ATTRIBUTE_DATA_OPEN_TAG are special-cased here, but the
// body of this branch is not visible in this listing
122 if (tagname.compare(PCDATA_OPEN_TAG) == 0 ||
\r
123 tagname.compare(ATTRIBUTE_DATA_OPEN_TAG) == 0){
\r
// makes room for one more tag id (npar + 1 entries in total)
126 tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1));
\r
128 tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags
\r
// NOTE(review): the increment of npar is not visible in this listing
132 return 1; // success
\r
136 // NewClosingTag(tagname): indicates the event of finding a new closing tag in the document.
\r
137 // Tag name is given. Returns a non-zero value upon success, and returns NULLT
\r
138 // in case of failing when trying to insert the new tag.
\r
// Every closing tag is recorded with the same id, CLOSING_TAG_ID; the code that registered
// closing tags by name is commented out below, so tagname is not used by the visible code.
// NOTE(review): the visible code only ever returns 1; no error path is shown in this listing.
139 int XMLTreeBuilder::NewClosingTag(string tagname)
\r
143 // inserts a new closing parenthesis in the bit sequence
\r
144 if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis
\r
// doubles the size in bytes of the parentheses buffer
// NOTE(review): the matching update of parArraySize is not visible in this listing — confirm
145 par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize);
\r
149 setbit(par_aux,npar,CP); // marks a new closing parenthesis
\r
// disabled: name-based registration of closing tags, kept for reference
151 //tagname.insert(0,"/");
\r
153 //TagIdMapIT tag_id = tIdMap->find(tagname);
\r
155 // if (tag_id == tIdMap->end()){
\r
156 // REGISTER_TAG(TagName,tIdMap,tagname);
\r
157 // i = TagName->size() - 1;
\r
160 // i = tag_id->second;
\r
// makes room for one more tag id (npar + 1 entries in total)
162 tags_aux = (TagType *)urealloc(tags_aux, sizeof(TagType)*(npar + 1));
\r
164 tags_aux[npar] = CLOSING_TAG_ID; // inserts the generic closing tag id within the preorder sequence of tags
\r
// NOTE(review): the increment of npar is not visible in this listing
168 return 1; // success
\r
172 // NewText(text): indicates the event of finding a new (non-empty) text in the document.
\r
173 // The new text is inserted within the text collection. Returns a non-zero value upon
\r
174 // success, NULLT in case of error.
\r
// It also sets bit npar-1 of empty_texts_aux, growing that bit vector as needed.
// NOTE(review): the visible code only ever returns 1; no error path is shown in this listing.
175 int XMLTreeBuilder::NewText(string text)
\r
// NOTE(review): the control flow around the two InsertText calls is not visible in this
// listing; presumably the one-byte placeholder string is inserted only in some cases
// (e.g. for an empty text) instead of the text itself — confirm
179 TextBuilder->InsertText((uchar *)"\001");
\r
181 TextBuilder->InsertText((uchar *) text.c_str());
\r
// size in bytes, rounded to whole uint words, needed to address bit position npar-1
184 int n_eta_size = sizeof(uint)*(1+(npar-1)/(8*sizeof(uint)));
\r
185 // see basics.h: urecalloc resizes the buffer and sets the new area to 0.
\r
187 empty_texts_aux = (uint *)urecalloc(empty_texts_aux,eta_size,n_eta_size);
\r
188 eta_size = n_eta_size;
\r
189 bitset(empty_texts_aux, npar-1); // marks the non-empty text with a 1 in the bit vector
\r
191 return 1; // success
\r