2 #include "XMLTreeBuilder.h"
\r
6 XMLTreeBuilder::~XMLTreeBuilder(){
\r
10 // OpenDocument(empty_texts, sample_rate_text, dtc): starts the construction of the data
\r
11 // structure for the XML document. Parameter empty_texts indicates whether empty texts in the
\r
12 // document are indexed; sample_rate_text is forwarded to the text-collection builder. NOTE(review): the use of dtc is not visible in this excerpt. Returns a non-zero value upon success, NULLT in case of error.
\r
13 int XMLTreeBuilder::OpenDocument(bool empty_texts, int sample_rate_text, bool dtc)
\r
21 par_aux = (pb *)umalloc(sizeof(pb)*parArraySize); // parenthesis bit array: parArraySize words of type pb
\r
23 tags_aux = (TagType *) umalloc(sizeof(TagType)); // room for a single tag id; NewOpenTag/NewClosingTag grow it
\r
25 TagName = new vector<string>(); // tag names
\r
26 tIdMap = new std::unordered_map<string,int>(); // tag name -> tag id lookup
\r
28 REGISTER_TAG(TagName,tIdMap,DOCUMENT_OPEN_TAG); // REGISTER_TAG is defined elsewhere; presumably it appends the name to TagName and records its id in tIdMap -- TODO confirm
\r
29 REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_OPEN_TAG);
\r
30 REGISTER_TAG(TagName,tIdMap,PCDATA_OPEN_TAG);
\r
31 REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_DATA_OPEN_TAG);
\r
32 REGISTER_TAG(TagName,tIdMap,CLOSING_TAG);
\r
33 REGISTER_TAG(TagName,tIdMap,DOCUMENT_CLOSE_TAG);
\r
34 REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_CLOSE_TAG);
\r
35 REGISTER_TAG(TagName,tIdMap,PCDATA_CLOSE_TAG);
\r
36 REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_DATA_CLOSE_TAG);
\r
42 TextBuilder = TextCollectionBuilder::create((unsigned)sample_rate_text, TextCollectionBuilder::index_type_default); // text-collection builder, created with the requested sample rate and the default index type
\r
44 empty_texts_aux = (unsigned int *)ucalloc(sizeof(unsigned int),1); // one word for the text bit vector; presumably zero-filled like calloc -- TODO confirm
\r
45 eta_size = sizeof(unsigned int); // current size of empty_texts_aux in bytes (NewText passes it to urecalloc as the old size)
\r
46 return 1; // indicates success in the initialization of the data structure
\r
49 // CloseDocument(): finishes the construction of the data structure for the XML
\r
50 // document. Tree and tags are represented in their final form, dynamic data
\r
51 // structures are made static, and the flag "finished" is set to true. After that,
\r
52 // the data structure can be queried. Returns an XMLTree pointer (presumably the tree allocated below; the return statement is not visible in this excerpt).
\r
53 XMLTree *XMLTreeBuilder::CloseDocument()
\r
55 //closing parenthesis for the tree root
\r
56 //par_aux = (pb *)urealloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb))));
\r
57 //setbit(par_aux, npar, CP);
\r
60 // makes the text collection static
\r
62 PRINTTIME("Parsing XML Document", Parsing);
\r
66 assert(TextBuilder != 0); // TextBuilder is assigned in OpenDocument, which must have been called first
\r
68 Text = TextBuilder->InitTextCollection();
\r
71 STOPTIMER(Building);
\r
72 PRINTTIME("Building TextCollection", Building);
\r
76 XMLTree *T = new XMLTree(par_aux,
\r
80 empty_texts_aux, // freed by the constructor
\r
81 tags_aux, //freed by the constructor
\r
88 // NewOpenTag(tagname): signals that a new opening tag was found in the document.
\r
89 // The tag name is given. Returns a non-zero value upon success, and returns NULLT
\r
90 // if the new tag could not be inserted.
\r
91 int XMLTreeBuilder::NewOpenTag(string tagname)
\r
95 // inserts a new opening parenthesis in the bit sequence
\r
96 if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis
\r
97 par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize); // doubles the bit array; NOTE(review): the matching update of parArraySize is not visible in this excerpt
\r
101 setbit(par_aux,npar,OP); // marks a new opening parenthesis
\r
103 TagIdMapIT tag_id = tIdMap->find(tagname); // looks the tag name up among those already registered
\r
105 if (tag_id == tIdMap->end()){ // tag name not registered yet
\r
106 REGISTER_TAG(TagName,tIdMap,tagname);
\r
107 i = TagName->size() - 1; // id = index of the last entry of TagName (presumably the one REGISTER_TAG just added)
\r
110 i = tag_id->second; // id stored in the map for an already registered name
\r
112 if (tagname.compare(PCDATA_OPEN_TAG) == 0 ||
\r
113 tagname.compare(ATTRIBUTE_DATA_OPEN_TAG) == 0){
\r
116 tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1)); // grows the tag array so that index npar is valid
\r
118 tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags
\r
122 return 1; // success
\r
126 // NewClosingTag(tagname): signals that a new closing tag was found in the document.
\r
127 // The tag name is given. Returns a non-zero value upon success, and returns NULLT
\r
128 // if the new tag could not be inserted.
\r
129 int XMLTreeBuilder::NewClosingTag(string tagname)
\r
133 // inserts a new closing parenthesis in the bit sequence
\r
134 if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis
\r
135 par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize); // doubles the bit array; NOTE(review): the matching update of parArraySize is not visible in this excerpt
\r
139 setbit(par_aux,npar,CP); // marks a new closing parenthesis
\r
141 //tagname.insert(0,"/");
\r
143 //TagIdMapIT tag_id = tIdMap->find(tagname);
\r
145 // if (tag_id == tIdMap->end()){
\r
146 // REGISTER_TAG(TagName,tIdMap,tagname);
\r
147 // i = TagName->size() - 1;
\r
150 // i = tag_id->second;
\r
152 tags_aux = (TagType *)urealloc(tags_aux, sizeof(TagType)*(npar + 1)); // grows the tag array so that index npar is valid
\r
154 tags_aux[npar] = CLOSING_TAG_ID; // stores the generic closing-tag id; the per-name lookup above is commented out, so tagname is not consulted here
\r
158 return 1; // success
\r
162 // NewText(text): signals that a new (non-empty) text was found in the document.
\r
163 // The new text is inserted within the text collection. Returns a non-zero value upon
\r
164 // success, NULLT in case of error.
\r
165 int XMLTreeBuilder::NewText(string text)
\r
169 TextBuilder->InsertText((uchar *)"\001"); // inserts a one-byte \001 string; NOTE(review): the control flow choosing between this call and the next is not visible in this excerpt
\r
171 TextBuilder->InsertText((uchar *) text.c_str()); // inserts the text itself as a C string
\r
174 int n_eta_size = sizeof(uint)*(1+(npar-1)/(8*sizeof(uint))); // bytes needed to hold bit index npar-1, in whole uint words
\r
175 //see basics.h, recalloc resizes and sets the new area to 0.
\r
177 empty_texts_aux = (uint *)urecalloc(empty_texts_aux,eta_size,n_eta_size);
\r
178 eta_size = n_eta_size; // remembers the new byte size for the next resize
\r
179 bitset(empty_texts_aux, npar-1); // marks the non-empty text with a 1 in the bit vector
\r
181 return 1; // success
\r