2 #include "XMLTreeBuilder.h"
\r
// Destructor of XMLTreeBuilder.
5 XMLTreeBuilder::~XMLTreeBuilder(){
\r
9 // OpenDocument(empty_texts): it starts the construction of the data structure for
\r
10 // the XML document. Parameter empty_texts indicates whether we index empty texts
\r
11 // in document or not. Returns a non-zero value upon success, NULLT in case of error.
// Parameter sample_rate_text is cast to unsigned and passed to the
// TextCollectionBuilder constructor.
// NOTE(review): the role of parameter dtc cannot be determined from the statements
// below -- confirm against the class declaration.
\r
12 int XMLTreeBuilder::OpenDocument(bool empty_texts, int sample_rate_text, bool dtc)
\r
// Initial parentheses buffer: parArraySize words of type pb.
19 par_aux = (pb *)umalloc(sizeof(pb)*parArraySize);
\r
// Tag-id sequence starts with room for a single TagType; NewOpenTag and
// NewClosingTag grow it with urealloc.
21 tags_aux = (TagType *) umalloc(sizeof(TagType));
\r
23 TagName = new vector<string>();
\r
24 tIdMap = new std::unordered_map<string,int>();
\r
// Pre-registers the built-in opening and closing tag names.
// NOTE(review): REGISTER_TAG is defined elsewhere; presumably it appends the name
// to TagName and records its index in tIdMap -- confirm.
26 REGISTER_TAG(TagName,tIdMap,DOCUMENT_OPEN_TAG);
\r
27 REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_OPEN_TAG);
\r
28 REGISTER_TAG(TagName,tIdMap,PCDATA_OPEN_TAG);
\r
29 REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_DATA_OPEN_TAG);
\r
30 REGISTER_TAG(TagName,tIdMap,DOCUMENT_CLOSE_TAG);
\r
31 REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_CLOSE_TAG);
\r
32 REGISTER_TAG(TagName,tIdMap,PCDATA_CLOSE_TAG);
\r
33 REGISTER_TAG(TagName,tIdMap,ATTRIBUTE_DATA_CLOSE_TAG);
\r
36 CachedText = new vector<string>;
\r
40 TextBuilder = new TextCollectionBuilder((unsigned)sample_rate_text);
\r
// Bit vector of non-empty texts: one unsigned int allocated with ucalloc;
// eta_size tracks its current size in bytes (NewText grows it).
42 empty_texts_aux = (unsigned int *)ucalloc(sizeof(unsigned int),1);
\r
43 eta_size = sizeof(unsigned int);
\r
44 return 1; // indicates success in the initialization of the data structure
\r
47 // CloseDocument(): it finishes the construction of the data structure for the XML
\r
48 // document. Tree and tags are represented in the final form, dynamic data
\r
49 // structures are made static, and the flag "finished" is set to true. After that,
\r
50 // the data structure can be queried.
// The buffers built by the builder (par_aux, empty_texts_aux, tags_aux) are passed
// to the XMLTree constructor.
// NOTE(review): presumably the newly allocated XMLTree is the return value -- confirm.
\r
51 XMLTree *XMLTreeBuilder::CloseDocument()
\r
53 //closing parenthesis for the tree root
\r
54 //par_aux = (pb *)urealloc(par_aux, sizeof(pb)*(1+npar/(8*sizeof(pb))));
\r
55 //setbit(par_aux, npar, CP);
\r
58 // makes the text collection static
\r
// TextBuilder is created in OpenDocument; the assert catches a null builder.
61 assert(TextBuilder != 0);
\r
62 Text = TextBuilder->InitTextCollection();
\r
67 XMLTree *T = new XMLTree(par_aux,
\r
71 empty_texts_aux, // freed by the constructor
\r
72 tags_aux, //freed by the constructor
\r
80 // NewOpenTag(tagname): indicates the event of finding a new opening tag in the document.
\r
81 // Tag name is given. Returns a non-zero value upon success, and returns NULLT
\r
82 // in case of failing when trying to insert the new tag.
\r
83 int XMLTreeBuilder::NewOpenTag(string tagname)
\r
87 // inserts a new opening parentheses in the bit sequence
\r
// sizeof(pb)*8*parArraySize is the capacity of par_aux in bits; when npar has
// reached it, the buffer is reallocated to twice its size in bytes.
// NOTE(review): parArraySize has to be doubled together with the buffer -- confirm
// that the update accompanies this realloc.
88 if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis
\r
89 par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize);
\r
93 setbit(par_aux,npar,OP); // marks a new opening parenthesis
\r
// Looks the tag name up in the name -> id map.
95 TagIdMapIT tag_id = tIdMap->find(tagname);
\r
// Unknown name: register it and use the index of the last entry of TagName as id.
97 if (tag_id == tIdMap->end()){
\r
98 REGISTER_TAG(TagName,tIdMap,tagname);
\r
99 i = TagName->size() - 1;
\r
// Known name: reuse the id stored in the map.
102 i = tag_id->second;
\r
// Tests whether the tag is one of the two text-carrying opening tags.
104 if (tagname.compare(PCDATA_OPEN_TAG) == 0 ||
\r
105 tagname.compare(ATTRIBUTE_DATA_OPEN_TAG) == 0){
\r
// Grows tags_aux to npar + 1 entries so that index npar is valid; this
// reallocation happens on every call.
108 tags_aux = (TagType *) urealloc(tags_aux, sizeof(TagType)*(npar + 1));
\r
110 tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags
\r
114 return 1; // success
\r
118 // NewClosingTag(tagname): indicates the event of finding a new closing tag in the document.
\r
119 // Tag name is given. Returns a non-zero value upon success, and returns NULLT
\r
120 // in case of failing when trying to insert the new tag.
\r
121 int XMLTreeBuilder::NewClosingTag(string tagname)
\r
125 // inserts a new closing parentheses in the bit sequence
\r
// sizeof(pb)*8*parArraySize is the capacity of par_aux in bits; when npar has
// reached it, the buffer is reallocated to twice its size in bytes.
// NOTE(review): parArraySize has to be doubled together with the buffer -- confirm
// that the update accompanies this realloc.
126 if (sizeof(pb)*8*parArraySize == npar) { // no space left for the new parenthesis
\r
127 par_aux = (pb *)urealloc(par_aux, sizeof(pb)*2*parArraySize);
\r
131 setbit(par_aux,npar,CP); // marks a new closing parenthesis
\r
// Closing tags are stored under the name prefixed with "/". tagname is passed
// by value, so only the local copy is modified.
133 tagname.insert(0,"/");
\r
135 TagIdMapIT tag_id = tIdMap->find(tagname);
\r
// Unknown name: register it and use the index of the last entry of TagName as id.
137 if (tag_id == tIdMap->end()){
\r
138 REGISTER_TAG(TagName,tIdMap,tagname);
\r
139 i = TagName->size() - 1;
\r
// Known name: reuse the id stored in the map.
142 i = tag_id->second;
\r
// Grows tags_aux to npar + 1 entries so that index npar is valid; this
// reallocation happens on every call.
144 tags_aux = (TagType *)urealloc(tags_aux, sizeof(TagType)*(npar + 1));
\r
146 tags_aux[npar] = i; // inserts the new tag id within the preorder sequence of tags
\r
150 return 1; // success
\r
154 // NewText(s): indicates the event of finding a new (non-empty) text s in the document.
\r
155 // The new text is inserted within the text collection. Returns a non-zero value upon
\r
156 // success, NULLT in case of error.
\r
157 int XMLTreeBuilder::NewText(string text)
\r
// NOTE(review): two InsertText calls follow, one with the one-byte string "\001"
// and one with the bytes of text; presumably the first is a placeholder used for
// an empty text and a condition selects between them -- confirm.
161 TextBuilder->InsertText((uchar *)"\001");
\r
163 TextBuilder->InsertText((uchar *) text.c_str());
\r
// Keeps a copy of the text in CachedText.
166 CachedText->push_back(text);
\r
// Size in bytes of the smallest array of uint words that contains bit npar-1.
167 int n_eta_size = sizeof(uint)*(1+(npar-1)/(8*sizeof(uint)));
\r
168 //see basics.h, recalloc resizes and sets the new area to 0.
\r
170 empty_texts_aux = (uint *)urecalloc(empty_texts_aux,eta_size,n_eta_size);
\r
171 eta_size = n_eta_size;
\r
// NOTE(review): bit npar-1 presumably corresponds to the most recently inserted
// parenthesis (the enclosing text tag) -- confirm against where npar is incremented.
172 bitset(empty_texts_aux, npar-1); // marks the non-empty text with a 1 in the bit vector
\r
174 return 1; // success
\r