2 /******************************************************************************
\r
3 * Copyright (C) 2009 by Diego Arroyuelo *
\r
4 * Builder class for the in-memory XQuery/XPath engine *
\r
6 * This program is free software; you can redistribute it and/or modify *
\r
7 * it under the terms of the GNU Lesser General Public License as published *
\r
8 * by the Free Software Foundation; either version 2 of the License, or *
\r
9 * (at your option) any later version. *
\r
11 * This program is distributed in the hope that it will be useful, *
\r
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
\r
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
\r
14 * GNU Lesser General Public License for more details. *
\r
16 * You should have received a copy of the GNU Lesser General Public License *
\r
17 * along with this program; if not, write to the *
\r
18 * Free Software Foundation, Inc., *
\r
19 * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
\r
20 ******************************************************************************/
\r
22 #ifndef XMLTREEBUILDER_H_
\r
23 #define XMLTREEBUILDER_H_
\r
25 #include "TextCollection/TextCollectionBuilder.h"
\r
31 #include "XMLTree.h"
\r
33 using SXSI::TextCollection;
\r
34 using SXSI::TextCollectionBuilder;
\r
// Sets bit p in the word array e: element p/W receives a one at bit p%W, so
// each element of e holds W bits of the sequence (W is not defined in this
// part of the file). The mask is built from an unsigned literal on purpose:
// with a signed 1, selecting bit 31 shifts into the sign bit, and the negative
// result sign-extends (setting every higher bit) when OR-ed into a wider word.
#define bitset(e,p) ((e)[(p)/W] |= (1u<<((p)%W)))
\r
// Clears bit `pos` in the word array `words`: element pos/W is AND-ed with the
// complement of a one shifted left by pos%W. W is not defined in this part of
// the file.
#define bitclean(words,pos) ((words)[(pos) / W] &= ~(1 << ((pos) % W)))
\r
45 class XMLTreeBuilder {
\r
47 /** Array containing the balanced parentheses sequence */
\r
/** Mapping from tag identifier to tag name */
\r
53 std::vector<std::string> *TagName;
\r
55 /** Array containing the sequence of tags */
\r
58 /** The texts in the XML document */
\r
59 TextCollectionBuilder *TextBuilder;
\r
60 TextCollection *Text;
\r
62 /** The texts in the XML document (cached for faster display) */
\r
64 std::vector<std::string> *CachedText;
\r
66 unsigned int *empty_texts_aux;
\r
// Allows disabling the TextCollection for benchmarking purposes
\r
70 TextCollectionBuilder::index_type_t text_index_type;
\r
73 XMLTreeBuilder() {;};
\r
/** OpenDocument(empty_texts,sample_rate_text,dtc,index_type): initializes the construction
\r
78 * of the data structure for the XML document. Parameter
\r
79 * sample_rate_text indicates the sampling rate for the text searching data
\r
80 * structures (small values get faster searching but a bigger space
\r
 * requirement). dtc disables the use of the TextCollection
\r
 * (i.e. everything is considered an empty text).
\r
83 * Returns a non-zero value upon success, NULLT in case of
\r
85 int OpenDocument(bool empty_texts, int sample_rate_text, bool dtc,
\r
86 TextCollectionBuilder::index_type_t index_type);
\r
88 /** CloseDocument(): finishes the construction of the data structure for
\r
89 * the XML document. Tree and tags are represented in the final form,
\r
90 * dynamic data structures are made static, returning the resulting
\r
91 * XMLTree. After that, the XMLTree data structure can be queried. */
\r
92 XMLTree *CloseDocument();
\r
94 /** NewOpenTag(tagname): indicates the event of finding a new opening tag
\r
95 * in the document. Tag name is given. Returns a non-zero value upon
\r
96 * success, and returns NULLT in case of error. */
\r
97 int NewOpenTag(std::string tagname);
\r
99 /** NewClosingTag(tagname): indicates the event of finding a new closing tag
\r
100 * in the document. Tag name is given. Returns a non-zero value upon
\r
101 * success, and returns NULLT in case of error. */
\r
102 int NewClosingTag(std::string tagname);
\r
104 /** NewText(s): indicates the event of finding a new text s in
\r
105 * the document. The new text is inserted within the text collection.
\r
106 * Returns a non-zero value upon success, NULLT in case of error.
\r
107 * If the string is empty, which is legal in attributes, then
\r
 * the sequence '\0x01\0x00' is inserted in the TextCollection.
\r
 * It is OK to do so since a non-printable character cannot occur in an XML document.
\r
111 int NewText(std::string text);
\r