X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLTreeBuilder.h;h=5253dd0c7bf902dca3d3bc2cf62e9fd9a5be80e0;hb=6705d3d650a0823115d712cbf9a46b6c10f4e04c;hp=055eedafa47a2a12b54f5e075b4ab57a98852b34;hpb=f32808a35be7a1e62830a5972473178014fa44e5;p=SXSI%2FXMLTree.git diff --git a/XMLTreeBuilder.h b/XMLTreeBuilder.h index 055eeda..5253dd0 100644 --- a/XMLTreeBuilder.h +++ b/XMLTreeBuilder.h @@ -17,12 +17,12 @@ * along with this program; if not, write to the * * Free Software Foundation, Inc., * * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * - ******************************************************************************/ + ******************************************************************************/ #ifndef XMLTREEBUILDER_H_ #define XMLTREEBUILDER_H_ -#include "TextCollection/TextCollectionBuilder.h" +#include #undef W #undef WW #undef Wminusone @@ -43,22 +43,22 @@ using SXSI::TextCollectionBuilder; class XMLTreeBuilder { - + /** Array containing the balanced parentheses sequence */ pb *par_aux; int parArraySize; int npar; - /** Mapping from tag identifer to tag name */ + /** Mapping from tag identifer to tag name */ std::vector *TagName; TagIdMap * tIdMap; /** Array containing the sequence of tags */ TagType *tags_aux; - + /** The texts in the XML document */ TextCollectionBuilder *TextBuilder; TextCollection *Text; - + /** The texts in the XML document (cached for faster display) */ std::vector *CachedText; @@ -67,42 +67,43 @@ class XMLTreeBuilder { int eta_size; // Allows to disable the TextCollection for benchmarkin purposes bool disable_tc; - + TextCollectionBuilder::index_type_t text_index_type; public: XMLTreeBuilder() {;}; ~XMLTreeBuilder(); - + /** OpenDocument(sample_rate_text,dtc): initilizes the construction - * of the data structure for the XML document. Parameter + * of the data structure for the XML document. Parameter * sample_rate_text indicates the sampling rate for the text searching data - * structures (small values get faster searching but a bigger space + * structures (small values get faster searching but a bigger space * requirement). dtc disable the use of the TextCollection * (i.e. everything is considered an empty text *) - * Returns a non-zero value upon success, NULLT in case of + * Returns a non-zero value upon success, NULLT in case of * error. */ - int OpenDocument(bool empty_texts, int sample_rate_text, bool dtc); + int OpenDocument(bool empty_texts, int sample_rate_text, bool dtc, + TextCollectionBuilder::index_type_t index_type); - /** CloseDocument(): finishes the construction of the data structure for - * the XML document. Tree and tags are represented in the final form, + /** CloseDocument(): finishes the construction of the data structure for + * the XML document. Tree and tags are represented in the final form, * dynamic data structures are made static, returning the resulting * XMLTree. After that, the XMLTree data structure can be queried. */ XMLTree *CloseDocument(); - /** NewOpenTag(tagname): indicates the event of finding a new opening tag - * in the document. Tag name is given. Returns a non-zero value upon + /** NewOpenTag(tagname): indicates the event of finding a new opening tag + * in the document. Tag name is given. Returns a non-zero value upon * success, and returns NULLT in case of error. */ int NewOpenTag(std::string tagname); - + /** NewClosingTag(tagname): indicates the event of finding a new closing tag - * in the document. Tag name is given. Returns a non-zero value upon + * in the document. Tag name is given. Returns a non-zero value upon * success, and returns NULLT in case of error. */ int NewClosingTag(std::string tagname); - - /** NewText(s): indicates the event of finding a new text s in - * the document. The new text is inserted within the text collection. - * Returns a non-zero value upon success, NULLT in case of error. + + /** NewText(s): indicates the event of finding a new text s in + * the document. The new text is inserted within the text collection. + * Returns a non-zero value upon success, NULLT in case of error. * If the string is empty, which is legal in attributes, then * the string the sequence '\0x01\0x00' is inserted in the TextCollection * It is ok to do so since a non printable character cannot occur in an XML document