X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLTreeBuilder.h;fp=XMLTreeBuilder.h;h=38d81baf7a3849f477249203845ed6cec2f5b4b1;hb=aa6692a9fd2badf8e8e686b92075f041dc03bbef;hp=0000000000000000000000000000000000000000;hpb=5db16dd3e0bf609bc0fa84ee7d067f6bbc58013e;p=SXSI%2FXMLTree.git

diff --git a/XMLTreeBuilder.h b/XMLTreeBuilder.h
new file mode 100644
index 0000000..38d81ba
--- /dev/null
+++ b/XMLTreeBuilder.h
@@ -0,0 +1,131 @@
+
+/******************************************************************************
+ *   Copyright (C) 2009 by Diego Arroyuelo                                    *
+ *   Builder class for the in-memory XQuery/XPath engine                      *
+ *                                                                            *
+ *   This program is free software; you can redistribute it and/or modify     *
+ *   it under the terms of the GNU Lesser General Public License as published *
+ *   by the Free Software Foundation; either version 2 of the License, or     *
+ *   (at your option) any later version.                                      *
+ *                                                                            *
+ *   This program is distributed in the hope that it will be useful,          *
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of           *
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the            *
+ *   GNU Lesser General Public License for more details.                      *
+ *                                                                            *
+ *   You should have received a copy of the GNU Lesser General Public License *
+ *   along with this program; if not, write to the                            *
+ *   Free Software Foundation, Inc.,                                          *
+ *   59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.                *
+ ******************************************************************************/ 
+
+#ifndef XMLTREEBUILDER_H_
+#define XMLTREEBUILDER_H_
+#include "TextCollection/TextCollectionBuilder.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <cstring>
+
+
+#undef W
+#undef WW
+#undef Wminusone
+
+#include "bp.h"
+#include "XMLTree.h"
+#include <static_bitsequence.h>
+#include <alphabet_mapper.h>
+#include <static_sequence.h>
+using SXSI::TextCollection;
+using SXSI::TextCollectionBuilder;
+
+#define NULLT -1  // error return value named in the XMLTreeBuilder method docs below
+        // NOTE(review): W is #undef'd above and not redefined in this file; presumably the word width in bits, supplied by an included header - confirm.
+        // Sets bit p of the word array e (word index p/W, bit index p%W).
+#define bitset(e,p) ((e)[(p)/W] |= (1<<((p)%W)))
+        // Clears bit p of the word array e (same word/bit indexing as bitset).
+#define bitclean(e,p) ((e)[(p)/W] &= ~(1<<((p)%W)))
+
+
+class XMLTreeBuilder {
+   /** Array containing the balanced parentheses sequence */
+   pb *par_aux;
+   int parArraySize;
+   int npar;
+
+   /** Mapping from tag identifier to tag name */
+   unsigned char **TagName;
+   int ntagnames;
+
+   /** Array containing the sequence of tags */
+   TagType *tags_aux;
+
+   /** The texts in the XML document */
+   TextCollectionBuilder *TextBuilder;
+   TextCollection *Text;
+
+   /** The texts in the XML document (cached for faster display) */
+   std::vector<std::string> CachedText;
+
+   /** Flag indicating whether we are indexing empty texts or not */
+   bool indexing_empty_texts;
+   unsigned int *empty_texts_aux;
+
+   // The TagName array should always contain two special tags: <@> for
+   // attributes and <$> for PCDATA. <$> can never be in a document (since we
+   // handle the text differently) but <@> can be returned by the parser. This
+   // flag tells the Tag bitmap construction whether <@> must be counted.
+   bool found_attributes;
+
+   // Allows disabling the TextCollection for benchmarking purposes
+   bool disable_tc;
+
+public:
+   /** Default constructor: null pointers, zero counters and false flags, so
+    * that no member is left indeterminate before OpenDocument() is called. */
+   XMLTreeBuilder()
+      : par_aux(NULL), parArraySize(0), npar(0), TagName(NULL), ntagnames(0),
+        tags_aux(NULL), TextBuilder(NULL), Text(NULL), indexing_empty_texts(false),
+        empty_texts_aux(NULL), found_attributes(false), disable_tc(false) {}
+   ~XMLTreeBuilder();
+
+   /** OpenDocument(empty_texts,sample_rate_text,dtc): initializes the
+    * construction of the data structure for the XML document. Parameter
+    * empty_texts indicates whether we index empty texts in the document or
+    * not. Parameter sample_rate_text indicates the sampling rate for the text
+    * searching data structures (small values get faster searching but a bigger
+    * space requirement). Parameter dtc disables the use of the TextCollection
+    * (i.e. everything is considered an empty text).
+    * Returns a non-zero value upon success, NULLT in case of error. */
+   int OpenDocument(bool empty_texts, int sample_rate_text, bool dtc);
+
+   /** CloseDocument(): finishes the construction of the data structure for
+    * the XML document. Tree and tags are represented in the final form,
+    * dynamic data structures are made static, returning the resulting
+    * XMLTree. After that, the XMLTree data structure can be queried. */
+   XMLTree *CloseDocument();
+
+   /** NewOpenTag(tagname): indicates the event of finding a new opening tag
+    * in the document. Tag name is given. Returns a non-zero value upon
+    * success, and returns NULLT in case of error. */
+   int NewOpenTag(unsigned char *tagname);
+
+   /** NewClosingTag(tagname): indicates the event of finding a new closing tag
+    * in the document. Tag name is given. Returns a non-zero value upon
+    * success, and returns NULLT in case of error. */
+   int NewClosingTag(unsigned char *tagname);
+
+   /** NewText(s): indicates the event of finding a new (non-empty) text s in
+    * the document. The new text is inserted within the text collection.
+    * Returns a non-zero value upon success, NULLT in case of error. */
+   int NewText(unsigned char *s);
+
+   /** NewEmptyText(): indicates the event of finding a new empty text in the
+    * document. In case of indexing empty and non-empty texts, we insert the
+    * empty texts into the text collection. In case of indexing only non-empty
+    * texts, it just indicates an empty text in the bit vector of empty texts.
+    * Returns a non-zero value upon success, NULLT in case of error. */
+   int NewEmptyText();
+};
+#endif
+