Merge branch 'local-ocamlbuild' into local-trunk
[SXSI/xpathcomp.git] / src / XMLDocShredder.h
diff --git a/src/XMLDocShredder.h b/src/XMLDocShredder.h
new file mode 100644 (file)
index 0000000..a12500d
--- /dev/null
@@ -0,0 +1,68 @@
+/**************************************
+ * XMLDocShredder.h
+ * --------------------
+ * Header file for the shredder routine that invokes the XML parser and 
+ * calls the appropriate construction methods of the storage interface in
+ * correspondence with received parsing events.
+ * 
+ * Author: Greg Leighton
+ * Date: 02/11/08
+ */
+
+#ifndef XMLDOCSHREDDER_H_
+#define XMLDOCSHREDDER_H_
+
+#include <libxml++/libxml++.h>
+#include <libxml++/parsers/textreader.h>
+#include <string>
+#include "XMLTree.h"
+#include "XMLTreeBuilder.h"
+
+using namespace std;
+using namespace xmlpp;
+
+
+class XMLDocShredder // Shredder: invokes the XML parser and reacts to parsing events (see file header); definitions live in the .cpp.
+{
+  void doText(); // private (class default access); NOTE(review): presumably handles accumulated text -- confirm in the .cpp
+public:
+  XMLDocShredder(const string inFileName,int sf, bool iet, bool dtc, // input given as a file name; meaning of sf/iet/dtc is not visible in this header
+                TextCollectionBuilder::index_type_t index_type
+                );
+  XMLDocShredder(const unsigned char * data, // input given as an in-memory buffer
+                TextReader::size_type size, // extent of `data` (presumably in bytes -- TODO confirm)
+                int sf, bool iet, bool dtc, // same trailing options as the file-name constructor
+                TextCollectionBuilder::index_type_t index_type
+                );
+  virtual ~XMLDocShredder(); // virtual, so the class can be destroyed through a base pointer when subclassed
+  virtual void processStartElement(); // process*(): virtual hooks, each named after the parsing event / node kind it handles
+  virtual void processEndElement(); // NOTE(review): presumably all process*() hooks are dispatched from parse() -- confirm in the .cpp
+  virtual void processPCDATA();
+  virtual void processAttributes();
+  virtual void processSignificantWhitespace();
+  virtual void processStartDocument(const string docName); // docName is received by value
+  virtual void processEndDocument();
+  virtual void processComment();
+  virtual void processProcessingInstruction();
+  virtual void processDocTypeDeclaration();
+  virtual void processUnknownNodeType();
+  virtual void processCDATASection();
+  virtual void parse(); // NOTE(review): presumably the entry point that runs the reader over the input -- confirm in the .cpp
+  // Returns the stored `tree` pointer unchanged; ownership is not expressed in this header -- check the destructor before deleting it.
+  XMLTree * getXMLTree(){
+    return tree;
+  }
+
+       
+ private:
+  XMLTreeBuilder * tb; // builder object; NOTE(review): lifetime/ownership not visible in this header
+  XMLTree * tree; // the pointer handed out by getXMLTree()
+  TextReader *reader_; // xmlpp TextReader (from <libxml++/parsers/textreader.h>)
+  void setProperties(); // purpose not visible in this header -- see the .cpp
+  bool last_text; // NOTE(review): presumably records that the previous event was text -- confirm in the .cpp
+  string buffer; 
+  // used to coalesce successive text events,
+  // which can occur if we discard PI and comment nodes.
+};
+
+#endif /*XMLDOCSHREDDER_H_*/