--- /dev/null
+/**************************************
+ * XMLDocShredder.h
+ * --------------------
+ * Header file for the shredder routine that invokes the XML parser and
+ * calls the appropriate construction methods of the storage interface in
+ * correspondence with received parsing events.
+ *
+ * Author: Greg Leighton
+ * Date: 02/11/08
+ */
+
+#ifndef XMLDOCSHREDDER_H_
+#define XMLDOCSHREDDER_H_
+
+#include <libxml++/libxml++.h>
+#include <libxml++/parsers/textreader.h>
+#include <string>
+#include "XMLTree.h"
+#include "XMLTreeBuilder.h"
+
+using namespace std;
+using namespace xmlpp;
+
+
+/**
+ * Shredder that invokes the XML parser and reacts to the parsing events it
+ * receives through the process*() handlers declared below. According to the
+ * file header, those handlers call the construction methods of the storage
+ * interface; the resulting XMLTree is available through getXMLTree().
+ *
+ * Every handler is virtual, so a subclass can override the reaction to an
+ * individual event kind.
+ *
+ * NOTE(review): the class holds raw pointers and declares a destructor, but
+ * this header does not show who owns tb, tree and reader_ -- confirm against
+ * the implementation before copying instances.
+ */
+class XMLDocShredder
+{
+public:
+    /**
+     * Builds a shredder for the document named by inFileName.
+     *
+     * The semantics of sf, iet, dtc and index_type are defined in the
+     * implementation file and are not documented in this header.
+     */
+    XMLDocShredder(const std::string inFileName,
+                   int sf,
+                   bool iet,
+                   bool dtc,
+                   TextCollectionBuilder::index_type_t index_type);
+
+    /**
+     * Builds a shredder for a document held in memory: data points at the
+     * bytes and size gives their count. The remaining parameters are the
+     * same as for the file-name constructor.
+     */
+    XMLDocShredder(const unsigned char *data,
+                   xmlpp::TextReader::size_type size,
+                   int sf,
+                   bool iet,
+                   bool dtc,
+                   TextCollectionBuilder::index_type_t index_type);
+
+    virtual ~XMLDocShredder();
+
+    // One handler per kind of parsing event.
+    virtual void processStartElement();
+    virtual void processEndElement();
+    virtual void processPCDATA();
+    virtual void processAttributes();
+    virtual void processSignificantWhitespace();
+    virtual void processStartDocument(const std::string docName);
+    virtual void processEndDocument();
+    virtual void processComment();
+    virtual void processProcessingInstruction();
+    virtual void processDocTypeDeclaration();
+    virtual void processUnknownNodeType();
+    virtual void processCDATASection();
+
+    // Entry point of the shredding routine (see the file header).
+    virtual void parse();
+
+    /**
+     * Returns the stored XMLTree pointer as-is.
+     * NOTE(review): presumably only meaningful once parse() has completed,
+     * and ownership of the returned tree is not visible here -- confirm.
+     */
+    XMLTree *getXMLTree()
+    {
+        return this->tree;
+    }
+
+private:
+    // Internal helpers; their bodies live in the implementation file.
+    void doText();
+    void setProperties();
+
+    XMLTreeBuilder *tb;
+    XMLTree *tree;
+    xmlpp::TextReader *reader_;
+
+    // Used to coalesce successive text events, which can occur if we
+    // discard processing-instruction and comment nodes.
+    bool last_text;
+    std::string buffer;
+};
+
+#endif /*XMLDOCSHREDDER_H_*/