1 /**************************************
4 * Header file for the shredder routine that invokes the XML parser and
5 * calls the appropriate construction methods of the storage interface in
6 * correspondence with received parsing events.
8 * Author: Greg Leighton
12 #ifndef XMLDOCSHREDDER_H_
13 #define XMLDOCSHREDDER_H_
15 #include <libxml++/libxml++.h>
16 #include <libxml++/parsers/textreader.h>
19 #include "XMLTreeBuilder.h"
22 using namespace xmlpp;
29 XMLDocShredder(const string inFileName,int sf, bool iet, bool dtc,
30 TextCollectionBuilder::index_type_t index_type
32 XMLDocShredder(const unsigned char * data,
33 TextReader::size_type size,
34 int sf, bool iet, bool dtc,
35 TextCollectionBuilder::index_type_t index_type
// Destructor and per-event handler declarations. Per the file header, the
// shredder invokes the XML parser and calls the storage interface's
// construction methods in response to parsing events; the handler bodies live
// in the implementation file, so the per-handler notes below are inferred from
// the names and should be confirmed there.
37 virtual ~XMLDocShredder(); // virtual destructor
38 virtual void processStartElement(); // presumably handles an element-start event
39 virtual void processEndElement(); // presumably handles an element-end event
40 virtual void processPCDATA(); // presumably handles a text (PCDATA) node
41 virtual void processAttributes(); // presumably handles the attributes of the current element
42 virtual void processSignificantWhitespace(); // presumably handles a significant-whitespace node
43 virtual void processStartDocument(const string docName); // docName is taken by value; presumably names the document being started - TODO confirm
44 virtual void processEndDocument(); // presumably handles the end of the document
45 virtual void processComment(); // presumably handles a comment node
46 virtual void processProcessingInstruction(); // presumably handles a processing-instruction node
47 virtual void processDocTypeDeclaration(); // presumably handles a DOCTYPE declaration
48 virtual void processUnknownNodeType(); // presumably the fallback for node types with no dedicated handler - TODO confirm
49 virtual void processCDATASection(); // presumably handles a CDATA section
52 XMLTree * getXMLTree(){
64 // used to coalesce successive text events
65 // which can occur if we discard pi and comment nodes.
68 #endif /*XMLDOCSHREDDER_H_*/