X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=src%2FXMLDocShredder.h;fp=src%2FXMLDocShredder.h;h=a12500d2f7192cdd53cc8b714e79da54bb802b64;hb=4b52da1a20a4fe031930bb96d2ca46bec06dc529;hp=0000000000000000000000000000000000000000;hpb=a223af3254fb51c279cfbccdc18c59484fdca74e;p=SXSI%2Fxpathcomp.git diff --git a/src/XMLDocShredder.h b/src/XMLDocShredder.h new file mode 100644 index 0000000..a12500d --- /dev/null +++ b/src/XMLDocShredder.h @@ -0,0 +1,68 @@ +/************************************** + * XMLDocShredder.h + * -------------------- + * Header file for the shredder routine that invokes the XML parser and + * calls the appropriate construction methods of the storage interface in + * correspondence with received parsing events. + * + * Author: Greg Leighton + * Date: 02/11/08 + */ + +#ifndef XMLDOCSHREDDER_H_ +#define XMLDOCSHREDDER_H_ + +#include +#include +#include +#include "XMLTree.h" +#include "XMLTreeBuilder.h" + +using namespace std; +using namespace xmlpp; + + +class XMLDocShredder +{ + void doText(); +public: + XMLDocShredder(const string inFileName,int sf, bool iet, bool dtc, + TextCollectionBuilder::index_type_t index_type + ); + XMLDocShredder(const unsigned char * data, + TextReader::size_type size, + int sf, bool iet, bool dtc, + TextCollectionBuilder::index_type_t index_type + ); + virtual ~XMLDocShredder(); + virtual void processStartElement(); + virtual void processEndElement(); + virtual void processPCDATA(); + virtual void processAttributes(); + virtual void processSignificantWhitespace(); + virtual void processStartDocument(const string docName); + virtual void processEndDocument(); + virtual void processComment(); + virtual void processProcessingInstruction(); + virtual void processDocTypeDeclaration(); + virtual void processUnknownNodeType(); + virtual void processCDATASection(); + virtual void parse(); + + XMLTree * getXMLTree(){ + return tree; + } + + + private: + XMLTreeBuilder * tb; + XMLTree * tree; + TextReader *reader_; + void setProperties(); + bool last_text; + string buffer; + // used to coalece successive text events + // which can occur if we discard pi and comment nodes. +}; + +#endif /*XMLDOCSHREDDER_H_*/