#include <libxml++/libxml++.h>
#include <libxml++/parsers/textreader.h>
#include <string>
-#include <unordered_map>
#include "StorageInterface.h"
// Bring the std and xmlpp namespaces into scope so the declarations below can
// be written with unqualified names (e.g. string, TextReader).
// NOTE(review): because this is a header, these using-directives also take
// effect in every translation unit that includes it.
using namespace std;
using namespace xmlpp;
-/* For Hashmap. Seems fairly well supported */
-using namespace __gnu_cxx;
-typedef pair<int,string> cons_str;
-typedef pair<string,int> cons_int;
// XMLDocShredder: declares two constructors (one taking a file name, one
// taking an in-memory byte buffer plus its size), a virtual destructor, a set
// of virtual process*() hooks named after XML node/event kinds, and parse().
// Only declarations appear here; the definitions live elsewhere, so the exact
// behavior of each hook cannot be confirmed from this header.
// Everything below, including the data members, is public.
class XMLDocShredder
{
public:
- XMLDocShredder(const string inFileName);
- XMLDocShredder(const unsigned char * data, TextReader::size_type size);
- virtual ~XMLDocShredder();
- virtual void processStartElement();
- virtual void processEndElement();
- virtual void processPCDATA();
- virtual void processAttributes();
- virtual void processSignificantWhitespace();
- virtual void processStartDocument(const string docName);
- virtual void processEndDocument();
- virtual void processComment();
- virtual void processProcessingInstruction();
- virtual void processDocTypeDeclaration();
- virtual void processUnknownNodeType();
- virtual void processCDATASection();
- virtual void parse();
-
// NOTE(review): the meaning of the sf, iet and dtc constructor parameters is
// not documented in this header -- confirm against the constructor
// definitions before relying on them.
+ XMLDocShredder(const string inFileName,int sf, bool iet, bool dtc);
+ XMLDocShredder(const unsigned char * data, TextReader::size_type size,int sf, bool iet, bool dtc);
+ virtual ~XMLDocShredder();
+ virtual void processStartElement();
+ virtual void processEndElement();
+ virtual void processPCDATA();
+ virtual void processAttributes();
+ virtual void processSignificantWhitespace();
+ virtual void processStartDocument(const string docName);
+ virtual void processEndDocument();
+ virtual void processComment();
+ virtual void processProcessingInstruction();
+ virtual void processDocTypeDeclaration();
+ virtual void processUnknownNodeType();
+ virtual void processCDATASection();
+ virtual void parse();
+
// Raw pointers to the storage back end and to the libxml++ TextReader.
// NOTE(review): ownership and lifetime of both pointers are not visible in
// this header -- check the constructors and destructor.
StorageInterface *storageIfc_;
TextReader *reader_;
void setProperties();
// NOTE(review): presumably records whether the previous event was text, in
// support of the coalescing described below -- confirm in the implementation.
bool last_text;
+ string buffer;
+ // used to coalesce successive text events
+ // which can occur if we discard processing-instruction and comment nodes.
};
#endif /*XMLDOCSHREDDER_H_*/