X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLDocShredder.h;h=ca555fa7224c14719bffc47f73e2332d3e6665b9;hb=f0557f21878be17ddc75b1bc8f4f86da68c8e604;hp=19ed10f252b19ce917729ad2eb6ddacc56fbb052;hpb=3623eefccfb5fc69e19ad975a3669f51a2a8b276;p=SXSI%2Fxpathcomp.git diff --git a/XMLDocShredder.h b/XMLDocShredder.h index 19ed10f..ca555fa 100644 --- a/XMLDocShredder.h +++ b/XMLDocShredder.h @@ -15,47 +15,48 @@ #include #include #include -#include -#include "StorageInterface.h" +#include "XMLTree.h" +#include "XMLTreeBuilder.h" using namespace std; using namespace xmlpp; -/* For Hashmap. Seems fairly well supported */ -using namespace __gnu_cxx; -typedef pair cons_str; -typedef pair cons_int; class XMLDocShredder { + void doText(); public: - XMLDocShredder(const string inFileName); - XMLDocShredder(const unsigned char * data, TextReader::size_type size); - virtual ~XMLDocShredder(); - virtual void processStartElement(); - virtual void processEndElement(); - virtual void processPCDATA(); - virtual void processAttributes(); - virtual void processSignificantWhitespace(); - virtual void processStartDocument(const string docName); - virtual void processEndDocument(); - virtual void processComment(); - virtual void processProcessingInstruction(); - virtual void processDocTypeDeclaration(); - virtual void processUnknownNodeType(); - virtual void processCDATASection(); - virtual void parse(); - virtual int tagID(string); - virtual string idTag(int); - - StorageInterface *storageIfc_; + XMLDocShredder(const string inFileName,int sf, bool iet, bool dtc); + XMLDocShredder(const unsigned char * data, TextReader::size_type size,int sf, bool iet, bool dtc); + virtual ~XMLDocShredder(); + virtual void processStartElement(); + virtual void processEndElement(); + virtual void processPCDATA(); + virtual void processAttributes(); + virtual void processSignificantWhitespace(); + virtual void processStartDocument(const string docName); + virtual void processEndDocument(); + 
virtual void processComment(); + virtual void processProcessingInstruction(); + virtual void processDocTypeDeclaration(); + virtual void processUnknownNodeType(); + virtual void processCDATASection(); + virtual void parse(); + + XMLTree * getXMLTree(){ + return tree; + } -private: - TextReader *reader_; - void setProperties(); - unordered_map idTags_; - unordered_map tagsID_; + private: + XMLTreeBuilder * tb; + XMLTree * tree; + TextReader *reader_; + void setProperties(); + bool last_text; + string buffer; + // used to coalesce successive text events + // which can occur if we discard pi and comment nodes. }; #endif /*XMLDOCSHREDDER_H_*/