#include <iostream>
#include "XMLDocShredder.h"
-#include "SXSIStorageInterface.h"
#include <libxml++/exceptions/parse_error.h>
#include "Utils.h"
using namespace Glib;
// Flushes pending character data: when buffer is non-empty it is emitted to
// the tree builder as one text node wrapped in PCDATA_OPEN_TAG open/close
// tags. buffer is cleared afterwards in either case.
+void XMLDocShredder::doText(){
+
+ if (!buffer.empty()){ // nothing is emitted for an empty buffer
+ tb->NewOpenTag(PCDATA_OPEN_TAG);
+ tb->NewText(buffer);
+ tb->NewClosingTag(PCDATA_OPEN_TAG);
+ };
+ buffer.clear(); // start accumulating the next run of text from scratch
+
+}
+
void XMLDocShredder::setProperties(){
/* instruct the parser to expand entity references and report as
* regular PCDATA
}
// Builds a shredder that reads an in-memory document of `size` bytes at `data`.
// sf, iet and dtc are only forwarded to XMLTreeBuilder::OpenDocument(); their
// meaning is not visible in this part of the file.
XMLDocShredder::XMLDocShredder(const unsigned char * data,
- TextReader::size_type size)
+ TextReader::size_type size,
+ int sf,
+ bool iet,
+ bool dtc)
{
- last_text = false;
+ tree = NULL; // stays NULL until processEndDocument() stores CloseDocument()'s result
reader_ = new TextReader(data,size,""); // third argument is an empty string (presumably the document URI)
setProperties();
- storageIfc_ = new SXSIStorageInterface();
+ tb = new XMLTreeBuilder();
+ buffer.clear();
+ tb->OpenDocument(iet,sf,dtc); // note: iet is passed before sf, unlike this constructor's parameter order
}
// Builds a shredder that reads the document from the file named inFileName.
// sf, iet and dtc are only forwarded to XMLTreeBuilder::OpenDocument(); their
// meaning is not visible in this part of the file.
-XMLDocShredder::XMLDocShredder(const string inFileName)
+XMLDocShredder::XMLDocShredder(const string inFileName,int sf, bool iet, bool dtc)
{
- last_text = false;
+ tree = NULL; // stays NULL until processEndDocument() stores CloseDocument()'s result
reader_ = new TextReader(inFileName);
setProperties();
- storageIfc_ = new SXSIStorageInterface();
-
+ tb = new XMLTreeBuilder();
+ buffer.clear();
+ tb->OpenDocument(iet,sf,dtc); // note: iet is passed before sf, unlike this constructor's parameter order
}
// Releases the reader and the tree builder allocated by the constructors.
// NOTE(review): `tree` is not deleted here — confirm who owns the object
// returned by CloseDocument().
XMLDocShredder::~XMLDocShredder()
{
delete reader_;
- delete storageIfc_;
+ reader_ = NULL;
+ delete tb;
+ tb = NULL;
}
// Handles an element start: flushes buffered text, opens a tag named after the
// element, emits its attributes, and closes the tag immediately when the
// element is empty.
void XMLDocShredder::processStartElement()
{
- // fetch element name; this will be the full qualified name
- ustring name = reader_->get_name();
- bool empty = false;
-
- if (!last_text)
- storageIfc_->newText(""); //prevText
- last_text = false;
-
- storageIfc_->newChild(name);
-
- /* We must be really carefull here. calling process attributes moves
- the document pointer on the last attribute, hence calling reader_->is_empty
- afterwards will yield the wrong result. It is better to call it while we are
- on the element and generate a nodeFinished() call at the end */
- empty = reader_->is_empty_element();
-
-
- // now, process attributes
- if (reader_->has_attributes())
- {
- processAttributes();
- };
-
-
- if (empty){
- storageIfc_->newText(""); //myText
- storageIfc_->nodeFinished(name);
- storageIfc_->newText(""); //nextText
- };
-
-
-
-
-
+ doText(); // text buffered before this element becomes its own text node first
+ // fetch element name; this will be the full qualified name
+ ustring name = reader_->get_name();
+ bool empty = false;
+
+ tb->NewOpenTag(name);
+
+ /* We must be really careful here. Calling processAttributes() moves
+ the document pointer onto the last attribute, hence calling
+ reader_->is_empty_element() afterwards will yield the wrong result. It is
+ better to call it while we are on the element and emit the closing tag at the end */
+ empty = reader_->is_empty_element();
+
+
+ // now, process attributes
+ if (reader_->has_attributes())
+ processAttributes();
+
+
+ if (empty) // no end-element event is handled here for an empty element, so close it now
+ tb->NewClosingTag(name);
+
+
}
// Handles an element end: flushes buffered text as a text node, then emits the
// closing tag for the element the reader currently names.
void XMLDocShredder::processEndElement()
{
- // tell the storage interface that the current node has been completely processed
- if (!last_text)
- storageIfc_->newText(""); //nextText of previous node
- last_text = false;
- storageIfc_->nodeFinished(reader_->get_name());
+ doText(); // pending text belongs inside the element being closed
+ ustring name = reader_->get_name();
+ tb->NewClosingTag(name);
}
// Handles a PCDATA node by appending its value, if any, to the pending text
// buffer; nothing is sent to the tree builder here.
void XMLDocShredder::processPCDATA()
{
- // send the content of this PCDATA node to the storage interface as a text node
-
- if (reader_->has_value())
- {
- storageIfc_->newChild("<$>");
- storageIfc_->newText(reader_->get_value());
- last_text = true;
- }
- else
- storageIfc_->newText("");
+ // append the content of this PCDATA node to buffer; doText() emits it later as a text node
+ if (reader_->has_value())
+ buffer += reader_->get_value();
+
}
// Walks the attributes of the current element and emits them between
// ATTRIBUTE_OPEN_TAG open/close tags. Each ordinary attribute becomes a
// "<@>"+name tag holding its value inside ATTRIBUTE_DATA_OPEN_TAG tags.
void XMLDocShredder::processAttributes()
{
reader_->move_to_first_attribute();
- string nspaceStr = "xmlns";
- storageIfc_->newChild("<@>");
+ string nspaceStr = "xmlns";
+ tb->NewOpenTag(ATTRIBUTE_OPEN_TAG);
do
- {
+ {
ustring name = reader_->get_name();
ustring value = reader_->get_value();
if ((name.find(nspaceStr.c_str(), 0, 5)) == 0) // attribute name starts with "xmlns"
{
- storageIfc_->newChild(":" + value);
- storageIfc_->nodeFinished(":" + value);
+ //TODO: attributes whose name starts with "xmlns" are currently skipped (nothing is emitted for them)
}
/* otherwise, this is an ordinary attribute, so we construct a new child node of the
else
{
- storageIfc_->newChild(name);
- storageIfc_->newChild("<$>");
- storageIfc_->newText(value);
- storageIfc_->nodeFinished("<$>");
+ string attname = "<@>"+name;
+ tb->NewOpenTag(attname);
+ tb->NewOpenTag(ATTRIBUTE_DATA_OPEN_TAG);
+ tb->NewText(value);
+ tb->NewClosingTag(ATTRIBUTE_DATA_OPEN_TAG);
+ tb->NewClosingTag(attname);
}
}
while (reader_->move_to_next_attribute());
- storageIfc_->nodeFinished("<@>");
+ tb->NewClosingTag(ATTRIBUTE_OPEN_TAG);
}
// Handles a significant-whitespace node by appending its value, if any, to the
// pending text buffer, so it is merged with neighbouring text when doText() runs.
void XMLDocShredder::processSignificantWhitespace()
{
- ustring value = reader_->get_value();
-
- // each significant whitespace sequence constructs a text node
- storageIfc_->newChild("<$>");
- storageIfc_->newText(value);
+ if (reader_->has_value())
+ buffer += reader_->get_value();
-
}
// Opens the document by emitting DOCUMENT_OPEN_TAG on the tree builder.
// docName is not referenced by the added line below.
void XMLDocShredder::processStartDocument(const string docName)
{
// tell storage interface to construct the document name
-	storageIfc_->newChild("");
+
+ tb->NewOpenTag(DOCUMENT_OPEN_TAG); // matched by the closing tag emitted in processEndDocument()
}
// Finishes the document: flushes buffered text, closes DOCUMENT_OPEN_TAG and
// stores the value returned by the tree builder's CloseDocument() in `tree`.
void XMLDocShredder::processEndDocument()
{
- /* tell the storage interface that document parsing has finished, and structures
- * can now be written to disk. */
- storageIfc_->nodeFinished("");
- storageIfc_->parsingFinished();
+ doText(); // emit any trailing text before the document tag is closed
+ /* close the document tag, then let the tree builder finish the document;
+ * the value returned by CloseDocument() is kept in tree. */
+ tb->NewClosingTag(DOCUMENT_OPEN_TAG);
+ tree = tb->CloseDocument();
+
}
void XMLDocShredder::processComment()
* model. Instead, we simply pass the converted text value to the storage interface as
* a text node attached to the current context node.
*/
-
- ustring value = reader_->get_value();
- storageIfc_->newChild("<$>");
- storageIfc_->newText(value);
- last_text = true;
- // storageIfc_->nodeFinished();
-
+ if (reader_->has_value())
+ buffer+= reader_->get_value();
}
void XMLDocShredder::processUnknownNodeType()