X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLDocShredder.cpp;h=cb70e19590eef87217c0daaa82c62f66061ca7c7;hb=df5fdb22632be887ecd9f5c46a014e7e970148a2;hp=1516c9fc1cd724c346c66877b250af4d94d843f3;hpb=451e60ad59e35344dff62da5ca27fcd5eec1bff9;p=SXSI%2Fxpathcomp.git diff --git a/XMLDocShredder.cpp b/XMLDocShredder.cpp index 1516c9f..cb70e19 100644 --- a/XMLDocShredder.cpp +++ b/XMLDocShredder.cpp @@ -18,12 +18,22 @@ #include #include "XMLDocShredder.h" -#include "SXSIStorageInterface.h" #include #include "Utils.h" using namespace Glib; +void XMLDocShredder::doText(){ + + if (!buffer.empty()){ + tb->NewOpenTag(PCDATA_OPEN_TAG); + tb->NewText(buffer); + tb->NewClosingTag(PCDATA_OPEN_TAG); + }; + buffer.clear(); + +} + void XMLDocShredder::setProperties(){ /* instruct the parser to expand entity references and report as * regular PCDATA @@ -58,74 +68,77 @@ XMLDocShredder::XMLDocShredder(const unsigned char * data, bool iet, bool dtc) { - last_text = false; + tree = NULL; reader_ = new TextReader(data,size,""); setProperties(); - storageIfc_ = new SXSIStorageInterface(sf,iet,dtc); - buffer = ""; + tb = new XMLTreeBuilder(); + buffer.clear(); + tb->OpenDocument(iet,sf,dtc); } XMLDocShredder::XMLDocShredder(const string inFileName,int sf, bool iet, bool dtc) { - last_text = false; + tree = NULL; reader_ = new TextReader(inFileName); setProperties(); - storageIfc_ = new SXSIStorageInterface(sf,iet,dtc); - buffer = ""; + tb = new XMLTreeBuilder(); + buffer.clear(); + tb->OpenDocument(iet,sf,dtc); } XMLDocShredder::~XMLDocShredder() { delete reader_; - delete storageIfc_; + reader_ = NULL; + delete tb; + tb = NULL; } void XMLDocShredder::processStartElement() { - // fetch element name; this will be the full qualified name - ustring name = reader_->get_name(); - bool empty = false; - - storageIfc_->newChild(name); - - /* We must be really carefull here. calling process attributes moves - the document pointer on the last attribute, hence calling reader_->is_empty - afterwards will yield the wrong result. It is better to call it while we are - on the element and generate a nodeFinished() call at the end */ - empty = reader_->is_empty_element(); - - - // now, process attributes - if (reader_->has_attributes()) - { - processAttributes(); - }; - - - if (empty){ - storageIfc_->nodeFinished(name); - }; - - + doText(); + // fetch element name; this will be the full qualified name + ustring name = reader_->get_name(); + bool empty = false; + size_t found = name.find_first_of(':'); + if (found == ustring::npos) + tb->NewOpenTag(name); + else + tb->NewOpenTag(name.substr(found+1,name.length() - found - 1)); + + /* We must be really carefull here. calling process attributes moves + the document pointer on the last attribute, hence calling reader_->is_empty + afterwards will yield the wrong result. It is better to call it while we are + on the element and generate a nodeFinished() call at the end */ + empty = reader_->is_empty_element(); + + + // now, process attributes + if (reader_->has_attributes()) + processAttributes(); + + + if (empty) + tb->NewClosingTag(name); + + } void XMLDocShredder::processEndElement() { - // tell the storage interface that the current node has been completely processed - storageIfc_->nodeFinished(reader_->get_name()); + doText(); + ustring name = reader_->get_name(); + tb->NewClosingTag(name); } void XMLDocShredder::processPCDATA() { // send the content of this PCDATA node to the storage interface as a text node - - if (reader_->has_value()){ - storageIfc_->newChild("<$>"); - storageIfc_->newText(reader_->get_value()); - storageIfc_->nodeFinished("<$>"); - }; + if (reader_->has_value()) + buffer += reader_->get_value(); + } void XMLDocShredder::processAttributes() @@ -133,9 +146,9 @@ void XMLDocShredder::processAttributes() reader_->move_to_first_attribute(); string nspaceStr = "xmlns"; - storageIfc_->newChild("<@>"); + tb->NewOpenTag(ATTRIBUTE_OPEN_TAG); do - { + { ustring name = reader_->get_name(); ustring value = reader_->get_value(); @@ -145,8 +158,7 @@ void XMLDocShredder::processAttributes() if ((name.find(nspaceStr.c_str(), 0, 5)) == 0) { - storageIfc_->newChild(":" + value); - storageIfc_->nodeFinished(":" + value); + //TODO } /* otherwise, this is an ordinary attribute, so we construct a new child node of the @@ -157,40 +169,40 @@ void XMLDocShredder::processAttributes() else { string attname = "<@>"+name; - storageIfc_->newChild(attname); - storageIfc_->newChild("<@$>"); - storageIfc_->newText(value); - storageIfc_->nodeFinished("<@$>"); - storageIfc_->nodeFinished(attname); + tb->NewOpenTag(attname); + tb->NewOpenTag(ATTRIBUTE_DATA_OPEN_TAG); + tb->NewText(value); + tb->NewClosingTag(ATTRIBUTE_DATA_OPEN_TAG); + tb->NewClosingTag(attname); } } while (reader_->move_to_next_attribute()); - storageIfc_->nodeFinished("<@>"); + tb->NewClosingTag(ATTRIBUTE_OPEN_TAG); } void XMLDocShredder::processSignificantWhitespace() { - - if (reader_->has_value()){ - storageIfc_->newChild("<$>"); - storageIfc_->newText(reader_->get_value()); - storageIfc_->nodeFinished("<$>"); - }; + if (reader_->has_value()) + buffer += reader_->get_value(); + } void XMLDocShredder::processStartDocument(const string docName) { // tell storage interface to construct the document name - storageIfc_->newChild(""); + + tb->NewOpenTag(DOCUMENT_OPEN_TAG); } void XMLDocShredder::processEndDocument() { - /* tell the storage interface that document parsing has finished, and structures - * can now be written to disk. */ - storageIfc_->nodeFinished(""); - storageIfc_->parsingFinished(); + doText(); + /* tell the storage interface that document parsing has finished, and structures + * can now be written to disk. */ + tb->NewClosingTag(DOCUMENT_OPEN_TAG); + tree = tb->CloseDocument(); + } void XMLDocShredder::processComment() @@ -227,12 +239,8 @@ void XMLDocShredder::processCDATASection() * model. Instead, we simply pass the converted text value to the storage interface as * a text node attached to the current context node. */ - if (reader_->has_value()){ - storageIfc_->newChild("<$>"); - storageIfc_->newText(reader_->get_value()); - storageIfc_->nodeFinished("<$>"); - }; - + if (reader_->has_value()) + buffer+= reader_->get_value(); } void XMLDocShredder::processUnknownNodeType()