X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLDocShredder.cpp;h=d2e4a7503c105115d093d863344b1e6584c4ef5c;hb=6c60812f1f494f273d6074fcccf2707a6097cfaa;hp=f81251ed10ca6ada0c5a752f7c92d7815e173a5a;hpb=77294fdd983393727bcee7fda52dacdaf943e585;p=SXSI%2Fxpathcomp.git diff --git a/XMLDocShredder.cpp b/XMLDocShredder.cpp index f81251e..d2e4a75 100644 --- a/XMLDocShredder.cpp +++ b/XMLDocShredder.cpp @@ -59,6 +59,7 @@ XMLDocShredder::XMLDocShredder(const unsigned char * data, reader_ = new TextReader(data,size,""); setProperties(); storageIfc_ = new SXSIStorageInterface(); + buffer = ""; } XMLDocShredder::XMLDocShredder(const string inFileName) @@ -67,7 +68,7 @@ XMLDocShredder::XMLDocShredder(const string inFileName) reader_ = new TextReader(inFileName); setProperties(); storageIfc_ = new SXSIStorageInterface(); - + buffer = ""; } XMLDocShredder::~XMLDocShredder() @@ -84,10 +85,9 @@ void XMLDocShredder::processStartElement() ustring name = reader_->get_name(); bool empty = false; - if (!last_text) - storageIfc_->newText(""); //prevText - last_text = false; - + storageIfc_->newText(buffer); //prevText + buffer.erase(); + storageIfc_->newChild(name); /* We must be really carefull here. calling process attributes moves @@ -105,24 +105,18 @@ void XMLDocShredder::processStartElement() if (empty){ - DPRINT("Node " << name <<" is empty!\n") storageIfc_->newText(""); //myText - storageIfc_->nodeFinished(name); - storageIfc_->newText(""); //nextText + storageIfc_->nodeFinished(name); }; - - - } void XMLDocShredder::processEndElement() { // tell the storage interface that the current node has been completely processed - if (!last_text) - storageIfc_->newText(""); //nextText of previous node - last_text = false; + storageIfc_->newText(buffer); //prevText + buffer.erase(); storageIfc_->nodeFinished(reader_->get_name()); } @@ -131,20 +125,18 @@ void XMLDocShredder::processPCDATA() // send the content of this PCDATA node to the storage interface as a text node if (reader_->has_value()) - { - storageIfc_->newChild("<$>"); - storageIfc_->newText(reader_->get_value()); - last_text = true; - } - else - storageIfc_->newText(""); + { + buffer += reader_->get_value(); + }; + } void XMLDocShredder::processAttributes() { reader_->move_to_first_attribute(); - string nspaceStr = "xmlns"; + string nspaceStr = "xmlns"; + storageIfc_->newText(""); //prevText storageIfc_->newChild("<@>"); do { @@ -168,31 +160,28 @@ void XMLDocShredder::processAttributes() else { - storageIfc_->newChild(name); - storageIfc_->newChild("<$>"); - storageIfc_->newText(value); - storageIfc_->nodeFinished("<$>"); + storageIfc_->newText(""); //prevText + storageIfc_->newChild(name); + storageIfc_->newText(value); + storageIfc_->nodeFinished(name); } } while (reader_->move_to_next_attribute()); + storageIfc_->newText(""); //nextText storageIfc_->nodeFinished("<@>"); } void XMLDocShredder::processSignificantWhitespace() { - ustring value = reader_->get_value(); - - // each significant whitespace sequence constructs a text node - storageIfc_->newChild("<$>"); - storageIfc_->newText(value); - + // each significant whitespace sequence constructs a text node + buffer += reader_->get_value(); } void XMLDocShredder::processStartDocument(const string docName) { // tell storage interface to construct the document name - storageIfc_->newChild("ROOT"); + storageIfc_->newChild(""); } @@ -200,7 +189,8 @@ void XMLDocShredder::processEndDocument() { /* tell the storage interface that document parsing has finished, and structures * can now be written to disk. */ - storageIfc_->nodeFinished("ROOT"); + storageIfc_->newText(""); + storageIfc_->nodeFinished(""); storageIfc_->parsingFinished(); } @@ -240,10 +230,8 @@ void XMLDocShredder::processCDATASection() */ ustring value = reader_->get_value(); - storageIfc_->newChild("<$>"); storageIfc_->newText(value); last_text = true; - // storageIfc_->nodeFinished(); }