X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLDocShredder.cpp;h=1516c9fc1cd724c346c66877b250af4d94d843f3;hb=451e60ad59e35344dff62da5ca27fcd5eec1bff9;hp=722c291510915d059d25d860a62c27fdbd3bf769;hpb=c8f4fdfb1408aee5e3a4b402ff1ead9c3cdd9a4c;p=SXSI%2Fxpathcomp.git diff --git a/XMLDocShredder.cpp b/XMLDocShredder.cpp index 722c291..1516c9f 100644 --- a/XMLDocShredder.cpp +++ b/XMLDocShredder.cpp @@ -53,21 +53,25 @@ void XMLDocShredder::setProperties(){ } XMLDocShredder::XMLDocShredder(const unsigned char * data, - TextReader::size_type size) + TextReader::size_type size, + int sf, + bool iet, + bool dtc) { last_text = false; reader_ = new TextReader(data,size,""); setProperties(); - storageIfc_ = new SXSIStorageInterface(); + storageIfc_ = new SXSIStorageInterface(sf,iet,dtc); + buffer = ""; } -XMLDocShredder::XMLDocShredder(const string inFileName) +XMLDocShredder::XMLDocShredder(const string inFileName,int sf, bool iet, bool dtc) { last_text = false; reader_ = new TextReader(inFileName); setProperties(); - storageIfc_ = new SXSIStorageInterface(); - + storageIfc_ = new SXSIStorageInterface(sf,iet,dtc); + buffer = ""; } XMLDocShredder::~XMLDocShredder() @@ -84,10 +88,6 @@ void XMLDocShredder::processStartElement() ustring name = reader_->get_name(); bool empty = false; - if (!last_text) - storageIfc_->newText(""); //prevText - last_text = false; - storageIfc_->newChild(name); /* We must be really carefull here. calling process attributes moves @@ -105,45 +105,34 @@ void XMLDocShredder::processStartElement() if (empty){ - storageIfc_->newText(""); //myText - storageIfc_->nodeFinished(name); - storageIfc_->newText(""); //nextText + storageIfc_->nodeFinished(name); }; - - - } void XMLDocShredder::processEndElement() { - // tell the storage interface that the current node has been completely processed - if (!last_text) - storageIfc_->newText(""); //nextText of previous node - last_text = false; + // tell the storage interface that the current node has been completely processed storageIfc_->nodeFinished(reader_->get_name()); } void XMLDocShredder::processPCDATA() { - // send the content of this PCDATA node to the storage interface as a text node - - if (reader_->has_value()) - { - storageIfc_->newChild("<$>"); - storageIfc_->newText(reader_->get_value()); - last_text = true; - } - else - storageIfc_->newText(""); + // send the content of this PCDATA node to the storage interface as a text node + + if (reader_->has_value()){ + storageIfc_->newChild("<$>"); + storageIfc_->newText(reader_->get_value()); + storageIfc_->nodeFinished("<$>"); + }; } void XMLDocShredder::processAttributes() { reader_->move_to_first_attribute(); - string nspaceStr = "xmlns"; + string nspaceStr = "xmlns"; storageIfc_->newChild("<@>"); do { @@ -167,10 +156,12 @@ void XMLDocShredder::processAttributes() else { - storageIfc_->newChild(name); - storageIfc_->newChild("<$>"); - storageIfc_->newText(value); - storageIfc_->nodeFinished("<$>"); + string attname = "<@>"+name; + storageIfc_->newChild(attname); + storageIfc_->newChild("<@$>"); + storageIfc_->newText(value); + storageIfc_->nodeFinished("<@$>"); + storageIfc_->nodeFinished(attname); } } while (reader_->move_to_next_attribute()); @@ -179,13 +170,12 @@ void XMLDocShredder::processAttributes() void XMLDocShredder::processSignificantWhitespace() { - ustring value = reader_->get_value(); - - // each significant whitespace sequence constructs a text node - storageIfc_->newChild("<$>"); - storageIfc_->newText(value); - - + + if (reader_->has_value()){ + storageIfc_->newChild("<$>"); + storageIfc_->newText(reader_->get_value()); + storageIfc_->nodeFinished("<$>"); + }; } void XMLDocShredder::processStartDocument(const string docName) @@ -237,12 +227,11 @@ void XMLDocShredder::processCDATASection() * model. Instead, we simply pass the converted text value to the storage interface as * a text node attached to the current context node. */ - - ustring value = reader_->get_value(); - storageIfc_->newChild("<$>"); - storageIfc_->newText(value); - last_text = true; - // storageIfc_->nodeFinished(); + if (reader_->has_value()){ + storageIfc_->newChild("<$>"); + storageIfc_->newText(reader_->get_value()); + storageIfc_->nodeFinished("<$>"); + }; }