X-Git-Url: http://git.nguyen.vg/gitweb/?a=blobdiff_plain;f=XMLDocShredder.cpp;h=3daaf45dfbbcdc4a53cef508465a013e42d1d29f;hb=9be0c0e2a5597148fdc3a3cca2bdaf69da0aa27d;hp=6e9c41a3e5f2ff138bedc4a91edd4cec972e7b1f;hpb=3623eefccfb5fc69e19ad975a3669f51a2a8b276;p=SXSI%2Fxpathcomp.git diff --git a/XMLDocShredder.cpp b/XMLDocShredder.cpp index 6e9c41a..3daaf45 100644 --- a/XMLDocShredder.cpp +++ b/XMLDocShredder.cpp @@ -18,7 +18,7 @@ #include #include "XMLDocShredder.h" -#include "OCamlStorageInterface.h" +#include "SXSIStorageInterface.h" #include #include "Utils.h" @@ -55,20 +55,19 @@ void XMLDocShredder::setProperties(){ XMLDocShredder::XMLDocShredder(const unsigned char * data, TextReader::size_type size) { + last_text = false; reader_ = new TextReader(data,size,""); setProperties(); - storageIfc_ = new OCamlStorageInterface(); - //tagsID_ = new unordered_map(107); - //idTags_ = new unordered_map(107); + storageIfc_ = new SXSIStorageInterface(); } XMLDocShredder::XMLDocShredder(const string inFileName) { + last_text = false; reader_ = new TextReader(inFileName); setProperties(); - storageIfc_ = new OCamlStorageInterface(); - // tagsID_ = new unordered_map(107); - // idTags_ = new unordered_map(107); + storageIfc_ = new SXSIStorageInterface(); + } XMLDocShredder::~XMLDocShredder() @@ -78,23 +77,16 @@ XMLDocShredder::~XMLDocShredder() } -int XMLDocShredder::tagID(string name) -{ - int res = tagsID_[name]; - return res; -} -string XMLDocShredder::idTag(int id) -{ - - return idTags_[id]; -} - void XMLDocShredder::processStartElement() { // fetch element name; this will be the full qualified name ustring name = reader_->get_name(); bool empty = false; + + if (!last_text) + storageIfc_->newText(""); //prevText + last_text = false; storageIfc_->newChild(name); @@ -113,8 +105,8 @@ void XMLDocShredder::processStartElement() if (empty){ - DPRINT("Node " << name <<" is empty!\n") - storageIfc_->nodeFinished(); + storageIfc_->newText(""); //myText + storageIfc_->nodeFinished(name); }; @@ -125,18 +117,24 @@ void XMLDocShredder::processStartElement() void XMLDocShredder::processEndElement() { - // tell the storage interface that the current node has been completely processed - storageIfc_->nodeFinished(); + // tell the storage interface that the current node has been completely processed + if (!last_text) + storageIfc_->newText(""); //nextText of previous node + last_text = false; + storageIfc_->nodeFinished(reader_->get_name()); } void XMLDocShredder::processPCDATA() { // send the content of this PCDATA node to the storage interface as a text node + if (reader_->has_value()) { - storageIfc_->newChild("<$>"); storageIfc_->newText(reader_->get_value()); + last_text = true; } + else + storageIfc_->newText(""); } void XMLDocShredder::processAttributes() @@ -157,7 +155,7 @@ void XMLDocShredder::processAttributes() if ((name.find(nspaceStr.c_str(), 0, 5)) == 0) { storageIfc_->newChild(":" + value); - storageIfc_->nodeFinished(); + storageIfc_->nodeFinished(":" + value); } /* otherwise, this is an ordinary attribute, so we construct a new child node of the @@ -167,39 +165,38 @@ void XMLDocShredder::processAttributes() else { - storageIfc_->newChild(name); - storageIfc_->newChild("<$>"); - storageIfc_->newText(value); - storageIfc_->nodeFinished(); - // storageIfc_->nodeFinished(); + storageIfc_->newText(""); //prevText + storageIfc_->newChild(name); + storageIfc_->newText(value); + storageIfc_->nodeFinished(name); } } while (reader_->move_to_next_attribute()); - storageIfc_->nodeFinished(); + storageIfc_->newText(""); //nextText + storageIfc_->nodeFinished("<@>"); } void XMLDocShredder::processSignificantWhitespace() { - ustring value = reader_->get_value(); - + ustring value = reader_->get_value(); // each significant whitespace sequence constructs a text node - storageIfc_->newChild("<$>"); storageIfc_->newText(value); - //storageIfc_->nodeFinished(); } void XMLDocShredder::processStartDocument(const string docName) { // tell storage interface to construct the document name - // storageIfc_->newChild(""); + storageIfc_->newChild(""); + } void XMLDocShredder::processEndDocument() { /* tell the storage interface that document parsing has finished, and structures * can now be written to disk. */ - // storageIfc_->nodeFinished(); + storageIfc_->newText(""); + storageIfc_->nodeFinished(""); storageIfc_->parsingFinished(); } @@ -237,10 +234,10 @@ void XMLDocShredder::processCDATASection() * model. Instead, we simply pass the converted text value to the storage interface as * a text node attached to the current context node. */ + ustring value = reader_->get_value(); - storageIfc_->newChild("<$>"); storageIfc_->newText(value); - // storageIfc_->nodeFinished(); + last_text = true; }