projects
/
SXSI
/
xpathcomp.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Don't index empty texts
[SXSI/xpathcomp.git]
/
XMLDocShredder.cpp
diff --git
a/XMLDocShredder.cpp
b/XMLDocShredder.cpp
index
3daaf45
..
d2e4a75
100644
(file)
--- a/
XMLDocShredder.cpp
+++ b/
XMLDocShredder.cpp
@@
-59,6
+59,7
@@
XMLDocShredder::XMLDocShredder(const unsigned char * data,
reader_ = new TextReader(data,size,"");
setProperties();
storageIfc_ = new SXSIStorageInterface();
reader_ = new TextReader(data,size,"");
setProperties();
storageIfc_ = new SXSIStorageInterface();
+ buffer = "";
}
XMLDocShredder::XMLDocShredder(const string inFileName)
}
XMLDocShredder::XMLDocShredder(const string inFileName)
@@
-67,7
+68,7
@@
XMLDocShredder::XMLDocShredder(const string inFileName)
reader_ = new TextReader(inFileName);
setProperties();
storageIfc_ = new SXSIStorageInterface();
reader_ = new TextReader(inFileName);
setProperties();
storageIfc_ = new SXSIStorageInterface();
-
+ buffer = "";
}
XMLDocShredder::~XMLDocShredder()
}
XMLDocShredder::~XMLDocShredder()
@@
-84,10
+85,9
@@
void XMLDocShredder::processStartElement()
ustring name = reader_->get_name();
bool empty = false;
ustring name = reader_->get_name();
bool empty = false;
- if (!last_text)
- storageIfc_->newText(""); //prevText
- last_text = false;
-
+ storageIfc_->newText(buffer); //prevText
+ buffer.erase();
+
storageIfc_->newChild(name);
/* We must be really careful here. calling process attributes moves
storageIfc_->newChild(name);
/* We must be really careful here. calling process attributes moves
@@
-110,17
+110,13
@@
void XMLDocShredder::processStartElement()
};
};
-
-
-
}
void XMLDocShredder::processEndElement()
{
// tell the storage interface that the current node has been completely processed
}
void XMLDocShredder::processEndElement()
{
// tell the storage interface that the current node has been completely processed
- if (!last_text)
- storageIfc_->newText(""); //nextText of previous node
- last_text = false;
+ storageIfc_->newText(buffer); //prevText
+ buffer.erase();
storageIfc_->nodeFinished(reader_->get_name());
}
storageIfc_->nodeFinished(reader_->get_name());
}
@@
-129,19
+125,18
@@
void XMLDocShredder::processPCDATA()
// send the content of this PCDATA node to the storage interface as a text node
if (reader_->has_value())
// send the content of this PCDATA node to the storage interface as a text node
if (reader_->has_value())
- {
- storageIfc_->newText(reader_->get_value());
- last_text = true;
- }
- else
- storageIfc_->newText("");
+ {
+ buffer += reader_->get_value();
+ };
+
}
void XMLDocShredder::processAttributes()
{
reader_->move_to_first_attribute();
}
void XMLDocShredder::processAttributes()
{
reader_->move_to_first_attribute();
- string nspaceStr = "xmlns";
+ string nspaceStr = "xmlns";
+ storageIfc_->newText(""); //prevText
storageIfc_->newChild("<@>");
do
{
storageIfc_->newChild("<@>");
do
{
@@
-178,9
+173,8
@@
void XMLDocShredder::processAttributes()
void XMLDocShredder::processSignificantWhitespace()
{
void XMLDocShredder::processSignificantWhitespace()
{
- ustring value = reader_->get_value();
- // each significant whitespace sequence constructs a text node
- storageIfc_->newText(value);
+ // each significant whitespace sequence constructs a text node
+ buffer += reader_->get_value();
}
}