projects
/
SXSI
/
xpathcomp.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
bug fixes, added the count queries
[SXSI/xpathcomp.git]
/
XMLDocShredder.cpp
diff --git
a/XMLDocShredder.cpp
b/XMLDocShredder.cpp
index
6e9c41a
..
c048d2e
100644
(file)
--- a/
XMLDocShredder.cpp
+++ b/
XMLDocShredder.cpp
@@
-18,7
+18,7
@@
#include <iostream>
#include "XMLDocShredder.h"
#include <iostream>
#include "XMLDocShredder.h"
-#include "
OCaml
StorageInterface.h"
+#include "
SXSI
StorageInterface.h"
#include <libxml++/exceptions/parse_error.h>
#include "Utils.h"
#include <libxml++/exceptions/parse_error.h>
#include "Utils.h"
@@
-53,22
+53,25
@@
void XMLDocShredder::setProperties(){
}
XMLDocShredder::XMLDocShredder(const unsigned char * data,
}
XMLDocShredder::XMLDocShredder(const unsigned char * data,
- TextReader::size_type size)
+ TextReader::size_type size,
+ int sf,
+ bool iet,
+ bool dtc)
{
{
+ last_text = false;
reader_ = new TextReader(data,size,"");
setProperties();
reader_ = new TextReader(data,size,"");
setProperties();
- storageIfc_ = new OCamlStorageInterface();
- //tagsID_ = new unordered_map<int,string>(107);
- //idTags_ = new unordered_map<string,int>(107);
+ storageIfc_ = new SXSIStorageInterface(sf,iet,dtc);
+ buffer = "";
}
}
-XMLDocShredder::XMLDocShredder(const string inFileName)
+XMLDocShredder::XMLDocShredder(const string inFileName
,int sf, bool iet, bool dtc
)
{
{
+ last_text = false;
reader_ = new TextReader(inFileName);
setProperties();
reader_ = new TextReader(inFileName);
setProperties();
- storageIfc_ = new OCamlStorageInterface();
- // tagsID_ = new unordered_map<int,string>(107);
- // idTags_ = new unordered_map<string,int>(107);
+ storageIfc_ = new SXSIStorageInterface(sf,iet,dtc);
+ buffer = "";
}
XMLDocShredder::~XMLDocShredder()
}
XMLDocShredder::~XMLDocShredder()
@@
-78,24
+81,16
@@
XMLDocShredder::~XMLDocShredder()
}
}
-int XMLDocShredder::tagID(string name)
-{
- int res = tagsID_[name];
- return res;
-}
-string XMLDocShredder::idTag(int id)
-{
-
- return idTags_[id];
-}
-
void XMLDocShredder::processStartElement()
{
// fetch element name; this will be the full qualified name
ustring name = reader_->get_name();
bool empty = false;
void XMLDocShredder::processStartElement()
{
// fetch element name; this will be the full qualified name
ustring name = reader_->get_name();
bool empty = false;
-
+
+ storageIfc_->newText(buffer); //prevText
+ buffer.erase();
+
storageIfc_->newChild(name);
/* We must be really carefull here. calling process attributes moves
storageIfc_->newChild(name);
/* We must be really carefull here. calling process attributes moves
@@
-113,37
+108,38
@@
void XMLDocShredder::processStartElement()
if (empty){
if (empty){
- DPRINT("Node " << name <<" is empty!\n")
- storageIfc_->nodeFinished(
);
+ storageIfc_->newText(""); //myText
+ storageIfc_->nodeFinished(
name);
};
};
-
-
-
}
void XMLDocShredder::processEndElement()
{
}
void XMLDocShredder::processEndElement()
{
- // tell the storage interface that the current node has been completely processed
- storageIfc_->nodeFinished();
+ // tell the storage interface that the current node has been completely processed
+ storageIfc_->newText(buffer); //prevText
+ buffer.erase();
+ storageIfc_->nodeFinished(reader_->get_name());
}
void XMLDocShredder::processPCDATA()
{
// send the content of this PCDATA node to the storage interface as a text node
}
void XMLDocShredder::processPCDATA()
{
// send the content of this PCDATA node to the storage interface as a text node
+
if (reader_->has_value())
if (reader_->has_value())
- {
-
storageIfc_->newChild("<$>"
);
-
storageIfc_->newText(reader_->get_value())
;
- }
+ {
+
buffer += reader_->get_value(
);
+
}
;
+
}
void XMLDocShredder::processAttributes()
{
reader_->move_to_first_attribute();
}
void XMLDocShredder::processAttributes()
{
reader_->move_to_first_attribute();
- string nspaceStr = "xmlns";
+ string nspaceStr = "xmlns";
+ storageIfc_->newText(""); //prevText
storageIfc_->newChild("<@>");
do
{
storageIfc_->newChild("<@>");
do
{
@@
-157,7
+153,7
@@
void XMLDocShredder::processAttributes()
if ((name.find(nspaceStr.c_str(), 0, 5)) == 0)
{
storageIfc_->newChild(":" + value);
if ((name.find(nspaceStr.c_str(), 0, 5)) == 0)
{
storageIfc_->newChild(":" + value);
- storageIfc_->nodeFinished(
);
+ storageIfc_->nodeFinished(
":" + value);
}
/* otherwise, this is an ordinary attribute, so we construct a new child node of the
}
/* otherwise, this is an ordinary attribute, so we construct a new child node of the
@@
-167,39
+163,37
@@
void XMLDocShredder::processAttributes()
else
{
else
{
- storageIfc_->newChild(name);
- storageIfc_->newChild("<$>");
- storageIfc_->newText(value);
- storageIfc_->nodeFinished();
- // storageIfc_->nodeFinished();
+ storageIfc_->newText(""); //prevText
+ storageIfc_->newChild(name);
+ storageIfc_->newText(value);
+ storageIfc_->nodeFinished(name);
}
}
while (reader_->move_to_next_attribute());
}
}
while (reader_->move_to_next_attribute());
- storageIfc_->nodeFinished();
+ storageIfc_->newText(""); //nextText
+ storageIfc_->nodeFinished("<@>");
}
void XMLDocShredder::processSignificantWhitespace()
{
}
void XMLDocShredder::processSignificantWhitespace()
{
- ustring value = reader_->get_value();
-
- // each significant whitespace sequence constructs a text node
- storageIfc_->newChild("<$>");
- storageIfc_->newText(value);
- //storageIfc_->nodeFinished();
+ // each significant whitespace sequence constructs a text node
+ buffer += reader_->get_value();
}
void XMLDocShredder::processStartDocument(const string docName)
{
// tell storage interface to construct the document name
}
void XMLDocShredder::processStartDocument(const string docName)
{
// tell storage interface to construct the document name
- // storageIfc_->newChild("");
+ storageIfc_->newChild("");
+
}
void XMLDocShredder::processEndDocument()
{
/* tell the storage interface that document parsing has finished, and structures
* can now be written to disk. */
}
void XMLDocShredder::processEndDocument()
{
/* tell the storage interface that document parsing has finished, and structures
* can now be written to disk. */
- // storageIfc_->nodeFinished();
+ storageIfc_->newText("");
+ storageIfc_->nodeFinished("");
storageIfc_->parsingFinished();
}
storageIfc_->parsingFinished();
}
@@
-237,10
+231,10
@@
void XMLDocShredder::processCDATASection()
* model. Instead, we simply pass the converted text value to the storage interface as
* a text node attached to the current context node.
*/
* model. Instead, we simply pass the converted text value to the storage interface as
* a text node attached to the current context node.
*/
+
ustring value = reader_->get_value();
ustring value = reader_->get_value();
- storageIfc_->newChild("<$>");
storageIfc_->newText(value);
storageIfc_->newText(value);
-
// storageIfc_->nodeFinished()
;
+
last_text = true
;
}
}