Replaced OCamlBackend
[SXSI/xpathcomp.git] / XMLDocShredder.cpp
index 6e9c41a..f81251e 100644 (file)
@@ -18,7 +18,7 @@
 
 #include <iostream>
 #include "XMLDocShredder.h"
-#include "OCamlStorageInterface.h"
+#include "SXSIStorageInterface.h"
 #include <libxml++/exceptions/parse_error.h>
 #include "Utils.h"
 
@@ -55,20 +55,19 @@ void XMLDocShredder::setProperties(){
 XMLDocShredder::XMLDocShredder(const unsigned char * data,
                                TextReader::size_type size)                      
 {
+  last_text = false; // no text has been emitted yet; set to true by processPCDATA()/processCDATASection(), tested by the start/end element handlers
   reader_ = new TextReader(data,size,"");
   setProperties();
-  storageIfc_ = new OCamlStorageInterface();
-  //tagsID_ = new unordered_map<int,string>(107);
-  //idTags_ = new unordered_map<string,int>(107);
+  storageIfc_ = new SXSIStorageInterface(); // SXSI backend replaces the OCaml one; allocated with new - NOTE(review): destructor body is not visible in this diff, confirm it releases reader_ and storageIfc_
 }
 
 XMLDocShredder::XMLDocShredder(const string inFileName)
 {
+  last_text = false; // no text has been emitted yet; set to true by processPCDATA()/processCDATASection(), tested by the start/end element handlers
   reader_ = new TextReader(inFileName);
   setProperties();
-  storageIfc_ = new OCamlStorageInterface();
-  //  tagsID_ = new unordered_map<int,string>(107);
-  // idTags_ = new unordered_map<string,int>(107);
+  storageIfc_ = new SXSIStorageInterface(); // same backend as the in-memory constructor; allocated with new - NOTE(review): confirm the destructor (body not visible here) releases it
+
 }
 
 XMLDocShredder::~XMLDocShredder()
@@ -78,23 +77,16 @@ XMLDocShredder::~XMLDocShredder()
 
 }
 
-int XMLDocShredder::tagID(string name)
-{
-  int res = tagsID_[name];
-  return  res;
-}
-string XMLDocShredder::idTag(int id)
-{
-
-  return  idTags_[id];
-}
-
 
 void XMLDocShredder::processStartElement()
 {
        // fetch element name; this will be the full qualified name
        ustring name = reader_->get_name();
        bool empty = false;
+       
+       if (!last_text)
+         storageIfc_->newText(""); //prevText
+       last_text = false;
 
        storageIfc_->newChild(name);
 
@@ -114,7 +106,9 @@ void XMLDocShredder::processStartElement()
        
        if (empty){
          DPRINT("Node " << name <<" is empty!\n")
-           storageIfc_->nodeFinished();
+           storageIfc_->newText("");  //myText
+           storageIfc_->nodeFinished(name);
+           storageIfc_->newText("");  //nextText
        };
 
 
@@ -125,18 +119,25 @@ void XMLDocShredder::processStartElement()
 
 void XMLDocShredder::processEndElement()
 {
-       // tell the storage interface that the current node has been completely processed
-       storageIfc_->nodeFinished();
+  // Tell the storage interface that the current node has been completely processed; if no text was emitted since the last start/end element (last_text is false), an empty text is emitted first.
+  if (!last_text)
+    storageIfc_->newText(""); // empty text standing in as the "nextText" of the previous node
+  last_text = false; // reset the flag: this end-element event is not a text event
+  storageIfc_->nodeFinished(reader_->get_name()); // close the node, passing the name the reader reports for this end element
 }
 
 void XMLDocShredder::processPCDATA()
 {
        // send the content of this PCDATA node to the storage interface as a text node
+         
        if (reader_->has_value())
        {         
          storageIfc_->newChild("<$>");
          storageIfc_->newText(reader_->get_value());
+         last_text = true; // record that a text was just emitted, so the next start/end element handler skips its empty newText("")
        }
+       else 
+         storageIfc_->newText(""); // no value: still emit an empty text, without a "<$>" child; NOTE(review): last_text is left unchanged here, so the next start/end element may emit a second empty text - confirm this is intended
 }
 
 void XMLDocShredder::processAttributes()
@@ -157,7 +158,7 @@ void XMLDocShredder::processAttributes()
                if ((name.find(nspaceStr.c_str(), 0, 5)) == 0)
                {
                        storageIfc_->newChild(":" + value);
-                       storageIfc_->nodeFinished();    
+                       storageIfc_->nodeFinished(":" + value); 
                }
                
                /* otherwise, this is an ordinary attribute, so we construct a new child node of the 
@@ -170,12 +171,11 @@ void XMLDocShredder::processAttributes()
                        storageIfc_->newChild(name);
                        storageIfc_->newChild("<$>");
                        storageIfc_->newText(value);
-                       storageIfc_->nodeFinished();
-                       //                      storageIfc_->nodeFinished();
+                       storageIfc_->nodeFinished("<$>");
                }
        }
        while (reader_->move_to_next_attribute());
-       storageIfc_->nodeFinished();
+       storageIfc_->nodeFinished("<@>");
 }
 
 void XMLDocShredder::processSignificantWhitespace()
@@ -185,21 +185,22 @@ void XMLDocShredder::processSignificantWhitespace()
        // each significant whitespace sequence constructs a text node
        storageIfc_->newChild("<$>");
        storageIfc_->newText(value);
-       //storageIfc_->nodeFinished();
+
        
 }
 
 void XMLDocShredder::processStartDocument(const string docName)
 {
   // tell storage interface to construct the document name
-  //  storageIfc_->newChild("");  
+  storageIfc_->newChild("ROOT");  // open a node tagged "ROOT" for the whole document (closed by nodeFinished("ROOT") in processEndDocument()); docName is not used in this body
+  
 }
 
 void XMLDocShredder::processEndDocument()
 {
        /* tell the storage interface that document parsing has finished, and structures
         * can now be written to disk. */
-  //  storageIfc_->nodeFinished();
+  storageIfc_->nodeFinished("ROOT"); // close the "ROOT" node opened in processStartDocument() before signalling the end of parsing
   storageIfc_->parsingFinished();      
 }
 
@@ -237,9 +238,11 @@ void XMLDocShredder::processCDATASection()
         * model.  Instead, we simply pass the converted text value to the storage interface as 
         * a text node attached to the current context node.
         */
+  
        ustring value = reader_->get_value();
        storageIfc_->newChild("<$>");
        storageIfc_->newText(value);
+       last_text = true;
        //      storageIfc_->nodeFinished();
 
 }