Replaced OCamlBackend
author kim <kim@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Tue, 25 Nov 2008 04:03:42 +0000 (04:03 +0000)
committer kim <kim@3cdefd35-fc62-479d-8e8d-bae585ffb9ca>
Tue, 25 Nov 2008 04:03:42 +0000 (04:03 +0000)
git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@13 3cdefd35-fc62-479d-8e8d-bae585ffb9ca

Makefile
StorageInterface.cpp
StorageInterface.h
XMLDocShredder.cpp
XMLDocShredder.h
tree.ml

diff --git a/Makefile b/Makefile
index d0510b9..b7084f0 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -11,7 +11,7 @@ OCAMLPACKAGES = unix,ulex,camlp4
 
 PPINCLUDES=$(OCAMLINCLUDES:%=-ppopt %)
 
-CXXSOURCES =  XMLDocShredder.cpp OCamlStorageInterface.cpp StorageInterface.cpp OCamlDriver.cpp
+CXXSOURCES =  XMLDocShredder.cpp SXSIStorageInterface.cpp StorageInterface.cpp OCamlDriver.cpp
 CXXOBJECTS = $(CXXSOURCES:.cpp=.o)
 
 CXXINCLUDES =  \
@@ -24,17 +24,22 @@ CXXINCLUDES =       \
        -I/usr/lib/glibmm-2.4/include \
        -I/usr/lib/sigc++-2.0/include \
        -I/usr/lib/glib-2.0/include\
-       -I`ocamlc -where`
+       -I`ocamlc -where`\
+       -IXMLTree \
+       -IXMLTree/libcds/includes \
+       -IXMLTree/TextCollection 
 
 CXXFLAGS = -O3 -Wall $(INCLUDEDIRS) -fPIC -std=c++0x
-CXX = g++
 
 ifeq ($(DEBUG), true)
+CXX = g++ -DDEBUG
 OCAMLOPT = ocamlopt -g -cc "$(CXX)" 
 SYNT_DEBUG = -ppopt -DDEBUG
 else
+CXX = g++
 OCAMLOPT = ocamlopt -cc "$(CXX)" -noassert -inline 10000
 endif
+
 ifeq ($(PROFILE), true)
 SYNT_PROF = $(SYNT_DEBUG) -ppopt -DPROFILE
 endif
@@ -48,7 +53,7 @@ SYNTAX= -syntax camlp4o $(PPINCLUDES) -ppopt pa_macro.cmo $(SYNT_PROF)
 
 
 
-LIBS= -lxml2 -lxml++-2.6 -lglibmm-2.4 -lgobject-2.0 -lglib-2.0 -lsigc-2.0
+LIBS=-lxml2 -lxml++-2.6 -lglibmm-2.4 -lgobject-2.0 -lglib-2.0 -lsigc-2.0 
 
 all: libcamlshredder.a  $(MLOBJS)
        $(OCAMLFIND) $(LINK) -o main -package "$(OCAMLPACKAGES)" $(SYNTAX) -cclib \
@@ -65,13 +70,16 @@ all: libcamlshredder.a  $(MLOBJS)
        $(OCAMLFIND) $(OCAMLOPT) -package "$(OCAMLPACKAGES)"  $(SYNTAX) -c $<
 
 libcamlshredder.a: $(CXXOBJECTS)
-       $(OCAMLMKLIB) -o camlshredder -custom $(CXXOBJECTS) $(LIBS)
+       mkdir -p .libs/
+       cd .libs/ && ar x ../XMLTree/XMLTree.a
+       $(OCAMLMKLIB) -o camlshredder -custom $(CXXOBJECTS) ./.libs/*.o $(LIBS)
+       rm -rf .libs
 
 clean:
-       rm -f *~ *.cm* *.[oa] *.so main
+       rm -f *~ *.cm* *.[oa] *.so main .libs
 
 
-OCamlStorageInterface.o: OCamlStorageInterface.h OCamlStorageInterface.cpp StorageInterface.h
+SXSIStorageInterface.o: SXSIStorageInterface.h SXSIStorageInterface.cpp StorageInterface.h
 StorageInterface.o: StorageInterface.h
 XMLDocShredder.o: XMLDocShredder.h XMLDocShredder.cpp OCamlStorageInterface.h StorageInterface.h
 OCamlDriver.o: XMLDocShredder.h StorageInterface.h
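diff --git a/StorageInterface.cpp b/StorageInterface.cpp
index 821c397..4953b13 100644 (file)
--- a/StorageInterface.cpp
+++ b/StorageInterface.cpp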
@@ -13,4 +13,4 @@
 StorageInterface::~StorageInterface()
 {
        
-}
\ No newline at end of file
+}
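diff --git a/StorageInterface.h b/StorageInterface.h
index c6f94aa..5af866f 100644 (file)
--- a/StorageInterface.h
+++ b/StorageInterface.h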
@@ -20,13 +20,16 @@ using namespace std;
 
 class StorageInterface
 {
-public:
+ public:
        virtual ~StorageInterface();
        virtual void newChild(string name) = 0;
        virtual void newText(string text) = 0;
-       virtual void nodeFinished() = 0;
+       virtual void nodeFinished(string name) = 0;
        virtual void parsingFinished() = 0;
        virtual void* returnDocument() = 0;
+ private:
+       
+
 };
 
 #endif /*STORAGEINTERFACE_H_*/
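diff --git a/XMLDocShredder.cpp b/XMLDocShredder.cpp
index 6e9c41a..f81251e 100644 (file)
--- a/XMLDocShredder.cpp
+++ b/XMLDocShredder.cpp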
@@ -18,7 +18,7 @@
 
 #include <iostream>
 #include "XMLDocShredder.h"
-#include "OCamlStorageInterface.h"
+#include "SXSIStorageInterface.h"
 #include <libxml++/exceptions/parse_error.h>
 #include "Utils.h"
 
@@ -55,20 +55,19 @@ void XMLDocShredder::setProperties(){
 XMLDocShredder::XMLDocShredder(const unsigned char * data,
                               TextReader::size_type size)                      
 {
+  last_text = false;
   reader_ = new TextReader(data,size,"");
   setProperties();
-  storageIfc_ = new OCamlStorageInterface();
-  //tagsID_ = new unordered_map<int,string>(107);
-  //idTags_ = new unordered_map<string,int>(107);
+  storageIfc_ = new SXSIStorageInterface();
 }
 
 XMLDocShredder::XMLDocShredder(const string inFileName)
 {
+  last_text = false;
   reader_ = new TextReader(inFileName);
   setProperties();
-  storageIfc_ = new OCamlStorageInterface();
-  //  tagsID_ = new unordered_map<int,string>(107);
-  // idTags_ = new unordered_map<string,int>(107);
+  storageIfc_ = new SXSIStorageInterface();
+
 }
 
 XMLDocShredder::~XMLDocShredder()
@@ -78,23 +77,16 @@ XMLDocShredder::~XMLDocShredder()
 
 }
 
-int XMLDocShredder::tagID(string name)
-{
-  int res = tagsID_[name];
-  return  res;
-}
-string XMLDocShredder::idTag(int id)
-{
-
-  return  idTags_[id];
-}
-
 
 void XMLDocShredder::processStartElement()
 {
        // fetch element name; this will be the full qualified name
        ustring name = reader_->get_name();
        bool empty = false;
+       
+       if (!last_text)
+         storageIfc_->newText(""); //prevText
+       last_text = false;
 
        storageIfc_->newChild(name);
 
@@ -114,7 +106,9 @@ void XMLDocShredder::processStartElement()
        
        if (empty){
          DPRINT("Node " << name <<" is empty!\n")
-           storageIfc_->nodeFinished();
+           storageIfc_->newText("");  //myText
+           storageIfc_->nodeFinished(name);
+           storageIfc_->newText("");  //nextText
        };
 
 
@@ -125,18 +119,25 @@ void XMLDocShredder::processStartElement()
 
 void XMLDocShredder::processEndElement()
 {
-       // tell the storage interface that the current node has been completely processed
-       storageIfc_->nodeFinished();
+  // tell the storage interface that the current node has been completely processed
+  if (!last_text)
+    storageIfc_->newText(""); //nextText of previous node
+  last_text = false;
+  storageIfc_->nodeFinished(reader_->get_name());
 }
 
 void XMLDocShredder::processPCDATA()
 {
        // send the content of this PCDATA node to the storage interface as a text node
+         
        if (reader_->has_value())
        {         
          storageIfc_->newChild("<$>");
          storageIfc_->newText(reader_->get_value());
+         last_text = true;
        }
+       else 
+         storageIfc_->newText("");
 }
 
 void XMLDocShredder::processAttributes()
@@ -157,7 +158,7 @@ void XMLDocShredder::processAttributes()
                if ((name.find(nspaceStr.c_str(), 0, 5)) == 0)
                {
                        storageIfc_->newChild(":" + value);
-                       storageIfc_->nodeFinished();    
+                       storageIfc_->nodeFinished(":" + value); 
                }
                
                /* otherwise, this is an ordinary attribute, so we construct a new child node of the 
@@ -170,12 +171,11 @@ void XMLDocShredder::processAttributes()
                        storageIfc_->newChild(name);
                        storageIfc_->newChild("<$>");
                        storageIfc_->newText(value);
-                       storageIfc_->nodeFinished();
-                       //                      storageIfc_->nodeFinished();
+                       storageIfc_->nodeFinished("<$>");
                }
        }
        while (reader_->move_to_next_attribute());
-       storageIfc_->nodeFinished();
+       storageIfc_->nodeFinished("<@>");
 }
 
 void XMLDocShredder::processSignificantWhitespace()
@@ -185,21 +185,22 @@ void XMLDocShredder::processSignificantWhitespace()
        // each significant whitespace sequence constructs a text node
        storageIfc_->newChild("<$>");
        storageIfc_->newText(value);
-       //storageIfc_->nodeFinished();
+
        
 }
 
 void XMLDocShredder::processStartDocument(const string docName)
 {
   // tell storage interface to construct the document name
-  //  storageIfc_->newChild("");  
+  storageIfc_->newChild("ROOT");  
+  
 }
 
 void XMLDocShredder::processEndDocument()
 {
        /* tell the storage interface that document parsing has finished, and structures
         * can now be written to disk. */
-  //  storageIfc_->nodeFinished();
+  storageIfc_->nodeFinished("ROOT");
   storageIfc_->parsingFinished();      
 }
 
@@ -237,9 +238,11 @@ void XMLDocShredder::processCDATASection()
         * model.  Instead, we simply pass the converted text value to the storage interface as 
         * a text node attached to the current context node.
         */
+  
        ustring value = reader_->get_value();
        storageIfc_->newChild("<$>");
        storageIfc_->newText(value);
+       last_text = true;
        //      storageIfc_->nodeFinished();
 
 }
diff --git a/XMLDocShredder.h b/XMLDocShredder.h
index 19ed10f..66a96b5 100644 (file)
--- a/XMLDocShredder.h
+++ b/XMLDocShredder.h
@@ -45,8 +45,6 @@ public:
        virtual void processUnknownNodeType();
        virtual void processCDATASection();
        virtual void parse();
-       virtual int tagID(string);
-       virtual string idTag(int);
        
        StorageInterface *storageIfc_;
 
@@ -54,8 +52,7 @@ public:
 private:
        TextReader *reader_;
        void setProperties();
-       unordered_map<int,string> idTags_;
-       unordered_map<string,int> tagsID_;
+       bool last_text;
 };
 
 #endif /*XMLDOCSHREDDER_H_*/
diff --git a/tree.ml b/tree.ml
index 3ebd9ad..aa35214 100644 (file)
--- a/tree.ml
+++ b/tree.ml
@@ -23,7 +23,7 @@ sig
   val equal : t -> t -> bool
 end
 
-module Binary = 
+module OldBinary = 
 struct
 
   type string_content = string
@@ -227,11 +227,11 @@ struct
         (* abstract type, values are pointers to a XMLTree C++ object *)
     
     
-  external parse_xml_uri : bool -> float ->  string  -> t = "caml_call_shredder_uri"
-  let parse_xml_uri  uri  = parse_xml_uri true 1.0 uri
+  external parse_xml_uri : string  -> t = "caml_call_shredder_uri"
+  let parse_xml_uri uri = parse_xml_uri uri
     
-  external parse_xml_string : bool -> float ->  string  -> t = "caml_call_shredder_string"
-  let parse_xml_string  = parse_xml_string true 1.0
+  external parse_xml_string :  string  -> t = "caml_call_shredder_string"
+  let parse_xml_string uri = parse_xml_string uri
     
 
   module Text =
@@ -416,3 +416,4 @@ struct
   end
     
 end
+include XML