From: kim Date: Tue, 25 Nov 2008 04:03:42 +0000 (+0000) Subject: Replaced OCamlBackend X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;h=77294fdd983393727bcee7fda52dacdaf943e585;p=SXSI%2Fxpathcomp.git Replaced OCamlBackend git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@13 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/Makefile b/Makefile index d0510b9..b7084f0 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ OCAMLPACKAGES = unix,ulex,camlp4 PPINCLUDES=$(OCAMLINCLUDES:%=-ppopt %) -CXXSOURCES = XMLDocShredder.cpp OCamlStorageInterface.cpp StorageInterface.cpp OCamlDriver.cpp +CXXSOURCES = XMLDocShredder.cpp SXSIStorageInterface.cpp StorageInterface.cpp OCamlDriver.cpp CXXOBJECTS = $(CXXSOURCES:.cpp=.o) CXXINCLUDES = \ @@ -24,17 +24,22 @@ CXXINCLUDES = \ -I/usr/lib/glibmm-2.4/include \ -I/usr/lib/sigc++-2.0/include \ -I/usr/lib/glib-2.0/include\ - -I`ocamlc -where` + -I`ocamlc -where`\ + -IXMLTree \ + -IXMLTree/libcds/includes \ + -IXMLTree/TextCollection CXXFLAGS = -O3 -Wall $(INCLUDEDIRS) -fPIC -std=c++0x -CXX = g++ ifeq ($(DEBUG), true) +CXX = g++ -DDEBUG OCAMLOPT = ocamlopt -g -cc "$(CXX)" SYNT_DEBUG = -ppopt -DDEBUG else +CXX = g++ OCAMLOPT = ocamlopt -cc "$(CXX)" -noassert -inline 10000 endif + ifeq ($(PROFILE), true) SYNT_PROF = $(SYNT_DEBUG) -ppopt -DPROFILE endif @@ -48,7 +53,7 @@ SYNTAX= -syntax camlp4o $(PPINCLUDES) -ppopt pa_macro.cmo $(SYNT_PROF) -LIBS= -lxml2 -lxml++-2.6 -lglibmm-2.4 -lgobject-2.0 -lglib-2.0 -lsigc-2.0 +LIBS=-lxml2 -lxml++-2.6 -lglibmm-2.4 -lgobject-2.0 -lglib-2.0 -lsigc-2.0 all: libcamlshredder.a $(MLOBJS) $(OCAMLFIND) $(LINK) -o main -package "$(OCAMLPACKAGES)" $(SYNTAX) -cclib \ @@ -65,13 +70,16 @@ all: libcamlshredder.a $(MLOBJS) $(OCAMLFIND) $(OCAMLOPT) -package "$(OCAMLPACKAGES)" $(SYNTAX) -c $< libcamlshredder.a: $(CXXOBJECTS) - $(OCAMLMKLIB) -o camlshredder -custom $(CXXOBJECTS) $(LIBS) + mkdir -p .libs/ + cd .libs/ && ar x ../XMLTree/XMLTree.a + $(OCAMLMKLIB) -o camlshredder -custom $(CXXOBJECTS) ./.libs/*.o $(LIBS) + rm -rf .libs clean: - rm -f *~ *.cm* *.[oa] *.so main + rm -f *~ *.cm* *.[oa] *.so main .libs -OCamlStorageInterface.o: OCamlStorageInterface.h OCamlStorageInterface.cpp StorageInterface.h +SXSIStorageInterface.o: SXSIStorageInterface.h SXSIStorageInterface.cpp StorageInterface.h StorageInterface.o: StorageInterface.h XMLDocShredder.o: XMLDocShredder.h XMLDocShredder.cpp OCamlStorageInterface.h StorageInterface.h OCamlDriver.o: XMLDocShredder.h StorageInterface.h diff --git a/StorageInterface.cpp b/StorageInterface.cpp index 821c397..4953b13 100644 --- a/StorageInterface.cpp +++ b/StorageInterface.cpp @@ -13,4 +13,4 @@ StorageInterface::~StorageInterface() { -} \ No newline at end of file +} diff --git a/StorageInterface.h b/StorageInterface.h index c6f94aa..5af866f 100644 --- a/StorageInterface.h +++ b/StorageInterface.h @@ -20,13 +20,16 @@ using namespace std; class StorageInterface { -public: + public: virtual ~StorageInterface(); virtual void newChild(string name) = 0; virtual void newText(string text) = 0; - virtual void nodeFinished() = 0; + virtual void nodeFinished(string name) = 0; virtual void parsingFinished() = 0; virtual void* returnDocument() = 0; + private: + + }; #endif /*STORAGEINTERFACE_H_*/ diff --git a/XMLDocShredder.cpp b/XMLDocShredder.cpp index 6e9c41a..f81251e 100644 --- a/XMLDocShredder.cpp +++ b/XMLDocShredder.cpp @@ -18,7 +18,7 @@ #include #include "XMLDocShredder.h" -#include "OCamlStorageInterface.h" +#include "SXSIStorageInterface.h" #include #include "Utils.h" @@ -55,20 +55,19 @@ void XMLDocShredder::setProperties(){ XMLDocShredder::XMLDocShredder(const unsigned char * data, TextReader::size_type size) { + last_text = false; reader_ = new TextReader(data,size,""); setProperties(); - storageIfc_ = new OCamlStorageInterface(); - //tagsID_ = new unordered_map(107); - //idTags_ = new unordered_map(107); + storageIfc_ = new SXSIStorageInterface(); } XMLDocShredder::XMLDocShredder(const string inFileName) { + last_text = false; reader_ = new TextReader(inFileName); setProperties(); - storageIfc_ = new OCamlStorageInterface(); - // tagsID_ = new unordered_map(107); - // idTags_ = new unordered_map(107); + storageIfc_ = new SXSIStorageInterface(); + } XMLDocShredder::~XMLDocShredder() @@ -78,23 +77,16 @@ XMLDocShredder::~XMLDocShredder() } -int XMLDocShredder::tagID(string name) -{ - int res = tagsID_[name]; - return res; -} -string XMLDocShredder::idTag(int id) -{ - - return idTags_[id]; -} - void XMLDocShredder::processStartElement() { // fetch element name; this will be the full qualified name ustring name = reader_->get_name(); bool empty = false; + + if (!last_text) + storageIfc_->newText(""); //prevText + last_text = false; storageIfc_->newChild(name); @@ -114,7 +106,9 @@ void XMLDocShredder::processStartElement() if (empty){ DPRINT("Node " << name <<" is empty!\n") - storageIfc_->nodeFinished(); + storageIfc_->newText(""); //myText + storageIfc_->nodeFinished(name); + storageIfc_->newText(""); //nextText }; @@ -125,18 +119,25 @@ void XMLDocShredder::processStartElement() void XMLDocShredder::processEndElement() { - // tell the storage interface that the current node has been completely processed - storageIfc_->nodeFinished(); + // tell the storage interface that the current node has been completely processed + if (!last_text) + storageIfc_->newText(""); //nextText of previous node + last_text = false; + storageIfc_->nodeFinished(reader_->get_name()); } void XMLDocShredder::processPCDATA() { // send the content of this PCDATA node to the storage interface as a text node + if (reader_->has_value()) { storageIfc_->newChild("<$>"); storageIfc_->newText(reader_->get_value()); + last_text = true; } + else + storageIfc_->newText(""); } void XMLDocShredder::processAttributes() @@ -157,7 +158,7 @@ void XMLDocShredder::processAttributes() if ((name.find(nspaceStr.c_str(), 0, 5)) == 0) { storageIfc_->newChild(":" + value); - storageIfc_->nodeFinished(); + storageIfc_->nodeFinished(":" + value); } /* otherwise, this is an ordinary attribute, so we construct a new child node of the @@ -170,12 +171,11 @@ void XMLDocShredder::processAttributes() storageIfc_->newChild(name); storageIfc_->newChild("<$>"); storageIfc_->newText(value); - storageIfc_->nodeFinished(); - // storageIfc_->nodeFinished(); + storageIfc_->nodeFinished("<$>"); } } while (reader_->move_to_next_attribute()); - storageIfc_->nodeFinished(); + storageIfc_->nodeFinished("<@>"); } void XMLDocShredder::processSignificantWhitespace() @@ -185,21 +185,22 @@ void XMLDocShredder::processSignificantWhitespace() // each significant whitespace sequence constructs a text node storageIfc_->newChild("<$>"); storageIfc_->newText(value); - //storageIfc_->nodeFinished(); + } void XMLDocShredder::processStartDocument(const string docName) { // tell storage interface to construct the document name - // storageIfc_->newChild(""); + storageIfc_->newChild("ROOT"); + } void XMLDocShredder::processEndDocument() { /* tell the storage interface that document parsing has finished, and structures * can now be written to disk. */ - // storageIfc_->nodeFinished(); + storageIfc_->nodeFinished("ROOT"); storageIfc_->parsingFinished(); } @@ -237,9 +238,11 @@ void XMLDocShredder::processCDATASection() * model. Instead, we simply pass the converted text value to the storage interface as * a text node attached to the current context node. */ + ustring value = reader_->get_value(); storageIfc_->newChild("<$>"); storageIfc_->newText(value); + last_text = true; // storageIfc_->nodeFinished(); } diff --git a/XMLDocShredder.h b/XMLDocShredder.h index 19ed10f..66a96b5 100644 --- a/XMLDocShredder.h +++ b/XMLDocShredder.h @@ -45,8 +45,6 @@ public: virtual void processUnknownNodeType(); virtual void processCDATASection(); virtual void parse(); - virtual int tagID(string); - virtual string idTag(int); StorageInterface *storageIfc_; @@ -54,8 +52,7 @@ public: private: TextReader *reader_; void setProperties(); - unordered_map idTags_; - unordered_map tagsID_; + bool last_text; }; #endif /*XMLDOCSHREDDER_H_*/ diff --git a/tree.ml b/tree.ml index 3ebd9ad..aa35214 100644 --- a/tree.ml +++ b/tree.ml @@ -23,7 +23,7 @@ sig val equal : t -> t -> bool end -module Binary = +module OldBinary = struct type string_content = string @@ -227,11 +227,11 @@ struct (* abstract type, values are pointers to a XMLTree C++ object *) - external parse_xml_uri : bool -> float -> string -> t = "caml_call_shredder_uri" - let parse_xml_uri uri = parse_xml_uri true 1.0 uri + external parse_xml_uri : string -> t = "caml_call_shredder_uri" + let parse_xml_uri uri = parse_xml_uri uri - external parse_xml_string : bool -> float -> string -> t = "caml_call_shredder_string" - let parse_xml_string = parse_xml_string true 1.0 + external parse_xml_string : string -> t = "caml_call_shredder_string" + let parse_xml_string uri = parse_xml_string uri module Text = @@ -416,3 +416,4 @@ struct end end +include XML