summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1: 280fbeb)
git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@13 3cdefd35-fc62-479d-8e8d-bae585ffb9ca
PPINCLUDES=$(OCAMLINCLUDES:%=-ppopt %)
PPINCLUDES=$(OCAMLINCLUDES:%=-ppopt %)
-CXXSOURCES = XMLDocShredder.cpp OCamlStorageInterface.cpp StorageInterface.cpp OCamlDriver.cpp
+CXXSOURCES = XMLDocShredder.cpp SXSIStorageInterface.cpp StorageInterface.cpp OCamlDriver.cpp
CXXOBJECTS = $(CXXSOURCES:.cpp=.o)
CXXINCLUDES = \
CXXOBJECTS = $(CXXSOURCES:.cpp=.o)
CXXINCLUDES = \
-I/usr/lib/glibmm-2.4/include \
-I/usr/lib/sigc++-2.0/include \
-I/usr/lib/glib-2.0/include\
-I/usr/lib/glibmm-2.4/include \
-I/usr/lib/sigc++-2.0/include \
-I/usr/lib/glib-2.0/include\
+ -I`ocamlc -where`\
+ -IXMLTree \
+ -IXMLTree/libcds/includes \
+ -IXMLTree/TextCollection
CXXFLAGS = -O3 -Wall $(INCLUDEDIRS) -fPIC -std=c++0x
CXXFLAGS = -O3 -Wall $(INCLUDEDIRS) -fPIC -std=c++0x
OCAMLOPT = ocamlopt -g -cc "$(CXX)"
SYNT_DEBUG = -ppopt -DDEBUG
else
OCAMLOPT = ocamlopt -g -cc "$(CXX)"
SYNT_DEBUG = -ppopt -DDEBUG
else
OCAMLOPT = ocamlopt -cc "$(CXX)" -noassert -inline 10000
endif
OCAMLOPT = ocamlopt -cc "$(CXX)" -noassert -inline 10000
endif
ifeq ($(PROFILE), true)
SYNT_PROF = $(SYNT_DEBUG) -ppopt -DPROFILE
endif
ifeq ($(PROFILE), true)
SYNT_PROF = $(SYNT_DEBUG) -ppopt -DPROFILE
endif
-LIBS= -lxml2 -lxml++-2.6 -lglibmm-2.4 -lgobject-2.0 -lglib-2.0 -lsigc-2.0
+LIBS=-lxml2 -lxml++-2.6 -lglibmm-2.4 -lgobject-2.0 -lglib-2.0 -lsigc-2.0
all: libcamlshredder.a $(MLOBJS)
$(OCAMLFIND) $(LINK) -o main -package "$(OCAMLPACKAGES)" $(SYNTAX) -cclib \
all: libcamlshredder.a $(MLOBJS)
$(OCAMLFIND) $(LINK) -o main -package "$(OCAMLPACKAGES)" $(SYNTAX) -cclib \
$(OCAMLFIND) $(OCAMLOPT) -package "$(OCAMLPACKAGES)" $(SYNTAX) -c $<
libcamlshredder.a: $(CXXOBJECTS)
$(OCAMLFIND) $(OCAMLOPT) -package "$(OCAMLPACKAGES)" $(SYNTAX) -c $<
libcamlshredder.a: $(CXXOBJECTS)
- $(OCAMLMKLIB) -o camlshredder -custom $(CXXOBJECTS) $(LIBS)
+ mkdir -p .libs/
+ cd .libs/ && ar x ../XMLTree/XMLTree.a
+ $(OCAMLMKLIB) -o camlshredder -custom $(CXXOBJECTS) ./.libs/*.o $(LIBS)
+ rm -rf .libs
- rm -f *~ *.cm* *.[oa] *.so main
+ rm -f *~ *.cm* *.[oa] *.so main .libs
-OCamlStorageInterface.o: OCamlStorageInterface.h OCamlStorageInterface.cpp StorageInterface.h
+SXSIStorageInterface.o: SXSIStorageInterface.h SXSIStorageInterface.cpp StorageInterface.h
StorageInterface.o: StorageInterface.h
XMLDocShredder.o: XMLDocShredder.h XMLDocShredder.cpp OCamlStorageInterface.h StorageInterface.h
OCamlDriver.o: XMLDocShredder.h StorageInterface.h
StorageInterface.o: StorageInterface.h
XMLDocShredder.o: XMLDocShredder.h XMLDocShredder.cpp OCamlStorageInterface.h StorageInterface.h
OCamlDriver.o: XMLDocShredder.h StorageInterface.h
StorageInterface::~StorageInterface()
{
StorageInterface::~StorageInterface()
{
-}
\ No newline at end of file
virtual ~StorageInterface();
virtual void newChild(string name) = 0;
virtual void newText(string text) = 0;
virtual ~StorageInterface();
virtual void newChild(string name) = 0;
virtual void newText(string text) = 0;
- virtual void nodeFinished() = 0;
+ virtual void nodeFinished(string name) = 0;
virtual void parsingFinished() = 0;
virtual void* returnDocument() = 0;
virtual void parsingFinished() = 0;
virtual void* returnDocument() = 0;
};
#endif /*STORAGEINTERFACE_H_*/
};
#endif /*STORAGEINTERFACE_H_*/
#include <iostream>
#include "XMLDocShredder.h"
#include <iostream>
#include "XMLDocShredder.h"
-#include "OCamlStorageInterface.h"
+#include "SXSIStorageInterface.h"
#include <libxml++/exceptions/parse_error.h>
#include "Utils.h"
#include <libxml++/exceptions/parse_error.h>
#include "Utils.h"
XMLDocShredder::XMLDocShredder(const unsigned char * data,
TextReader::size_type size)
{
XMLDocShredder::XMLDocShredder(const unsigned char * data,
TextReader::size_type size)
{
reader_ = new TextReader(data,size,"");
setProperties();
reader_ = new TextReader(data,size,"");
setProperties();
- storageIfc_ = new OCamlStorageInterface();
- //tagsID_ = new unordered_map<int,string>(107);
- //idTags_ = new unordered_map<string,int>(107);
+ storageIfc_ = new SXSIStorageInterface();
}
XMLDocShredder::XMLDocShredder(const string inFileName)
{
}
XMLDocShredder::XMLDocShredder(const string inFileName)
{
reader_ = new TextReader(inFileName);
setProperties();
reader_ = new TextReader(inFileName);
setProperties();
- storageIfc_ = new OCamlStorageInterface();
- // tagsID_ = new unordered_map<int,string>(107);
- // idTags_ = new unordered_map<string,int>(107);
+ storageIfc_ = new SXSIStorageInterface();
+
}
XMLDocShredder::~XMLDocShredder()
}
XMLDocShredder::~XMLDocShredder()
-int XMLDocShredder::tagID(string name)
-{
- int res = tagsID_[name];
- return res;
-}
-string XMLDocShredder::idTag(int id)
-{
-
- return idTags_[id];
-}
-
void XMLDocShredder::processStartElement()
{
// fetch element name; this will be the full qualified name
ustring name = reader_->get_name();
bool empty = false;
void XMLDocShredder::processStartElement()
{
// fetch element name; this will be the full qualified name
ustring name = reader_->get_name();
bool empty = false;
+
+ if (!last_text)
+ storageIfc_->newText(""); //prevText
+ last_text = false;
storageIfc_->newChild(name);
storageIfc_->newChild(name);
if (empty){
DPRINT("Node " << name <<" is empty!\n")
if (empty){
DPRINT("Node " << name <<" is empty!\n")
- storageIfc_->nodeFinished();
+ storageIfc_->newText(""); //myText
+ storageIfc_->nodeFinished(name);
+ storageIfc_->newText(""); //nextText
void XMLDocShredder::processEndElement()
{
void XMLDocShredder::processEndElement()
{
- // tell the storage interface that the current node has been completely processed
- storageIfc_->nodeFinished();
+ // tell the storage interface that the current node has been completely processed
+ if (!last_text)
+ storageIfc_->newText(""); //nextText of previous node
+ last_text = false;
+ storageIfc_->nodeFinished(reader_->get_name());
}
void XMLDocShredder::processPCDATA()
{
// send the content of this PCDATA node to the storage interface as a text node
}
void XMLDocShredder::processPCDATA()
{
// send the content of this PCDATA node to the storage interface as a text node
if (reader_->has_value())
{
storageIfc_->newChild("<$>");
storageIfc_->newText(reader_->get_value());
if (reader_->has_value())
{
storageIfc_->newChild("<$>");
storageIfc_->newText(reader_->get_value());
+ else
+ storageIfc_->newText("");
}
void XMLDocShredder::processAttributes()
}
void XMLDocShredder::processAttributes()
if ((name.find(nspaceStr.c_str(), 0, 5)) == 0)
{
storageIfc_->newChild(":" + value);
if ((name.find(nspaceStr.c_str(), 0, 5)) == 0)
{
storageIfc_->newChild(":" + value);
- storageIfc_->nodeFinished();
+ storageIfc_->nodeFinished(":" + value);
}
/* otherwise, this is an ordinary attribute, so we construct a new child node of the
}
/* otherwise, this is an ordinary attribute, so we construct a new child node of the
storageIfc_->newChild(name);
storageIfc_->newChild("<$>");
storageIfc_->newText(value);
storageIfc_->newChild(name);
storageIfc_->newChild("<$>");
storageIfc_->newText(value);
- storageIfc_->nodeFinished();
- // storageIfc_->nodeFinished();
+ storageIfc_->nodeFinished("<$>");
}
}
while (reader_->move_to_next_attribute());
}
}
while (reader_->move_to_next_attribute());
- storageIfc_->nodeFinished();
+ storageIfc_->nodeFinished("<@>");
}
void XMLDocShredder::processSignificantWhitespace()
}
void XMLDocShredder::processSignificantWhitespace()
// each significant whitespace sequence constructs a text node
storageIfc_->newChild("<$>");
storageIfc_->newText(value);
// each significant whitespace sequence constructs a text node
storageIfc_->newChild("<$>");
storageIfc_->newText(value);
- //storageIfc_->nodeFinished();
}
void XMLDocShredder::processStartDocument(const string docName)
{
// tell storage interface to construct the document name
}
void XMLDocShredder::processStartDocument(const string docName)
{
// tell storage interface to construct the document name
- // storageIfc_->newChild("");
+ storageIfc_->newChild("ROOT");
+
}
void XMLDocShredder::processEndDocument()
{
/* tell the storage interface that document parsing has finished, and structures
* can now be written to disk. */
}
void XMLDocShredder::processEndDocument()
{
/* tell the storage interface that document parsing has finished, and structures
* can now be written to disk. */
- // storageIfc_->nodeFinished();
+ storageIfc_->nodeFinished("ROOT");
storageIfc_->parsingFinished();
}
storageIfc_->parsingFinished();
}
* model. Instead, we simply pass the converted text value to the storage interface as
* a text node attached to the current context node.
*/
* model. Instead, we simply pass the converted text value to the storage interface as
* a text node attached to the current context node.
*/
ustring value = reader_->get_value();
storageIfc_->newChild("<$>");
storageIfc_->newText(value);
ustring value = reader_->get_value();
storageIfc_->newChild("<$>");
storageIfc_->newText(value);
// storageIfc_->nodeFinished();
}
// storageIfc_->nodeFinished();
}
virtual void processUnknownNodeType();
virtual void processCDATASection();
virtual void parse();
virtual void processUnknownNodeType();
virtual void processCDATASection();
virtual void parse();
- virtual int tagID(string);
- virtual string idTag(int);
StorageInterface *storageIfc_;
StorageInterface *storageIfc_;
private:
TextReader *reader_;
void setProperties();
private:
TextReader *reader_;
void setProperties();
- unordered_map<int,string> idTags_;
- unordered_map<string,int> tagsID_;
};
#endif /*XMLDOCSHREDDER_H_*/
};
#endif /*XMLDOCSHREDDER_H_*/
val equal : t -> t -> bool
end
val equal : t -> t -> bool
end
struct
type string_content = string
struct
type string_content = string
(* abstract type, values are pointers to a XMLTree C++ object *)
(* abstract type, values are pointers to a XMLTree C++ object *)
- external parse_xml_uri : bool -> float -> string -> t = "caml_call_shredder_uri"
- let parse_xml_uri uri = parse_xml_uri true 1.0 uri
+ external parse_xml_uri : string -> t = "caml_call_shredder_uri"
+ let parse_xml_uri uri = parse_xml_uri uri
- external parse_xml_string : bool -> float -> string -> t = "caml_call_shredder_string"
- let parse_xml_string = parse_xml_string true 1.0
+ external parse_xml_string : string -> t = "caml_call_shredder_string"
+ let parse_xml_string uri = parse_xml_string uri