From: kim Date: Tue, 27 Jan 2009 13:14:54 +0000 (+0000) Subject: - Removed the direct calls to TextCollection, use XMLTree wrapper instead X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;h=b821684aac2e3114c2eb28188020d7a09b5de2a5;p=SXSI%2Fxpathcomp.git - Removed the direct calls to TextCollection, use XMLTree wrapper instead - Fix the Makefile - Delete crutf git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@73 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/Makefile b/Makefile index 4b37144..6b15fff 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,6 @@ DEBUG=false PROFILE=true +VERBOSE=false MLSRCS = memory.ml tag.ml tagSet.ml tree.ml automaton.ml ulexer.ml xPath.ml main.ml MLISRCS = memory.mli automaton.mli tag.mli tagSet.mli tree.mli ulexer.mli xPath.mli @@ -29,7 +30,12 @@ CXXINCLUDES = \ -IXMLTree/libcds/includes \ -IXMLTree/TextCollection -CXXFLAGS = -O3 -Wall $(INCLUDEDIRS) -fPIC -std=c++0x +CXXFLAGS = -O3 -Wall $(INCLUDEDIRS) -fPIC +ifeq ($(VERBOSE),true) +HIDE= +else +HIDE=@ +endif ifeq ($(DEBUG), true) CXX = g++ -DDEBUG @@ -37,7 +43,7 @@ OCAMLOPT = ocamlopt -g -cc "$(CXX)" SYNT_DEBUG = -ppopt -DDEBUG else CXX = g++ -OCAMLOPT = ocamlopt -cc "$(CXX)" -noassert -inline 10000 +OCAMLOPT = ocamlopt -cc "$(CXX)" -noassert -inline 100 endif ifeq ($(PROFILE), true) @@ -56,28 +62,35 @@ SYNTAX= -syntax camlp4o $(PPINCLUDES) -ppopt pa_macro.cmo $(SYNT_PROF) LIBS=-lxml2 -lxml++-2.6 -lglibmm-2.4 -lgobject-2.0 -lglib-2.0 -lsigc-2.0 -all: version libcamlshredder.a $(MLOBJS) +all: main - $(OCAMLFIND) $(LINK) -o main -package "$(OCAMLPACKAGES)" $(SYNTAX) -cclib \ +main: libcamlshredder.a $(MLOBJS) + @echo [LINK] $@ + $(HIDE) $(OCAMLFIND) $(LINK) -o main -package "$(OCAMLPACKAGES)" $(SYNTAX) -cclib \ "$(LIBS) ./libcamlshredder.a" $(MLOBJS) .SUFFIXES: .ml .mli .cmx .cmi .cpp .PHONY:compute_depend version .cpp.o: - $(CXX) $(CXXINCLUDES) -c $(CXXFLAGS) $< + @echo [CPP] $@ + $(HIDE) $(CXX) $(CXXINCLUDES) -c $(CXXFLAGS) $< .ml.cmx: - $(OCAMLFIND) $(OCAMLOPT) -package "$(OCAMLPACKAGES)" $(SYNTAX) -c $< + @echo [OCAMLOPT] $@ + $(HIDE) $(OCAMLFIND) $(OCAMLOPT) -package "$(OCAMLPACKAGES)" $(SYNTAX) -c $< .mli.cmi: - $(OCAMLFIND) $(OCAMLOPT) -package "$(OCAMLPACKAGES)" $(SYNTAX) -c $< + @echo [OCAMLOPT] $@ + $(HIDE) $(OCAMLFIND) $(OCAMLOPT) -package "$(OCAMLPACKAGES)" $(SYNTAX) -c $< libcamlshredder.a: $(CXXOBJECTS) XMLTree/XMLTree.a - mkdir -p .libs/ - cd .libs/ && ar x ../XMLTree/XMLTree.a - $(OCAMLMKLIB) -o camlshredder -custom $(CXXOBJECTS) ./.libs/*.o $(LIBS) - rm -rf .libs + @echo [LIB] $@ + $(HIDE) mkdir -p .libs/ + $(HIDE) cd .libs/ && ar x ../XMLTree/XMLTree.a + $(HIDE) $(OCAMLMKLIB) -o camlshredder -custom $(CXXOBJECTS) ./.libs/*.o $(LIBS) + $(HIDE) rm -rf .libs clean: - rm -f *~ *.cm* *.[oa] *.so main .libs + @echo [CLEAN] + $(HIDE) rm -f *~ *.cm* *.[oa] *.so main .libs testSuccint: $(CXXOBJECTS) XMLTree/XMLTree.a @@ -90,12 +103,11 @@ $(LIBS) testSuccint.cpp SXSIStorageInterface.o: SXSIStorageInterface.h SXSIStorageInterface.cpp StorageInterface.h StorageInterface.o: StorageInterface.h -XMLDocShredder.o: XMLDocShredder.h XMLDocShredder.cpp OCamlStorageInterface.h StorageInterface.h +XMLDocShredder.o: XMLDocShredder.h XMLDocShredder.cpp SXSIStorageInterface.h StorageInterface.h OCamlDriver.o: XMLDocShredder.h StorageInterface.h compute_depend: - $(OCAMLFIND) $(OCAMLDEP) -package "$(OCAMLPACKAGES)" $(SYNTAX) $(MLSRCS) $(MLISRCS) >depend - - + @echo [DEP] + $(HIDE) $(OCAMLFIND) $(OCAMLDEP) -package "$(OCAMLPACKAGES)" $(SYNTAX) $(MLSRCS) $(MLISRCS) >depend include depend diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index 636a952..d8f1933 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -17,7 +17,7 @@ extern "C" { #include } //extern C -#include "TextCollection/TextCollection.h" +//#include "TextCollection/TextCollection.h" #include "XMLDocShredder.h" #include "XMLTree.h" #include "Utils.h" @@ -71,7 +71,7 @@ void traversal_rec(XMLTree* tree, treeNode id){ DocID tid; if (id == NULLT) return; - int tag = tree->Tag(id); + // int tag = tree->Tag(id); if (id) { tid = tree->PrevText(id); char * data = (char *) (tree->getTextCollection())->GetText(tid); @@ -97,41 +97,39 @@ extern "C" CAMLprim value caml_cpp_traversal(value tree){ CAMLreturn(Val_unit); } - -extern "C" CAMLprim value caml_text_collection_get_text(value tc, value id){ - CAMLparam2(tc,id); - - const char* txt = (const char*) ((TextCollection*) tc)->GetText((DocID) Int_val(id)); +extern "C" CAMLprim value caml_text_collection_get_text(value tree, value id){ + CAMLparam2(tree,id); + const char* txt = (const char*) (XMLTREE(tree)->GetText((DocID) Int_val(id))); CAMLreturn (caml_copy_string(txt)); } -extern "C" CAMLprim value caml_text_collection_empty_text(value tc,value id){ - CAMLparam2(tc,id); - CAMLreturn ( Val_int(((TextCollection*) tc)->EmptyText((DocID) Int_val(id)))); +extern "C" CAMLprim value caml_text_collection_empty_text(value tree,value id){ + CAMLparam2(tree,id); + CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id)))); } -extern "C" CAMLprim value caml_text_collection_is_contains(value tc,value str){ - CAMLparam2(tc,str); +extern "C" CAMLprim value caml_text_collection_is_contains(value tree,value str){ + CAMLparam2(tree,str); uchar * cstr = (uchar *) String_val(str); - CAMLreturn ( Val_bool((int) ((TextCollection*) tc)->IsContains(cstr))); + CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsContains(cstr))); } -extern "C" CAMLprim value caml_text_collection_count_contains(value tc,value str){ - CAMLparam2(tc,str); +extern "C" CAMLprim value caml_text_collection_count_contains(value tree,value str){ + CAMLparam2(tree,str); uchar * cstr = (uchar *) String_val(str); - CAMLreturn ( Val_int(((TextCollection*) tc)->CountContains(cstr))); + CAMLreturn (Val_int((XMLTREE(tree)->CountContains(cstr)))); } -extern "C" CAMLprim value caml_text_collection_contains(value tc,value str){ - CAMLparam2(tc,str); +extern "C" CAMLprim value caml_text_collection_contains(value tree,value str){ + CAMLparam2(tree,str); CAMLlocal1(resarray); uchar * cstr = (uchar *) String_val(str); std::vector results; - results = ((TextCollection*) tc)->Contains(cstr); + results = XMLTREE(tree)->Contains(cstr); resarray = caml_alloc_tuple(results.size()); - for (int i=0; iTag(TREENODEVAL(id)); - - CAMLreturn (Val_unit); + CAMLparam2(tree,id); + CAMLreturn (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(id)))); } extern "C" CAMLprim value caml_xml_tree_nullt(value unit){ CAMLparam1(unit); diff --git a/OCamlSXSIDrive.cpp b/OCamlSXSIDrive.cpp deleted file mode 100644 index 2fbee33..0000000 --- a/OCamlSXSIDrive.cpp +++ /dev/null @@ -1,21 +0,0 @@ -/************************************** - * OCamlDriver.cpp - * ------------------- - * The Ocaml Driver which calls the C++ methods and - * adds a C wrapper interface with OCaml code. - * - * Author: Kim Nguyen - * Date: 19/11/08 - */ - -/* OCaml memory managment */ -extern "C" { -#include -#include -#include -#include -#include -} //extern C - -#include "XMLDocShredder.h" - diff --git a/OCamlStorageInterface.cpp b/OCamlStorageInterface.cpp deleted file mode 100644 index 52f3cb5..0000000 --- a/OCamlStorageInterface.cpp +++ /dev/null @@ -1,217 +0,0 @@ -/******************************************* - * OCamlStorageInterface.cpp - * ------------------------ - * - * - * Author: Kim Nguyen - * Date: 04/11/08 - */ - - -#include "OCamlStorageInterface.h" -#include "Utils.h" - -/* see caml/mlvalues.h - */ -/* tags */ -#define NODE 0 -#define STRING 1 - -/* fields */ -#define PCDATA 0 -#define ID 0 -#define TAG 1 -#define LEFT 2 -#define RIGHT 3 -#define FATHER 4 - -#define NIL (Val_unit) - -/* The OCaml function which computes the hash value of a tag */ -static value *caml_hash_tag = NULL; - -OCamlStorageInterface::OCamlStorageInterface() -{ - CAMLparam0(); - CAMLlocal4(node,nodeptr,dummytag,father); - nodeid=1; - if (caml_hash_tag == NULL) { - /* First time around, look up by name */ - caml_hash_tag = caml_named_value("caml_hash_tag"); - - } - - dummytag = caml_callback(*caml_hash_tag, caml_copy_string("")); - - // Atomic block, initialize must be called for every field - // Before any other allocation takes place. In particular, - // One should NOT place the call to caml_callback as an argument to - // caml_initialize but rather store its result in a variable. - father = caml_alloc_shr(1,0); - caml_initialize(&Field(father,0),NIL); - - node = caml_alloc_shr(5,0); - caml_initialize(&Field(node,ID),Val_int(nodeid++)); - caml_initialize(&Field(node,TAG),dummytag); - caml_initialize(&Field(node,LEFT),NIL); - caml_initialize(&Field(node,RIGHT),NIL); - caml_initialize(&Field(node,FATHER),father); - - nodeptr = caml_alloc_shr(1,LEFT); - caml_initialize(&Field(nodeptr,0),node); - - stack.push_front(nodeptr); - caml_register_global_root(&stack.front()); - - CAMLreturn0; -} - -OCamlStorageInterface::~OCamlStorageInterface() -{ - caml_remove_global_root(&stack.back()); -} - -void OCamlStorageInterface::newChild(string name) -{ - CAMLparam0(); - CAMLlocal5(nnode,onode,tag,taghash,id); - CAMLlocal3(father,nnodeptr,onodeptr); - DPRINT("newChild " << name <<"\n") - onode = stack.front(); - /* Allocate the new Node(tag,l,r) */ - - /* Compute the new tag hash and store it in the new block */ - tag = caml_copy_string(name.c_str()); - taghash = caml_callback(*caml_hash_tag, tag); - id = Val_int(nodeid++); - - /* Again, initialization must be atomic */ - father=caml_alloc_shr(1,0); - caml_initialize(&Field(father,0),onode); - - nnode = caml_alloc_shr(5,0); - caml_initialize(&Field(nnode,ID),id); - caml_initialize(&Field(nnode,TAG),taghash); - caml_initialize(&Field(nnode,LEFT),NIL); - caml_initialize(&Field(nnode,RIGHT),NIL); - caml_initialize(&Field(nnode,FATHER),father); - - nnodeptr = caml_alloc_shr(1,LEFT); - caml_initialize(&Field(nnodeptr,0),nnode); - - switch (Tag_val(onode)){ - case LEFT: - caml_modify(&Field(Field(onode,0),LEFT),nnodeptr); - Tag_val(onode) = RIGHT; - break; - - case RIGHT: - caml_modify(&Field(Field(onode,0),RIGHT),nnodeptr); - Tag_val(onode) = NODE; - break; - }; - - stack.push_front(nnodeptr); - - CAMLreturn0; -} - - - -void OCamlStorageInterface::newText(string text) -{ - CAMLparam0(); - CAMLlocal3(pcdata,snode,node); - DPRINT("newText " << text <<"\n") - - pcdata = caml_copy_string(text.c_str()); - snode = caml_alloc_shr(1,STRING); - caml_initialize(&(Field(snode,PCDATA)),pcdata); - node = stack.front(); - - switch (Tag_val(node)){ - case LEFT: - caml_modify(&Field(Field(node,0),LEFT),snode); - Tag_val(node) = RIGHT; - break; - case RIGHT: - caml_modify(&Field(Field(node,0),RIGHT),snode); - Tag_val(node) = NODE; - break; - - - }; - - CAMLreturn0; -} - - - -void OCamlStorageInterface::nodeFinished() -{ - - CAMLparam0(); - CAMLlocal1(node); - DPRINT("nodeFinished\n") - node = stack.front(); - - switch (Tag_val(node)){ - case LEFT: - DPRINT("Tagged left\n") - Tag_val(node) = RIGHT; - break; - - case RIGHT: - DPRINT("Tagged right\n") - Tag_val(node) = NODE; - - case NODE: - DPRINT("Under NODE\n"); - while (Tag_val(node) == NODE){ - stack.pop_front(); - node = stack.front(); - }; - break; - }; - - CAMLreturn0; -} - - void OCamlStorageInterface::parsingFinished() -{ - CAMLparam0(); - CAMLlocal1(rnode); - caml_register_global_root(&document); - document = stack.front(); - - stack.pop_front(); /* removes the root */ - rnode = stack.front(); - - DPRINT ("Stack size is "<< stack.size() <<"\n") - /* reinitializes the stack */ - caml_modify(&Field(Field(rnode,0),ID),Val_int(nodeid=1)); - caml_modify(&Field(Field(rnode,0),LEFT),NIL); - caml_modify(&Field(Field(rnode,0),RIGHT),NIL); - caml_modify(&Field(Field(Field(rnode,0),FATHER),0),NIL); - Tag_val(rnode) = LEFT; - - - caml_modify(&Field(Field(Field(document,0),FATHER),0),NIL); - Tag_val(document) = NODE; - - CAMLreturn0; - - -} -value OCamlStorageInterface::getDocument (){ - CAMLparam0(); - CAMLlocal1(doc); - doc = document; - caml_remove_global_root(&document); - CAMLreturn(doc); -} -void *OCamlStorageInterface::returnDocument(){ - - return ((void *) getDocument()); - -} diff --git a/OCamlStorageInterface.h b/OCamlStorageInterface.h deleted file mode 100644 index 08e44ce..0000000 --- a/OCamlStorageInterface.h +++ /dev/null @@ -1,48 +0,0 @@ -/***************************************** - * OCamlStorageInterface.h - * ------------------------ - * Header file for an OCaml Storage Interface - * - * Author: Kim Nguyen - * Date: 04/11/08 - */ - -#ifndef OCAMLSTORAGEINTERFACE_H_ -#define OCAMLSTORAGEINTERFACE_H_ - -#include "StorageInterface.h" -extern "C" { -/* OCaml memory managment */ - -#include -#include -#include -#include - -}// extern C - -#include - -using namespace std; - -class OCamlStorageInterface: public StorageInterface -{ - public: - OCamlStorageInterface(); - virtual ~OCamlStorageInterface(); - virtual void newChild(string name); - virtual void newText(string text); - virtual void nodeFinished(); - virtual void parsingFinished(); - virtual void* returnDocument(); - - private: - value getDocument(); - list stack; - value document; - int nodeid; -}; - - -#endif /*OCAMLSTORAGEINTERFACE_H_*/ - diff --git a/XMLDocShredder.h b/XMLDocShredder.h index 00f50d6..437452e 100644 --- a/XMLDocShredder.h +++ b/XMLDocShredder.h @@ -15,16 +15,11 @@ #include #include #include -#include #include "StorageInterface.h" using namespace std; using namespace xmlpp; -/* For Hashmap. Seems fairly well supported */ -using namespace __gnu_cxx; -typedef pair cons_str; -typedef pair cons_int; class XMLDocShredder { diff --git a/tag.ml b/tag.ml index b692df7..f0d9062 100644 --- a/tag.ml +++ b/tag.ml @@ -82,6 +82,7 @@ struct let pcdata = T.pcdata external tag : string -> t = "%identity" external clear_pool : unit -> unit = "%identity" + let init _ = () let compare = String.compare let equal = (=) let print fmt s = Format.fprintf fmt "%s" s diff --git a/tag.mli b/tag.mli index 766ac93..41b5485 100644 --- a/tag.mli +++ b/tag.mli @@ -8,6 +8,7 @@ type t val attribute : t val pcdata : t val tag : string -> t +val init : string array -> unit val clear_pool : unit -> unit val compare : t -> t -> int val equal : t -> t -> bool diff --git a/tree.ml b/tree.ml index 0c1f10b..c3a2255 100644 --- a/tree.ml +++ b/tree.ml @@ -52,7 +52,7 @@ struct module Text = struct - type t (* pointer to the text collection *) + (* Todo *) external nullt : unit -> [`Text ] node = "caml_xml_tree_nullt" let nil = nullt () @@ -98,8 +98,6 @@ struct external tag : t -> [`Tree ] node -> Tag.t = "caml_xml_tree_tag" external tag_id : t -> [`Tree ] node -> unit = "caml_xml_tree_tag_id" - external text_collection : t -> Text.t = "caml_xml_tree_text_collection" - let is_last t n = equal nil (next_sibling t n) external prev_text : t -> [`Tree] node -> [`Text ] node = "caml_xml_tree_prev_text" @@ -113,7 +111,6 @@ struct external is_ancestor : t -> [`Tree ] node -> [`Tree ] node -> bool = "caml_xml_tree_is_ancestor" let print_skel t = - let textcol = text_collection t in let rec aux id = if (is_nil id) then Printf.eprintf "#\n" @@ -124,11 +121,11 @@ struct (Tag.to_string (tag t id)) (node_xml_id t id) (int_of_node (prev_text t id)) - (Text.get_text textcol (prev_text t id)) + (Text.get_text t (prev_text t id)) (int_of_node (my_text t id)) - (Text.get_text textcol (my_text t id)) + (Text.get_text t (my_text t id)) (int_of_node (next_text t id)) - (Text.get_text textcol (next_text t id)); + (Text.get_text t (next_text t id)); aux(first_child t id); aux(next_sibling t id); end @@ -136,17 +133,16 @@ struct aux (root t) let traversal t = - let textcol = text_collection t in let rec aux id = if not (is_nil id) then begin (* ignore (tag t id); - ignore (Text.get_text textcol (prev_text t id)); + ignore (Text.get_text t (prev_text t id)); if (is_leaf t id) - then ignore (Text.get_text textcol (my_text t id)); + then ignore (Text.get_text t (my_text t id)); if (is_last t id) - then ignore (Text.get_text textcol (next_text t id)); *) + then ignore (Text.get_text t (next_text t id)); *) aux (first_child t id); aux (next_sibling t id); end @@ -168,8 +164,7 @@ struct type doc = t - type t = { doc : doc; - text : Text.t; + type t = { doc : doc; node : descr } let dump { doc=t } = Tree.print_skel t @@ -179,7 +174,6 @@ struct open Tree let node_of_t t = { doc= t; - text = text_collection t; node = Node(NC (root t)) } @@ -201,7 +195,7 @@ struct let equal a b = (compare a b) == 0 let string t = match t.node with - | String i -> Text.get_text t.text i + | String i -> Text.get_text t.doc i | _ -> assert false let norm (n : [`Tree ] node ) = if is_nil n then Nil else Node (NC n) @@ -231,13 +225,13 @@ struct match n.node with | Node (NC t) when is_leaf n.doc t -> let txt = my_text n.doc t in - if Text.is_empty n.text txt + if Text.is_empty n.doc txt then Nil else Node(SC (txt,Tree.nil)) | Node (NC t) -> let fs = first_child n.doc t in let txt = prev_text n.doc fs in - if Text.is_empty n.text txt + if Text.is_empty n.doc txt then norm fs else Node (SC (txt, fs)) | Node(SC (i,_)) -> String i @@ -253,7 +247,7 @@ struct | Node(NC t) -> let ns = next_sibling n.doc t in let txt = next_text n.doc t in - if Text.is_empty n.text txt + if Text.is_empty n.doc txt then norm ns else Node (SC (txt, ns)) | Nil | String _ -> failwith "next_sibling" @@ -287,7 +281,7 @@ struct | _ -> false let contains t s = - Array.fold_left (fun a i -> DocIdSet.add i a) DocIdSet.empty (Text.contains t.text s) + Array.fold_left (fun a i -> DocIdSet.add i a) DocIdSet.empty (Text.contains t.doc s) let contains_old t s = let regexp = Str.regexp_string s in @@ -367,7 +361,7 @@ struct let rec aux n = match n.node with | Nil -> () - | String i -> () (*ignore(Text.get_text t.text i) *) + | String i -> () (*ignore(Text.get_text t.doc i) *) | Node(_) -> (* tag_id n; *) aux (first_child n); @@ -438,13 +432,13 @@ module DEBUGTREE match n.node with | Node (NC t) when is_leaf_ n.doc t -> let txt = my_text_ n.doc t in - if is_empty_ n.text txt + if is_empty_ n.doc txt then Nil else Node(SC (txt,XML.Tree.nil)) | Node (NC t) -> let fs = first_child_ n.doc t in let txt = prev_text_ n.doc fs in - if is_empty_ n.text txt + if is_empty_ n.doc txt then norm fs else Node (SC (txt, fs)) | Node(SC (i,_)) -> String i @@ -460,7 +454,7 @@ module DEBUGTREE | Node(NC t) -> let ns = next_sibling_ n.doc t in let txt = next_text_ n.doc t in - if is_empty_ n.text txt + if is_empty_ n.doc txt then norm ns else Node (SC (txt, ns)) | Nil | String _ -> failwith "next_sibling"