From 83e9f9d8f219fece86afbedd1332d5ad97971d1c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Kim=20Nguy=E1=BB=85n?= Date: Fri, 6 Apr 2012 14:04:14 +0200 Subject: [PATCH] Finish adapting to new libxml-tree API - Code is much cleaner - Speed is mostly the same, often faster but two offenders: Q9 : 155 -> 190ms Q28: 2s -> 3.5 s Need to be investigated. --- configure | 9 +- myocamlbuild.ml | 3 +- src/OCamlDriver.cpp | 322 --------------------------------------- src/bp_stub.cpp | 113 ++++++++++++++ src/libcamlshredder.clib | 3 +- src/main.ml | 2 +- src/runtime.ml | 13 +- src/tree.ml | 11 +- src/utils_stub.cpp | 11 ++ src/xml-tree_stub.cpp | 1 + 10 files changed, 152 insertions(+), 336 deletions(-) delete mode 100644 src/OCamlDriver.cpp create mode 100644 src/bp_stub.cpp create mode 100644 src/utils_stub.cpp diff --git a/configure b/configure index a9eccc8..c192a28 100755 --- a/configure +++ b/configure @@ -24,6 +24,13 @@ Conf.check "libxml-tree" (Conf.absolute) ("%s/../XMLTree/libxml-tree.a") (Sys.fi Conf.check "TextCollection" (Conf.absolute) ("%s/../TextCollection/libTextCollection.a") (Sys.file_exists);; +let libs_files = [ + (Conf.absolute) ("%s/../bp/libbp.a"); + (Conf.absolute) ("%s/../libcds/lib/libcds.a"); + (Conf.absolute) ("%s/../XMLTree/libxml-tree.a"); + (Conf.absolute) ("%s/../TextCollection/libTextCollection.a"); +] + let libs_I= [ Conf.absolute "-I%s/../bp"; Conf.absolute "-I%s/../libcds/includes"; @@ -56,7 +63,7 @@ Conf.def_str "cxx_cmd" "g++";; Conf.def_list "cxx_includes" (libs_I @ ocamlI);; Conf.def_list "cxx_lpaths" libs_L;; Conf.def_list "cxx_libs" libs_l;; - +Conf.def_list "cxx_libs_objects" libs_files;; Conf.finish ();; diff --git a/myocamlbuild.ml b/myocamlbuild.ml index d0277af..7a792d1 100644 --- a/myocamlbuild.ml +++ b/myocamlbuild.ml @@ -17,6 +17,8 @@ let cxx_link_flags = ref [ _S cxx_lpaths; _S cxx_libs] let native_link_flags = ref (List.map (fun s -> s ^ ".cmxa") ocaml_link) let byte_link_flags = ref ("-custom" :: (List.map (fun s -> s ^ ".cma") ocaml_link)) let link_flags = [ A"-linkpkg" ] +let libs_files = List.map (fun s -> "file:" ^ s) cxx_libs_objects + let native_compile_flags = ref [A"-fno-PIC"] let compile_flags = ref [] @@ -140,7 +142,6 @@ let () = dispatch begin | After_rules -> dep [ "link" ] cstub_lib; - rule "c++: cpp & depends -> o" ~prod:"%.o" ~deps:[ "%.cpp" ] cxx_compile; let syntax_flags = S ([ A "-syntax"; A "camlp4o"; S (ppopt [A "-printer" ; A"Camlp4OCamlAstDumper"]); diff --git a/src/OCamlDriver.cpp b/src/OCamlDriver.cpp deleted file mode 100644 index 223f3e3..0000000 --- a/src/OCamlDriver.cpp +++ /dev/null @@ -1,322 +0,0 @@ -/************************************** - * OCamlDriver.cpp - * ------------------- - * An Ocaml Driver which calls the C++ methods and - * adds a C wrapper interface with OCaml code. - * - * Author: Kim Nguyen - * Date: 04/11/08 - */ - -/*** - * Conventions: - * functions never doing any allocation (non caml_alloc*, caml_copy_string,...) - * have NOALLOC in the comment and their external declaration can have "noalloc" - */ - - -#include -#include - -#include "XMLTree.h" -#include "XMLTreeBuilder.h" -#include "Utils.h" -#include "common_stub.hpp" - -#define CAMLRAISEMSG(msg) (sxsi_raise_msg((char*) (msg))) - -#define XMLTREE(x) (Obj_val(x)) - -#define HSET(x) (Obj_val(x)) - -#define XMLTREEBUILDER(x) (Obj_val(x)) - - -#define TREENODEVAL(i) ((treeNode) (Int_val(i))) -#define TAGVAL(i) ((TagType) (Int_val(i))) -#define XMLTREE_ROOT 0 -#define NoAlloc - -extern "C" { -#include -#include -#include -#include -} - - -extern "C" value caml_clz(value i) -{ - return Val_long( ((sizeof(unsigned long)*8) - __builtin_clzl(Long_val(i))) - 1); -} - -extern "C" value caml_leading_bit(value i) -{ - return Val_long( ( 1 << (sizeof(unsigned long)*8 - __builtin_clzl(Long_val(i)) - 1))); -} - - -/** - * Interface to the TextCollection - */ - -/** - * Utility functions - */ - -extern "C" value caml_text_collection_get_text(value tree, value id){ - CAMLparam2(tree,id); - CAMLlocal1(str); - uchar* txt = XMLTREE(tree)->GetText((DocID) Int_val(id)); - str = caml_copy_string((const char*)txt); - CAMLreturn (str); -} - -extern "C" value caml_text_collection_empty_text(value tree,value id){ - CAMLparam2(tree,id); - CAMLreturn ( Val_int((XMLTREE(tree))->EmptyText((DocID) Int_val(id)))); -} - -bool docId_comp(DocID x, DocID y) { return x < y; } - -/** - * Existential queries - */ - -extern "C" value caml_text_collection_is_prefix(value tree,value str){ - CAMLparam2(tree,str); - uchar * cstr = (uchar *) String_val(str); - CAMLreturn (Val_bool((int) XMLTREE(tree)->IsPrefix(cstr))); -} - -extern "C" value caml_text_collection_is_suffix(value tree,value str){ - CAMLparam2(tree,str); - uchar * cstr = (uchar *) String_val(str); - CAMLreturn (Val_bool((int) XMLTREE(tree)->IsSuffix(cstr))); -} -extern "C" value caml_text_collection_is_equal(value tree,value str){ - CAMLparam2(tree,str); - uchar * cstr = (uchar *) String_val(str); - CAMLreturn (Val_bool((int) XMLTREE(tree)->IsEqual(cstr))); -} -extern "C" value caml_text_collection_is_contains(value tree,value str){ - CAMLparam2(tree,str); - uchar * cstr = (uchar *) String_val(str); - CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsContains(cstr))); -} - -extern "C" value caml_text_collection_is_lessthan(value tree,value str){ - CAMLparam2(tree,str); - uchar * cstr = (uchar *) String_val(str); - CAMLreturn ( Val_bool((int) XMLTREE(tree)->IsLessThan(cstr))); -} - - -/** - * Count Queries - */ - -/** - * Global counting - */ -extern "C" value caml_text_collection_count(value tree,value str){ - CAMLparam2(tree,str); - uchar * cstr = (uchar *) String_val(str); - CAMLreturn (Val_int((XMLTREE(tree)->Count(cstr)))); -} - -extern "C" value caml_text_collection_count_prefix(value tree,value str){ - CAMLparam2(tree,str); - uchar * cstr = (uchar *) String_val(str); - CAMLreturn (Val_int((XMLTREE(tree)->CountPrefix(cstr)))); -} - -extern "C" value caml_text_collection_count_suffix(value tree,value str){ - CAMLparam2(tree,str); - uchar * cstr = (uchar *) String_val(str); - CAMLreturn (Val_int((XMLTREE(tree)->CountSuffix(cstr)))); -} - -extern "C" value caml_text_collection_count_equal(value tree,value str){ - CAMLparam2(tree,str); - uchar * cstr = (uchar *) String_val(str); - CAMLreturn (Val_int((XMLTREE(tree)->CountEqual(cstr)))); -} - -extern "C" value caml_text_collection_count_contains(value tree,value str){ - CAMLparam2(tree,str); - uchar * cstr = (uchar *) String_val(str); - CAMLreturn (Val_int((XMLTREE(tree)->CountContains(cstr)))); -} - -extern "C" value caml_text_collection_count_lessthan(value tree,value str){ - CAMLparam2(tree,str); - uchar * cstr = (uchar *) String_val(str); - CAMLreturn (Val_int((XMLTREE(tree)->CountLessThan(cstr)))); -} - -static value sort_alloc_array(std::vector results, value resarray){ - std::sort(results.begin(), results.end(), docId_comp); - size_t s = results.size(); - resarray = caml_alloc_tuple(s); - for (size_t i = 0; i < s ;i++){ - caml_initialize(&Field(resarray,i),Val_int(results[i])); - }; - return resarray; - -} - -/** - * Full reporting queries - */ - -extern "C" value caml_text_collection_prefix(value tree,value str){ - CAMLparam2(tree,str); - CAMLlocal1(resarray); - uchar * cstr = (uchar *) String_val(str); - std::vector results = XMLTREE(tree)->Prefix(cstr); - CAMLreturn (sort_alloc_array(results,resarray)); -} - -extern "C" value caml_text_collection_suffix(value tree,value str){ - CAMLparam2(tree,str); - CAMLlocal1(resarray); - uchar * cstr = (uchar *) String_val(str); - std::vector results = XMLTREE(tree)->Suffix(cstr); - CAMLreturn (sort_alloc_array(results,resarray)); -} - -extern "C" value caml_text_collection_equals(value tree,value str){ - CAMLparam2(tree,str); - CAMLlocal1(resarray); - uchar * cstr = (uchar *) strdup(String_val(str)); - std::vector results = XMLTREE(tree)->Equals(cstr); - free(cstr); - CAMLreturn (sort_alloc_array(results,resarray)); -} - -extern "C" value caml_text_collection_contains(value tree,value str){ - CAMLparam2(tree,str); - CAMLlocal1(resarray); - uchar * cstr = (uchar *) String_val(str); - std::vector results = XMLTREE(tree)->Contains(cstr); - CAMLreturn (sort_alloc_array(results,resarray)); -} - -extern "C" value caml_text_collection_lessthan(value tree,value str){ - CAMLparam2(tree,str); - CAMLlocal1(resarray); - uchar * cstr = (uchar *) String_val(str); - std::vector results = XMLTREE(tree)->LessThan(cstr); - CAMLreturn (sort_alloc_array(results,resarray)); -} - - -////////////////////// BP - -extern "C" value caml_bitmap_create(value size) -{ - CAMLparam1(size); - size_t bits = Long_val(size); - size_t words = bits / (8*sizeof(unsigned int)); - unsigned int *buffer = (unsigned int*) calloc(words+1, sizeof(unsigned int)); - if (buffer == NULL) - CAMLRAISEMSG("BP: cannot allocate memory"); - CAMLreturn( (value) buffer); -} - -extern "C" value caml_bitmap_resize(value bitmap, value nsize) -{ - CAMLparam2(bitmap, nsize); - size_t bits = Long_val(nsize); - size_t bytes = (bits / (8 * sizeof(unsigned int)) + 1 ) * sizeof(unsigned int); - unsigned int * buffer = (unsigned int*) realloc((void *) bitmap, bytes); - if (buffer == NULL) - CAMLRAISEMSG("BP: cannot reallocate memory"); - CAMLreturn((value) buffer); -} - -extern "C" value caml_bitmap_setbit(value bitmap, value i, value b) -{ - CAMLparam3(bitmap, i, b); - unsigned int j = Int_val(i); - unsigned int x = Bool_val(b); - bp_setbit ((unsigned int*) bitmap, j, x); - CAMLreturn(Val_unit); -} - -extern "C" void caml_bp_delete(value b) -{ - CAMLparam1(b); - bp * B = Obj_val(b); - bp_delete(B); - CAMLreturn0; -} - -extern "C" value caml_bp_construct(value bitmap, value npar) -{ - CAMLparam2(bitmap, npar); - CAMLlocal1(res); - bp * b = bp_construct(Int_val(npar), (unsigned int *) bitmap, OPT_DEGREE); - res = sxsi_alloc_custom(caml_bp_delete); - Obj_val(res) = b; - CAMLreturn(res); -} - -extern "C" value caml_bp_first_child(value b, value idx) -{ - CAMLparam2(b, idx); - CAMLreturn (Val_int( bp_first_child(Obj_val(b), Int_val(idx)))); -} - - -extern "C" value caml_bp_next_sibling(value b, value idx) -{ - CAMLparam2(b, idx); - CAMLreturn (Val_int(bp_next_sibling(Obj_val(b), Int_val(idx)))); -} - -extern "C" value caml_bp_preorder_rank(value b, value idx) -{ - CAMLparam2(b, idx); - CAMLreturn (Val_int(bp_preorder_rank(Obj_val(b), Int_val(idx)) - 1)); -} - - -extern "C" value caml_bp_load(value file) -{ - CAMLparam1(file); - CAMLlocal1(result); - bp *B; - int f1 = Int_val(file); - int f2 = dup(f1); - FILE * fd = fdopen(f2, "r"); - if (fd == NULL) - CAMLRAISEMSG("Error opening bp file"); - B = loadTree(fd); - fclose(fd); - result = sxsi_alloc_custom(caml_bp_delete); - Obj_val(result) = B; - CAMLreturn(result); -} - -extern "C" value caml_bp_save(value b, value file) -{ - CAMLparam2(b, file); - bp *B = Obj_val(b); - int f1 = Int_val(file); - int f2 = dup(f1); - FILE * fd = fdopen(f2, "a"); - fflush(stderr); - if (fd == NULL) - CAMLRAISEMSG("Error saving bp file"); - saveTree(B, fd); - fclose(fd); - CAMLreturn(Val_unit); -} - -extern "C" value caml_bp_alloc_stats(value unit) -{ - CAMLparam1(unit); - CAMLreturn (Val_long(bp_get_alloc_stats())); -} diff --git a/src/bp_stub.cpp b/src/bp_stub.cpp new file mode 100644 index 0000000..349f5b3 --- /dev/null +++ b/src/bp_stub.cpp @@ -0,0 +1,113 @@ +#include +extern "C" { +#include +} +#include +#include "common_stub.hpp" + +extern "C" value caml_bitmap_create(value size) +{ + CAMLparam1(size); + size_t bits = Long_val(size); + size_t words = bits / (8*sizeof(unsigned int)); + unsigned int *buffer = (unsigned int*) calloc(words+1, sizeof(unsigned int)); + if (buffer == NULL) + sxsi_raise_msg("BP: cannot allocate memory"); + CAMLreturn( (value) buffer); +} + +extern "C" value caml_bitmap_resize(value bitmap, value nsize) +{ + CAMLparam2(bitmap, nsize); + size_t bits = Long_val(nsize); + size_t bytes = (bits / (8 * sizeof(unsigned int)) + 1 ) * sizeof(unsigned int); + unsigned int * buffer = (unsigned int*) realloc((void *) bitmap, bytes); + if (buffer == NULL) + sxsi_raise_msg("BP: cannot reallocate memory"); + CAMLreturn((value) buffer); +} + +extern "C" value caml_bitmap_setbit(value bitmap, value i, value b) +{ + CAMLparam3(bitmap, i, b); + unsigned int j = Int_val(i); + unsigned int x = Bool_val(b); + bp_setbit ((unsigned int*) bitmap, j, x); + CAMLreturn(Val_unit); +} + +extern "C" void caml_bp_delete(value b) +{ + CAMLparam1(b); + bp * B = Obj_val(b); + bp_delete(B); + CAMLreturn0; +} + +extern "C" value caml_bp_construct(value bitmap, value npar) +{ + CAMLparam2(bitmap, npar); + CAMLlocal1(res); + bp * b = bp_construct(Int_val(npar), (unsigned int *) bitmap, OPT_DEGREE); + res = sxsi_alloc_custom(caml_bp_delete); + Obj_val(res) = b; + CAMLreturn(res); +} + +extern "C" value caml_bp_first_child(value b, value idx) +{ + CAMLparam2(b, idx); + CAMLreturn (Val_int( bp_first_child(Obj_val(b), Int_val(idx)))); +} + + +extern "C" value caml_bp_next_sibling(value b, value idx) +{ + CAMLparam2(b, idx); + CAMLreturn (Val_int(bp_next_sibling(Obj_val(b), Int_val(idx)))); +} + +extern "C" value caml_bp_preorder_rank(value b, value idx) +{ + CAMLparam2(b, idx); + CAMLreturn (Val_int(bp_preorder_rank(Obj_val(b), Int_val(idx)) - 1)); +} + + +extern "C" value caml_bp_load(value file) +{ + CAMLparam1(file); + CAMLlocal1(result); + bp *B; + int f1 = Int_val(file); + int f2 = dup(f1); + FILE * fd = fdopen(f2, "r"); + if (fd == NULL) + sxsi_raise_msg("Error opening bp file"); + B = loadTree(fd); + fclose(fd); + result = sxsi_alloc_custom(caml_bp_delete); + Obj_val(result) = B; + CAMLreturn(result); +} + +extern "C" value caml_bp_save(value b, value file) +{ + CAMLparam2(b, file); + bp *B = Obj_val(b); + int f1 = Int_val(file); + int f2 = dup(f1); + FILE * fd = fdopen(f2, "a"); + fflush(stderr); + if (fd == NULL) + sxsi_raise_msg("Error saving bp file"); + saveTree(B, fd); + fclose(fd); + CAMLreturn(Val_unit); +} + +extern "C" value caml_bp_alloc_stats(value unit) +{ + CAMLparam1(unit); + CAMLreturn (Val_long(bp_get_alloc_stats())); +} diff --git a/src/libcamlshredder.clib b/src/libcamlshredder.clib index ccb0db4..7dc17cf 100644 --- a/src/libcamlshredder.clib +++ b/src/libcamlshredder.clib @@ -1,4 +1,5 @@ -OCamlDriver.o +utils_stub.o +bp_stub.o xml-tree-builder_stub.o xml-tree_stub.o common_stub.o diff --git a/src/main.ml b/src/main.ml index cc14ce2..100d0e4 100644 --- a/src/main.ml +++ b/src/main.ml @@ -140,7 +140,7 @@ let _ = ~msg:"Loading file" (Tree.load ~sample:!Options.sample_factor - ~load_text:true) + ~load_text:(not !Options.disable_text_collection)) !Options.input_file else let v = diff --git a/src/runtime.ml b/src/runtime.ml index a7103f1..3082f50 100644 --- a/src/runtime.ml +++ b/src/runtime.ml @@ -175,11 +175,11 @@ module Make (U : ResJIT.S) : S with type result_set = U.NS.t = DEFINE LOOP (t, states, ctx) = ( let _t = t in LOG(__ "top-down-run" 3 - "Entering node %i with loop (tag %s, context %i) with states %a" - (Node.to_int _t) - (Tag.to_string (Tree.tag tree _t)) - (Node.to_int (ctx)) - (StateSet.print) (states)); + "Entering node %i with loop (tag %s, context %i) with states %a" + (Node.to_int _t) + (Tag.to_string (Tree.tag tree _t)) + (Node.to_int (ctx)) + (StateSet.print) (states)); if _t == Tree.nil then nil_res else let tag = Tree.tag tree _t in @@ -254,6 +254,9 @@ DEFINE LOOP_TAG (t, states, tag, ctx) = ( l3jit_dispatch tr_list res1 res2 t slot1 slot2 and l2jit_dispatch_instr t ctx instr = + let () = LOG(__ "top-down-run" 3 "Dispatching instr: %a on node %i (context=%i)" + L2JIT.print_jump instr (Node.to_int t) (Node.to_int ctx)) + in match instr with | L2JIT.NOP () -> nil_res | L2JIT.FIRST_CHILD s -> LOOP ((Tree.first_child tree t), s, ctx) diff --git a/src/tree.ml b/src/tree.ml index 10a499d..bc81cf4 100644 --- a/src/tree.ml +++ b/src/tree.ml @@ -51,7 +51,7 @@ module TreeBuilder = struct type t external create : unit -> t = "caml_xml_tree_builder_create" - external open_document : t -> bool -> int -> bool -> int -> unit = "caml_xml_tree_builder_open_document" + external open_document : t -> int -> bool -> int -> unit = "caml_xml_tree_builder_open_document" external close_document : t -> tree = "caml_xml_tree_builder_close_document" external open_tag : t -> string -> unit = "caml_xml_tree_builder_open_tag" external close_tag : t -> string -> unit = "caml_xml_tree_builder_close_tag" @@ -133,8 +133,7 @@ struct Expat.set_end_element_handler parser_ (end_element_handler parser_ build buf); Expat.set_character_data_handler parser_ (character_data_handler parser_ build buf); Logger.print Format.err_formatter "Started parsing@\n"; - open_document build !Options.index_empty_texts !Options.sample_factor - !Options.disable_text_collection !Options.text_index_type; + open_document build !Options.sample_factor !Options.disable_text_collection !Options.text_index_type; open_tag build ""; parser_, finalize @@ -184,7 +183,7 @@ let bit_vector_unsafe_set v i b = let bit_vector_create n = let len = if n <= 0 then 0 else (n - 1) / 8 + 1 in - String.make len '\000' + String.make len '\000' type t = { doc : tree; @@ -264,9 +263,11 @@ let next_sibling t n = tree_next_sibling t.doc n external tree_next_element : tree -> [`Tree] Node.t -> [`Tree] Node.t = "caml_xml_tree_next_element" "noalloc" let next_element t n = tree_next_element t.doc n + external tree_tagged_sibling : tree -> [`Tree] Node.t -> Tag.t -> [`Tree] Node.t = "caml_xml_tree_tagged_sibling" "noalloc" let tagged_sibling t n tag = tree_tagged_sibling t.doc n tag + external tree_select_sibling : tree -> [`Tree ] Node.t -> unordered_set -> [`Tree] Node.t = "caml_xml_tree_select_sibling" "noalloc" let select_sibling t n tag_set = tree_select_sibling t.doc n tag_set @@ -318,7 +319,7 @@ open Format let dump_tag_table t = let tag = ref 0 in let printer ppf set = - Logger.print ppf "%s: %a" + Logger.print ppf "%s: %a" (Tag.to_string !tag) TagSet.print (TagSet.inj_positive set); incr tag in diff --git a/src/utils_stub.cpp b/src/utils_stub.cpp new file mode 100644 index 0000000..dc3688a --- /dev/null +++ b/src/utils_stub.cpp @@ -0,0 +1,11 @@ +#include "common_stub.hpp" + +extern "C" value caml_clz(value i) +{ + return Val_long( ((sizeof(unsigned long)*8) - __builtin_clzl(Long_val(i))) - 1); +} + +extern "C" value caml_leading_bit(value i) +{ + return Val_long( ( 1 << (sizeof(unsigned long)*8 - __builtin_clzl(Long_val(i)) - 1))); +} diff --git a/src/xml-tree_stub.cpp b/src/xml-tree_stub.cpp index cdb7fe1..2551c66 100644 --- a/src/xml-tree_stub.cpp +++ b/src/xml-tree_stub.cpp @@ -1,6 +1,7 @@ #include #include "xml-tree.hpp" #include "common_stub.hpp" +#include using namespace SXSI; -- 2.17.1