From: kim Date: Tue, 27 Jan 2009 14:53:07 +0000 (+0000) Subject: Don't index empty texts X-Git-Url: http://git.nguyen.vg/gitweb/?a=commitdiff_plain;h=95367aa932a9e179976e59ea326542c50905f5b3;p=SXSI%2Fxpathcomp.git Don't index empty texts git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@75 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- diff --git a/Makefile b/Makefile index 6b15fff..63c78f5 100644 --- a/Makefile +++ b/Makefile @@ -71,6 +71,7 @@ main: libcamlshredder.a $(MLOBJS) .SUFFIXES: .ml .mli .cmx .cmi .cpp .PHONY:compute_depend version + .cpp.o: @echo [CPP] $@ $(HIDE) $(CXX) $(CXXINCLUDES) -c $(CXXFLAGS) $< diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index d8f1933..c906f0f 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -220,6 +220,17 @@ extern "C" CAMLprim value caml_xml_tree_tag_id(value tree,value id){ CAMLparam2(tree,id); CAMLreturn (Val_int(XMLTREE(tree)->Tag(TREENODEVAL(id)))); } + +extern "C" CAMLprim value caml_xml_tree_register_tag(value tree,value str){ + CAMLparam2(tree,str); + CAMLlocal1(id); + unsigned char* tag; + tag = (unsigned char*) (String_val(str)); + id = Val_int(XMLTREE(tree)->RegisterTag(tag)); + free(tag); + CAMLreturn (id); +} + extern "C" CAMLprim value caml_xml_tree_nullt(value unit){ CAMLparam1(unit); CAMLreturn (NULLT); diff --git a/SXSIStorageInterface.cpp b/SXSIStorageInterface.cpp index af7ba7f..e5bd7cf 100644 --- a/SXSIStorageInterface.cpp +++ b/SXSIStorageInterface.cpp @@ -15,8 +15,7 @@ SXSIStorageInterface::SXSIStorageInterface() { tree = new XMLTree(); - tree->OpenDocument(true,64); - + tree->OpenDocument(false,64); } SXSIStorageInterface::~SXSIStorageInterface() @@ -70,8 +69,5 @@ void SXSIStorageInterface::printStats(){ std::cerr << _new_text << " calls to newText\n"; std::cerr << _new_empty_text << " calls to newEmptyText\n"; std::cerr << _length_text << " bytes (=" << _length_text/1024 << "kb ) added to TextCollection\n"; - std::cerr << _heap_base << " bytes of memory (initial)\n"; - std::cerr << _heap_parsing << " bytes of memory (during parsing)\n"; - std::cerr << _heap_done << " bytes of memory (final)\n"; return; } diff --git a/benchmark/depend b/benchmark/depend index 35976b3..9572b5f 100644 --- a/benchmark/depend +++ b/benchmark/depend @@ -2,3 +2,4 @@ benchmark.cmo: benchmark.cmi benchmark.cmx: benchmark.cmi main.cmo: benchmark.cmi main.cmx: benchmark.cmx +benchmark.cmi: diff --git a/benchmark/main.ml b/benchmark/main.ml index a761bc2..98a9519 100644 --- a/benchmark/main.ml +++ b/benchmark/main.ml @@ -70,7 +70,7 @@ struct ( ".*Compiling query :[ \\t]*\\([0-9]+\\.[0-9]*\\)ms.*", [ Query_compile_time 1]); - ( ".*TopDown (No BackTrack) :[ \\t]*\\([0-9]+\\.[0-9]*\\)ms.*", + ( ".*Execution time :[ \\t]*\\([0-9]+\\.[0-9]*\\)ms.*", [ Query_execution_time 1]); ( ".*Serializing results :[ \\t]*\\([0-9]+\\.[0-9]*\\)ms.*", @@ -91,10 +91,17 @@ end module I = INIT_TESTER (CONF) module Test = MK (SXSI) (MK (SaxonBXQuery) (I)) + + let l = Test.test_engine [] (make_queryset - ["/home/kim/Documents/Work/Code/xpathcomp/tests/small.xml"] + ["/home/kim/Documents/Work/Code/xpathcomp/tests/tiny.xml"] ["/descendant::*/descendant::*/descendant::*"]) ;; + + + + + List.iter (function (e,d),s -> Printf.printf "\n-------------- %s -----------------" e; Array.iter ( fun i -> diff --git a/main.ml b/main.ml index 6295044..48f81bd 100644 --- a/main.ml +++ b/main.ml @@ -7,7 +7,7 @@ INCLUDE "debug.ml" open Automaton -let a = ref None + let l = ref [] ;; let time f x = @@ -23,33 +23,35 @@ let total_time () = List.fold_left (+.) 0. !l;; let main filename query output = - Printf.eprintf "Parsing document : %!"; - let v = time Tree.Binary.parse_xml_uri filename in - MM(v,__LOCATION__); - a := Some (v); - a := None; - Printf.eprintf "Parsing query : "; - let query = try - time - XPath.Parser.parse_string query - with - Ulexer.Loc.Exc_located ((x,y),e) -> Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);exit 1 + + (* Just a trick to allow the C++ code to print debugging stuff first *) + let v = time (fun () -> let v = Tree.Binary.parse_xml_uri filename; + in Printf.eprintf "Parsing document : %!";v + ) () in - Printf.eprintf "Compiling query : "; - let auto = time XPath.Compile.compile query in - XPath.Ast.print Format.err_formatter query; - Format.eprintf "\n%!"; -(* Format.eprintf "Internal rep of the tree is :\n%!"; - Tree.Binary.dump v; *) - Printf.eprintf "TopDown (No BackTrack) : \n"; - time (fun v -> ignore (TopDown.accept auto v)) v; - Printf.eprintf "Number of nodes in the result set : %i\n" (BST.cardinal auto.result); - begin - match output with - | None -> () - | Some f -> - - Printf.eprintf "Serializing results : "; + MM(v,__LOCATION__); + Printf.eprintf "Parsing query : "; + let query = try + time + XPath.Parser.parse_string query + with + Ulexer.Loc.Exc_located ((x,y),e) -> Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);exit 1 + in + Printf.eprintf "Compiling query : "; + let auto = time XPath.Compile.compile query in + XPath.Ast.print Format.err_formatter query; + Format.eprintf "\n%!"; + (* Format.eprintf "Internal rep of the tree is :\n%!"; + Tree.Binary.dump v; *) + Printf.eprintf "Execution time : "; + time (fun v -> ignore (TopDown.accept auto v)) v; + Printf.eprintf "Number of nodes in the result set : %i\n" (BST.cardinal auto.result); + begin + match output with + | None -> () + | Some f -> + + Printf.eprintf "Serializing results : "; time( fun () -> let oc = open_out f in output_string oc "\n"; diff --git a/tag.ml b/tag.ml index f0d9062..8c77691 100644 --- a/tag.ml +++ b/tag.ml @@ -29,13 +29,16 @@ struct end module HMap = Map.Make (struct type t = int let compare x y = x - y end) + module HTag = struct type t = int let attribute = T.hash T.attribute let pcdata = T.hash T.pcdata + let pool = ref HMap.empty + let add_pool s = let hash = T.hash s in pool := HMap.add hash s !pool @@ -45,10 +48,11 @@ let clear_pool () = add_pool ""; add_pool T.attribute; add_pool T.pcdata - let _ = clear_pool () +let init l = + clear_pool () let tag s = let hash = T.hash s in @@ -91,4 +95,3 @@ struct end include STag -let _ = Callback.register "caml_hash_tag" tag diff --git a/tree.ml b/tree.ml index c3a2255..8ebcdcc 100644 --- a/tree.ml +++ b/tree.ml @@ -453,7 +453,11 @@ module DEBUGTREE | Node (SC (_,ns)) -> norm ns | Node(NC t) -> let ns = next_sibling_ n.doc t in - let txt = next_text_ n.doc t in + let txt = + if XML.Tree.is_nil ns then + next_text_ n.doc t + else prev_text_ n.doc ns + in if is_empty_ n.doc txt then norm ns else Node (SC (txt, ns)) @@ -466,7 +470,6 @@ module DEBUGTREE | { doc=d; node=Node(SC (i,_) )} -> text_xml_id_ d i | _ -> failwith "id" - (* Wrapper around critical function *) let string t = time ("TextCollection.GetText()") (string) t let left = first_child @@ -558,4 +561,3 @@ module DEBUGTREE end module Binary = DEBUGTREE -