From 7b6e25ac7c3b4e06c8386c1090bb69ae97a47143 Mon Sep 17 00:00:00 2001 From: kim Date: Wed, 28 Apr 2010 06:45:18 +0000 Subject: [PATCH] . git-svn-id: svn+ssh://idea.nguyen.vg/svn/sxsi/trunk/xpathcomp@803 3cdefd35-fc62-479d-8e8d-bae585ffb9ca --- OCamlDriver.cpp | 5 +++-- XMLDocShredder.cpp | 7 +++++-- main.ml | 23 +++++++++++++++++++++++ tree.ml | 4 ++++ tree.mli | 5 +++++ 5 files changed, 40 insertions(+), 4 deletions(-) diff --git a/OCamlDriver.cpp b/OCamlDriver.cpp index 21e5398..7a97e18 100644 --- a/OCamlDriver.cpp +++ b/OCamlDriver.cpp @@ -255,13 +255,14 @@ extern "C" value caml_text_collection_count_lessthan(value tree,value str){ } static value sort_alloc_array(std::vector results, value resarray){ - std::sort(results.begin(), results.end(), docId_comp); + std::sort(results.begin(), results.end(), docId_comp); size_t s = results.size(); resarray = caml_alloc_tuple(s); for (size_t i = 0; i < s ;i++){ caml_initialize(&Field(resarray,i),Val_int(results[i])); }; - return resarray; + return resarray; + } /** diff --git a/XMLDocShredder.cpp b/XMLDocShredder.cpp index 7f7d408..cb70e19 100644 --- a/XMLDocShredder.cpp +++ b/XMLDocShredder.cpp @@ -102,8 +102,11 @@ void XMLDocShredder::processStartElement() // fetch element name; this will be the full qualified name ustring name = reader_->get_name(); bool empty = false; - - tb->NewOpenTag(name); + size_t found = name.find_first_of(':'); + if (found == ustring::npos) + tb->NewOpenTag(name); + else + tb->NewOpenTag(name.substr(found+1,name.length() - found - 1)); /* We must be really carefull here. calling process attributes moves the document pointer on the last attribute, hence calling reader_->is_empty diff --git a/main.ml b/main.ml index 0650b6b..f440b3a 100644 --- a/main.ml +++ b/main.ml @@ -54,6 +54,28 @@ let test_loop2 tree tag = Hashtbl.add f (hash 101) `Foo; g t' Tree.root +let test_text doc = + let _ = Printf.eprintf "Contains(bree)" in + let _ = time (Tree.test_contains doc) "bree" in + let _ = Printf.eprintf "Contains(brain)" in + let _ = time (Tree.test_contains doc) "brain" in + let _ = Printf.eprintf "Contains(brain)" in + let i = time (Tree.test_contains doc) "brain" in + let _ = Printf.eprintf "%i\nContains(Australia)" i in + let i = time (Tree.test_contains doc) "AUSTRALIA" in + let _ = Printf.eprintf "%i\n Contains(1930)" i in + let i = time (Tree.test_contains doc) "1930" in + let _ = Printf.eprintf "%i\n startswith(bar)" i in + let i = time (Tree.test_prefix doc) "bar" in + let _ = Printf.eprintf "%i\n endswith(LAND)" i in + let i = time (Tree.test_suffix doc) "LAND" in + let _ = Printf.eprintf "%i\n =(2001)" i in + let i = time (Tree.test_equals doc) "2001" in + let _ = Printf.eprintf "%i\n =(Nguyen)" i in + let i = time (Tree.test_equals doc) "Nguyen" in + Printf.eprintf "%i\n" i ; + () + type pointers external build_pointers : Tree.t -> pointers = "caml_build_pointers" external iter_pointers : pointers -> int = "caml_iter_pointers" @@ -71,6 +93,7 @@ let main v query_string output = Ulexer.Loc.Exc_located ((x,y),e) -> Printf.eprintf "character %i-%i %s\n" x y (Printexc.to_string e);exit 1 in let _ = Printf.eprintf "Number of nodes %i\n%!" (Tree.size v) in + let _ = test_text v in (* let _ = Tree.stats v in let _ = Printf.eprintf "Timing first_child/next_sibling %!" in let _ = time (Tree.benchmark_fcns) v in diff --git a/tree.ml b/tree.ml index a034636..0a1bdda 100644 --- a/tree.ml +++ b/tree.ml @@ -788,3 +788,7 @@ let stats t = +let test_prefix t s = Array.length (text_prefix t.doc s) +let test_suffix t s = Array.length (text_suffix t.doc s) +let test_contains t s = Array.length (text_contains t.doc s) +let test_equals t s = Array.length (text_equals t.doc s) diff --git a/tree.mli b/tree.mli index 4a8f12c..280d97d 100644 --- a/tree.mli +++ b/tree.mli @@ -91,3 +91,8 @@ val benchmark_jump : t -> Tag.t -> unit val benchmark_fcns : t -> unit val benchmark_lcps : t -> unit val stats : t -> unit + +val test_suffix : t -> string -> int +val test_prefix : t -> string -> int +val test_equals : t -> string -> int +val test_contains : t -> string -> int -- 2.17.1