Add a command line option to disable the indexing of ignorable whitespaces.
author Kim Nguyễn <kn@lri.fr>
Wed, 2 May 2012 12:26:42 +0000 (14:26 +0200)
committer Kim Nguyễn <kn@lri.fr>
Wed, 2 May 2012 12:27:19 +0000 (14:27 +0200)
src/options.ml
src/tree.ml
tests/perf_tests/xmark_10.xml_timing.83e9f9d8f219 [new file with mode: 0644]

index cec31bc..0c7c92d 100644 (file)
@@ -1,7 +1,7 @@
 open Utils
 open Format
 
-let index_empty_texts = ref false
+let index_empty_texts = ref true
 let sample_factor = ref 64
 let disable_text_collection = ref false
 let tc_threshold = ref 60000
@@ -67,8 +67,8 @@ let spec = Arg.align
     "-f", Arg.Set_int(sample_factor),
     "<n> sample factor [default=64]";
 
-    "-i", Arg.Set(index_empty_texts),
-    " index empty texts [default=false]";
+    "-ne", Arg.Clear(index_empty_texts),
+    " don't index empty texts [default=index]";
 
     "-d", Arg.Set(disable_text_collection),
     " disable text collection[default=false]";
index df72de7..2b8c1aa 100644 (file)
@@ -79,11 +79,13 @@ struct
   let do_text b t =
     if Buffer.length t > 0 then begin
       let s = Buffer.contents t in
-      begin
-       open_tag b "<$>";
-       text b s;
-       close_tag b "<$>";
-      end;
+      if (!Options.index_empty_texts) || not (is_whitespace s) then
+       begin
+         open_tag b "<$>";
+         Printf.eprintf "Inserting >>%s<<\n" s;
+         text b s;
+         close_tag b "<$>";
+       end;
       Buffer.clear t
     end
 
diff --git a/tests/perf_tests/xmark_10.xml_timing.83e9f9d8f219 b/tests/perf_tests/xmark_10.xml_timing.83e9f9d8f219
new file mode 100644 (file)
index 0000000..a769340
--- /dev/null
@@ -0,0 +1,4 @@
+/child::site/child::regions
+Execution time: 1.353979ms
+Number of results: 1
+/child::site/child::closed_auctions