From: Kim Nguyễn Date: Wed, 30 May 2012 12:17:47 +0000 (+0200) Subject: Add tests for wordbased index. X-Git-Url: http://git.nguyen.vg/gitweb/?p=SXSI%2Fxpathcomp.git;a=commitdiff_plain;h=2e74356449e2ea3ab50ccb96ec81e4cf657bae5f Add tests for wordbased index. --- diff --git a/tests/non_regression_tests/medline.srx b/tests/non_regression_tests/medline.srx deleted file mode 120000 index b38b5e2..0000000 --- a/tests/non_regression_tests/medline.srx +++ /dev/null @@ -1 +0,0 @@ -../docs/medline.srx \ No newline at end of file diff --git a/tests/non_regression_tests/medline_full.srx b/tests/non_regression_tests/medline_full.srx new file mode 120000 index 0000000..7dd02be --- /dev/null +++ b/tests/non_regression_tests/medline_full.srx @@ -0,0 +1 @@ +/raid0/kn/docs/medline/xml/medline_full_wordbased.srx \ No newline at end of file diff --git a/tests/non_regression_tests/medline_full.xml.queries b/tests/non_regression_tests/medline_full.xml.queries new file mode 100644 index 0000000..953dd4a --- /dev/null +++ b/tests/non_regression_tests/medline_full.xml.queries @@ -0,0 +1,10 @@ +W1%/descendant::Article[ descendant::AbstractText[ contains ( . , "blood sample") ] ]%/descendant::Article/descendant::AbstractText[ . ftcontains "blood sample" all words ordered ] +W2%/descendant::Article[ descendant::AbstractText[ contains ( . , "is such that") ] ]%/descendant::Article/descendant::AbstractText[ . ftcontains "is such that" all words ordered ] +W3%/descendant::Article[ descendant::AbstractText[ contains( . , "various types of") and contains( . , "immune cells") ] ]%/descendant::Article[descendant::AbstractText[ . ftcontains "various types of" all words ordered ftand "immune cells" all words ordered ]] +W4%/descendant::Article[ descendant::AbstractText[ contains( . , "of the bone marrow") ] ]%/descendant::Article[descendant::AbstractText[ . ftcontains "of the bone marrow" all words ordered ]] +W5%/descendant::Article[ descendant::AbstractText[ contains( . , "cell") and not(contains( ., "blood")) ] ]%/descendant::Article[descendant::AbstractText[ . ftcontains "cell" ftand ftnot "blood" ]] +#W5%/descendant::MedlineCitation/child::Article/child::AuthorList/child::Author[ child::LastName[starts-with( ., "Bar")]]%/descendant::MedlineCitation/child::Article/child::AuthorList/child::Author[ child::LastName ftcontains "Bar" at start ] +#W6%/descendant::*[ descendant::LastName[ contains( ., "Nguyen") ] ]%/descendant::*[ descendant::LastName ftcontains "Nguyen" entire content ] +#W7%/descendant::*/descendant::*[ contains( ., "epididymis") ]%/descendant::*/descendant::*[ . ftcontains "epididymis" ] +#W8%/descendant::*[ descendant::PublicationType[ ends-with( ., "Article") ]]%/descendant::*[ descendant::PublicationType ftcontains "Article" at end ] +#W9%/descendant::MedlineCitation[ descendant::Country[ contains( ., "AUSTRALIA") ] ]%/descendant::MedlineCitation[ descendant::Country ftcontains "AUSTRALIA" ] diff --git a/tests/non_regression_tests/medline_full.xml.queries.old b/tests/non_regression_tests/medline_full.xml.queries.old new file mode 100644 index 0000000..5ad5e3c --- /dev/null +++ b/tests/non_regression_tests/medline_full.xml.queries.old @@ -0,0 +1,9 @@ +M1%/descendant::Article[ descendant::AbstractText[ contains (., "foot") or contains( ., "feet") ] ] +M2%/descendant::Article[ descendant::AbstractText[ contains ( . , "plus") ] ] +M3%/descendant::Article[ descendant::AbstractText[ contains ( . , "plus") or contains ( . , "for") ] ] +M4%/descendant::Article[ descendant::AbstractText[ contains ( . , "plus") and not(contains ( . , "for")) ] ] +M5%/descendant::MedlineCitation/child::Article/child::AuthorList/child::Author[ child::LastName[starts-with( ., "Bar")]] +M6%/descendant::*[ descendant::LastName[ contains( ., "Nguyen") ] ] +M7%/descendant::*/descendant::*[ contains( ., "epididymis") ] +M8%/descendant::*[ descendant::PublicationType[ ends-with( ., "Article") ]] +M9%/descendant::MedlineCitation[ descendant::Country[ contains( ., "AUSTRALIA") ] ] diff --git a/tests/non_regression_tests/monet.sh b/tests/non_regression_tests/monet.sh index d1e2c35..b662aa2 100755 --- a/tests/non_regression_tests/monet.sh +++ b/tests/non_regression_tests/monet.sh @@ -50,6 +50,8 @@ $tquery" NUM_RESULTS="$num" done + if [ -z "$4" ] + then for i in `seq 1 "$repeat"` do { @@ -69,7 +71,10 @@ $tquery" $tprint" done - + else + TIME_MAT="999999" + TIME_PRINT="0" + fi while pidof Mserver >/dev/null do diff --git a/tests/non_regression_tests/qizx.sh b/tests/non_regression_tests/qizx.sh index 2226239..763ed53 100755 --- a/tests/non_regression_tests/qizx.sh +++ b/tests/non_regression_tests/qizx.sh @@ -5,7 +5,7 @@ source utils.sh function stop_qizx() { sleep 2 - rm -rf mat.xq count.xq +# rm -rf mat.xq count.xq QIZXPIDS=`ps xwww --format "%p,%a" | grep java | grep qizx | cut -f 1 -d ' '` for i in $QIZXPIDS do @@ -40,6 +40,7 @@ function do_qizx() { query="$1" doc=`basename "$2"` repeat="$3" + count="$4" echo 'let $doc := collection("'"$doc"'") return count($doc'"$query"')' > count.xq echo 'let $doc := collection("'"$doc"'") return $doc'"$query" > mat.xq @@ -101,12 +102,15 @@ function do_qizx() { then break fi - done + done IT=0 - while true - do - OUTPUT=`$QIZX -g /raid0/kn/qizxlib/ -l xmark -r "$repeat" mat.xq -out /dev/null 2>&1` + if [ -z "$count" ] + then + + while true + do + OUTPUT=`$QIZX -g /raid0/kn/qizxlib/ -l xmark -r "$repeat" mat.xq -out /dev/null 2>&1` if echo "$OUTPUT" | grep -q 'java' >/dev/null 2>&1 then if [ "$IT" = 5 ] @@ -135,6 +139,9 @@ function do_qizx() { break fi done + else + time_mat="999999" + fi echo $time_count echo 0 diff --git a/tests/non_regression_tests/sxsi.sh b/tests/non_regression_tests/sxsi.sh index 0ef26c4..6221e83 100755 --- a/tests/non_regression_tests/sxsi.sh +++ b/tests/non_regression_tests/sxsi.sh @@ -4,6 +4,7 @@ source utils.sh function do_sxsi() { query="$1" + count="$4" doc=`basename "$2" .xml`.srx repeat="$3" TIME_MAT="" @@ -15,12 +16,17 @@ function do_sxsi() { NUM_RESULT=`echo "$output" | grep "Number of results" | grep -o '[0-9]*'` time=`echo "$output" | grep "Execution time" | cut -f 2 -d',' | cut -f1 -d ':' | grep -o '[0-9.]*'` TIME_COUNT=`echo "$time" | tail -n +2 | average` - - output=`./main.native -b -r "$3" "$doc" "$query" /dev/null 2>&1` - mtime=`echo "$output" | grep "Execution time" | cut -f 2 -d',' | cut -f1 -d ':'| grep -o '[0-9.]*'` - ptime=`echo "$output" | grep 'Serialization time' | cut -f 3 -d ' ' | grep -o '[0-9.]*'` - TIME_MAT=`echo "$mtime" | tail -n +2 | average` - TIME_PRINT="$ptime" + if [ -z "$count" ] + then + output=`./main.native -b -r "$3" "$doc" "$query" /dev/null 2>&1` + mtime=`echo "$output" | grep "Execution time" | cut -f 2 -d',' | cut -f1 -d ':'| grep -o '[0-9.]*'` + ptime=`echo "$output" | grep 'Serialization time' | cut -f 3 -d ' ' | grep -o '[0-9.]*'` + TIME_MAT=`echo "$mtime" | tail -n +2 | average` + TIME_PRINT="$ptime" + else + TIME_MAT="999999" + TIME_PRINT="0" + fi echo "$TIME_COUNT" echo "$TIME_MAT" diff --git a/tests/non_regression_tests/test.sh b/tests/non_regression_tests/test.sh index c02a2be..40ddcd4 100755 --- a/tests/non_regression_tests/test.sh +++ b/tests/non_regression_tests/test.sh @@ -34,13 +34,13 @@ then repeat_sxsi=1 repeat_qizx=2 else - repeat_monet=10 + repeat_monet=1 repeat_sxsi=10 repeat_qizx=4 fi -for TESTDOC in "xmark_10.xml" #"medline.xml" "xmark_01.04.xml" +for TESTDOC in medline_full.xml #"xmark_10.xml" #"medline.xml" "xmark_01.04.xml" do echo echo "$TESTDOC" @@ -68,7 +68,7 @@ do read sxsi_mat_time read sxsi_print_time read sxsi_count - } < <(`pwd`/sxsi.sh "$query" "$TESTDOC" "$repeat_sxsi") + } < <(`pwd`/sxsi.sh "$query" "$TESTDOC" "$repeat_sxsi" "$2") echo " ok" echo -n Running MonetBD/XQuery @@ -86,7 +86,7 @@ do read monet_mat_time read monet_print_time read monet_count - } < <(`pwd`/monet.sh "$query" "$TESTDOC" "$repeat_monet") + } < <(`pwd`/monet.sh "$query" "$TESTDOC" "$repeat_monet" "$2") fi echo " ok" @@ -97,7 +97,7 @@ do read qizx_mat_time read qizx_print_time read qizx_count - } < <(`pwd`/qizx.sh "$query_qizx" "$TESTDOC" "$repeat_qizx") + } < <(`pwd`/qizx.sh "$query_qizx" "$TESTDOC" "$repeat_qizx" "$2") echo " ok"