From 02ce6de12b5662837918ca9bbbcf62c14b04b6d7 Mon Sep 17 00:00:00 2001
From: JINO-ROHIT
Date: Tue, 10 Dec 2024 17:52:58 +0530
Subject: [PATCH 1/4] [DRAFT] tests for nanobeir evaluator

---
 tests/test_nanobeir_evaluator.py | 43 ++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 tests/test_nanobeir_evaluator.py

diff --git a/tests/test_nanobeir_evaluator.py b/tests/test_nanobeir_evaluator.py
new file mode 100644
index 000000000..49780999c
--- /dev/null
+++ b/tests/test_nanobeir_evaluator.py
@@ -0,0 +1,43 @@
+from __future__ import annotations
+
+import pytest
+
+from sentence_transformers import SentenceTransformer
+from sentence_transformers.evaluation import NanoBEIREvaluator
+
+
+def test_nanobeir_evaluator():
+    """Tests that the NanoBEIREvaluator can be loaded and produces expected metrics"""
+    datasets = ["QuoraRetrieval", "MSMARCO"]
+    query_prompts = {
+        "QuoraRetrieval": "Instruct: Given a question, retrieve questions that are semantically equivalent to the given question\\nQuery: ",
+        "MSMARCO": "Instruct: Given a web search query, retrieve relevant passages that answer the query\\nQuery: "
+    }
+
+    model = SentenceTransformer("sentence-transformers-testing/stsb-bert-tiny-safetensors")
+
+    evaluator = NanoBEIREvaluator(
+        dataset_names=datasets,
+        query_prompts=query_prompts,
+    )
+
+    results = evaluator(model)
+
+    assert len(results) > 0
+    assert all(isinstance(results[metric], float) for metric in results)
+
+# def test_nanobeir_evaluator_with_invalid_dataset():
+#     """Test that NanoBEIREvaluator raises an error for invalid dataset names."""
+#     invalid_datasets = ["invalidDataset"]
+
+#     with pytest.raises(ValueError, match=f"Dataset(s) {invalid_datasets} not found in the NanoBEIR collection.Valid dataset names are: ['climatefever', 'dbpedia', 'fever', 'fiqa2018', 'hotpotqa', 'msmarco', 'nfcorpus', 'nq', 'quoraretrieval', 'scidocs', 'arguana', 'scifact', 'touche2020']"):
+#         NanoBEIREvaluator(
+#             dataset_names=invalid_datasets,
+#         )
+
+# def test_nanobeir_evaluator_empty_inputs():
+#     """Test that NanoBEIREvaluator behaves correctly with empty datasets."""
+#     with pytest.raises(ValueError, match="dataset_names cannot be empty. Use None to evaluate on all datasets."):
+#         NanoBEIREvaluator(
+#             dataset_names=[],
+#         )

From 21757efa65a6dee371db282f16e5e761e031c02b Mon Sep 17 00:00:00 2001
From: Tom Aarsen
Date: Fri, 10 Jan 2025 13:23:28 +0100
Subject: [PATCH 2/4] Move evaluator to evaluation folder

---
 tests/{ => evaluation}/test_nanobeir_evaluator.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)
 rename tests/{ => evaluation}/test_nanobeir_evaluator.py (96%)

diff --git a/tests/test_nanobeir_evaluator.py b/tests/evaluation/test_nanobeir_evaluator.py
similarity index 96%
rename from tests/test_nanobeir_evaluator.py
rename to tests/evaluation/test_nanobeir_evaluator.py
index 49780999c..27bced447 100644
--- a/tests/test_nanobeir_evaluator.py
+++ b/tests/evaluation/test_nanobeir_evaluator.py
@@ -1,7 +1,5 @@
 from __future__ import annotations
 
-import pytest
-
 from sentence_transformers import SentenceTransformer
 from sentence_transformers.evaluation import NanoBEIREvaluator
 
@@ -11,7 +9,7 @@ def test_nanobeir_evaluator():
     datasets = ["QuoraRetrieval", "MSMARCO"]
     query_prompts = {
         "QuoraRetrieval": "Instruct: Given a question, retrieve questions that are semantically equivalent to the given question\\nQuery: ",
-        "MSMARCO": "Instruct: Given a web search query, retrieve relevant passages that answer the query\\nQuery: "
+        "MSMARCO": "Instruct: Given a web search query, retrieve relevant passages that answer the query\\nQuery: ",
     }
 
     model = SentenceTransformer("sentence-transformers-testing/stsb-bert-tiny-safetensors")
@@ -26,6 +24,7 @@ def test_nanobeir_evaluator():
     assert len(results) > 0
     assert all(isinstance(results[metric], float) for metric in results)
+
 
 # def test_nanobeir_evaluator_with_invalid_dataset():
 #     """Test that NanoBEIREvaluator raises an error for invalid dataset names."""
 #     invalid_datasets = ["invalidDataset"]

From c837a69f9a17679a0f4f19f343b7f3545229e389 Mon Sep 17 00:00:00 2001
From: Tom Aarsen
Date: Fri, 10 Jan 2025 13:35:26 +0100
Subject: [PATCH 3/4] Reintroduce the nice "raises" tests

---
 tests/evaluation/test_nanobeir_evaluator.py | 33 +++++++++++++--------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/tests/evaluation/test_nanobeir_evaluator.py b/tests/evaluation/test_nanobeir_evaluator.py
index 27bced447..6f7f04cdf 100644
--- a/tests/evaluation/test_nanobeir_evaluator.py
+++ b/tests/evaluation/test_nanobeir_evaluator.py
@@ -1,5 +1,9 @@
 from __future__ import annotations
 
+import re
+
+import pytest
+
 from sentence_transformers import SentenceTransformer
 from sentence_transformers.evaluation import NanoBEIREvaluator
 
@@ -25,18 +29,21 @@ def test_nanobeir_evaluator():
     assert all(isinstance(results[metric], float) for metric in results)
 
 
-# def test_nanobeir_evaluator_with_invalid_dataset():
-#     """Test that NanoBEIREvaluator raises an error for invalid dataset names."""
-#     invalid_datasets = ["invalidDataset"]
+def test_nanobeir_evaluator_with_invalid_dataset():
+    """Test that NanoBEIREvaluator raises an error for invalid dataset names."""
+    invalid_datasets = ["invalidDataset"]
+
+    with pytest.raises(
+        ValueError,
+        match=re.escape(
+            r"Dataset(s) ['invalidDataset'] not found in the NanoBEIR collection."
+            r"Valid dataset names are: ['climatefever', 'dbpedia', 'fever', 'fiqa2018', 'hotpotqa', 'msmarco', 'nfcorpus', 'nq', 'quoraretrieval', 'scidocs', 'arguana', 'scifact', 'touche2020']"
+        ),
+    ):
+        NanoBEIREvaluator(dataset_names=invalid_datasets)
 
-#     with pytest.raises(ValueError, match=f"Dataset(s) {invalid_datasets} not found in the NanoBEIR collection.Valid dataset names are: ['climatefever', 'dbpedia', 'fever', 'fiqa2018', 'hotpotqa', 'msmarco', 'nfcorpus', 'nq', 'quoraretrieval', 'scidocs', 'arguana', 'scifact', 'touche2020']"):
-#         NanoBEIREvaluator(
-#             dataset_names=invalid_datasets,
-#         )
 
-# def test_nanobeir_evaluator_empty_inputs():
-#     """Test that NanoBEIREvaluator behaves correctly with empty datasets."""
-#     with pytest.raises(ValueError, match="dataset_names cannot be empty. Use None to evaluate on all datasets."):
-#         NanoBEIREvaluator(
-#             dataset_names=[],
-#         )
+def test_nanobeir_evaluator_empty_inputs():
+    """Test that NanoBEIREvaluator behaves correctly with empty datasets."""
+    with pytest.raises(ValueError, match="dataset_names cannot be empty. Use None to evaluate on all datasets."):
+        NanoBEIREvaluator(dataset_names=[])

From a612c56532836034f81dbc8979b6e85169fb0895 Mon Sep 17 00:00:00 2001
From: Tom Aarsen
Date: Fri, 10 Jan 2025 13:37:04 +0100
Subject: [PATCH 4/4] Update test because we fixed a space between sentences in master

---
 tests/evaluation/test_nanobeir_evaluator.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/evaluation/test_nanobeir_evaluator.py b/tests/evaluation/test_nanobeir_evaluator.py
index 6f7f04cdf..cebe7bee7 100644
--- a/tests/evaluation/test_nanobeir_evaluator.py
+++ b/tests/evaluation/test_nanobeir_evaluator.py
@@ -36,7 +36,7 @@ def test_nanobeir_evaluator_with_invalid_dataset():
     with pytest.raises(
         ValueError,
         match=re.escape(
-            r"Dataset(s) ['invalidDataset'] not found in the NanoBEIR collection."
+            r"Dataset(s) ['invalidDataset'] not found in the NanoBEIR collection. "
             r"Valid dataset names are: ['climatefever', 'dbpedia', 'fever', 'fiqa2018', 'hotpotqa', 'msmarco', 'nfcorpus', 'nq', 'quoraretrieval', 'scidocs', 'arguana', 'scifact', 'touche2020']"
         ),
     ):