From 910cacabe6ab4c30099c4f745205a6f05dacbd39 Mon Sep 17 00:00:00 2001 From: Dario Lopez Padial Date: Thu, 8 Feb 2024 20:53:18 +0100 Subject: [PATCH 1/9] evaluating several spanish embedding models --- evaluation/embeddings_model/mteb_benchmark.py | 21 ++++++++++++++++ .../AmazonReviewsClassification.json | 25 +++++++++++++++++++ .../MTOPDomainClassification.json | 25 +++++++++++++++++++ .../MTOPIntentClassification.json | 25 +++++++++++++++++++ .../MassiveIntentClassification.json | 25 +++++++++++++++++++ .../MassiveScenarioClassification.json | 25 +++++++++++++++++++ .../PlanTL-GOB-ES/roberta-base-bne/STS17.json | 22 ++++++++++++++++ .../PlanTL-GOB-ES/roberta-base-bne/STS22.json | 22 ++++++++++++++++ .../AmazonReviewsClassification.json | 25 +++++++++++++++++++ .../MTOPDomainClassification.json | 25 +++++++++++++++++++ .../MTOPIntentClassification.json | 25 +++++++++++++++++++ .../MassiveIntentClassification.json | 25 +++++++++++++++++++ .../MassiveScenarioClassification.json | 25 +++++++++++++++++++ .../STS17.json | 22 ++++++++++++++++ .../STS22.json | 22 ++++++++++++++++ .../AmazonReviewsClassification.json | 25 +++++++++++++++++++ .../MTOPDomainClassification.json | 25 +++++++++++++++++++ .../MTOPIntentClassification.json | 25 +++++++++++++++++++ .../MassiveIntentClassification.json | 25 +++++++++++++++++++ .../MassiveScenarioClassification.json | 25 +++++++++++++++++++ .../STS17.json | 22 ++++++++++++++++ .../STS22.json | 22 ++++++++++++++++ .../AmazonReviewsClassification.json | 25 +++++++++++++++++++ .../MTOPDomainClassification.json | 25 +++++++++++++++++++ .../MTOPIntentClassification.json | 25 +++++++++++++++++++ .../MassiveIntentClassification.json | 25 +++++++++++++++++++ .../MassiveScenarioClassification.json | 25 +++++++++++++++++++ .../sentence_similarity_spanish_es/STS17.json | 22 ++++++++++++++++ .../sentence_similarity_spanish_es/STS22.json | 22 ++++++++++++++++ .../AmazonReviewsClassification.json | 25 +++++++++++++++++++ 
.../MTOPDomainClassification.json | 25 +++++++++++++++++++ .../MTOPIntentClassification.json | 25 +++++++++++++++++++ .../MassiveIntentClassification.json | 25 +++++++++++++++++++ .../MassiveScenarioClassification.json | 25 +++++++++++++++++++ .../intfloat/multilingual-e5-large/STS17.json | 22 ++++++++++++++++ .../intfloat/multilingual-e5-large/STS22.json | 22 ++++++++++++++++ .../AmazonReviewsClassification.json | 25 +++++++++++++++++++ .../MTOPDomainClassification.json | 25 +++++++++++++++++++ .../MTOPIntentClassification.json | 25 +++++++++++++++++++ .../MassiveIntentClassification.json | 25 +++++++++++++++++++ .../MassiveScenarioClassification.json | 25 +++++++++++++++++++ .../STS17.json | 22 ++++++++++++++++ .../STS22.json | 22 ++++++++++++++++ requirements.txt | 1 + 44 files changed, 1036 insertions(+) create mode 100644 evaluation/embeddings_model/mteb_benchmark.py create mode 100644 evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/AmazonReviewsClassification.json create mode 100644 evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/MTOPDomainClassification.json create mode 100644 evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/MTOPIntentClassification.json create mode 100644 evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/MassiveIntentClassification.json create mode 100644 evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/MassiveScenarioClassification.json create mode 100644 evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/STS17.json create mode 100644 evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/STS22.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/AmazonReviewsClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/MTOPDomainClassification.json create mode 100644 
evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/MTOPIntentClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/MassiveIntentClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/MassiveScenarioClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/STS17.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/STS22.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/AmazonReviewsClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/MTOPDomainClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/MTOPIntentClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/MassiveIntentClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/MassiveScenarioClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/STS17.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/STS22.json create mode 100644 evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/AmazonReviewsClassification.json create mode 100644 evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/MTOPDomainClassification.json create mode 100644 evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/MTOPIntentClassification.json create mode 
100644 evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/MassiveIntentClassification.json create mode 100644 evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/MassiveScenarioClassification.json create mode 100644 evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/STS17.json create mode 100644 evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/STS22.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/AmazonReviewsClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/MTOPDomainClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/MTOPIntentClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/MassiveIntentClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/MassiveScenarioClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/STS17.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/STS22.json create mode 100644 evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AmazonReviewsClassification.json create mode 100644 evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPDomainClassification.json create mode 100644 evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPIntentClassification.json create mode 100644 evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveIntentClassification.json create mode 100644 evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveScenarioClassification.json create mode 100644 
evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STS17.json create mode 100644 evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STS22.json diff --git a/evaluation/embeddings_model/mteb_benchmark.py b/evaluation/embeddings_model/mteb_benchmark.py new file mode 100644 index 0000000..8d3d855 --- /dev/null +++ b/evaluation/embeddings_model/mteb_benchmark.py @@ -0,0 +1,21 @@ +from mteb import MTEB +from sentence_transformers import SentenceTransformer + + +# https://github.com/embeddings-benchmark/mteb + + +# TODO: write results on model cards huggingface +# Define the sentence-transformers model name +# model_name = "dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn" +# model_name = "dariolopez/roberta-base-bne-finetuned-msmarco-qa-es" +# model_name = "PlanTL-GOB-ES/roberta-base-bne" + +# model_name = "hiiamsid/sentence_similarity_spanish_es" +model_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2" +# model_name = "intfloat/multilingual-e5-large" + + +model = SentenceTransformer(model_name) +evaluation = MTEB(task_langs=["es"]) +results = evaluation.run(model, output_folder=f"results/{model_name}") diff --git a/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/AmazonReviewsClassification.json b/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/AmazonReviewsClassification.json new file mode 100644 index 0000000..d1cfec4 --- /dev/null +++ b/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/AmazonReviewsClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d", + "mteb_dataset_name": "AmazonReviewsClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.34144, + "accuracy_stderr": 0.016988419585117386, + "f1": 0.33636678117905106, + "f1_stderr": 0.013519205351623978, + "main_score": 0.34144 + }, + "evaluation_time": 100.22 + }, + 
"validation": { + "es": { + "accuracy": 0.33520000000000005, + "accuracy_stderr": 0.013966817819388924, + "f1": 0.330814637640804, + "f1_stderr": 0.010710885426829122, + "main_score": 0.33520000000000005 + }, + "evaluation_time": 100.33 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/MTOPDomainClassification.json b/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/MTOPDomainClassification.json new file mode 100644 index 0000000..b1a21a2 --- /dev/null +++ b/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/MTOPDomainClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "d80d48c1eb48d3562165c59d59d0034df9fff0bf", + "mteb_dataset_name": "MTOPDomainClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.7890593729152768, + "accuracy_stderr": 0.018794435810413904, + "f1": 0.7876848449116528, + "f1_stderr": 0.017456518212651397, + "main_score": 0.7890593729152768 + }, + "evaluation_time": 28.83 + }, + "validation": { + "es": { + "accuracy": 0.7865749836280289, + "accuracy_stderr": 0.02095295080769619, + "f1": 0.7891430481808399, + "f1_stderr": 0.019727057188543985, + "main_score": 0.7865749836280289 + }, + "evaluation_time": 19.34 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/MTOPIntentClassification.json b/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/MTOPIntentClassification.json new file mode 100644 index 0000000..948b4c9 --- /dev/null +++ b/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/MTOPIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba", + "mteb_dataset_name": "MTOPIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.6316877918612408, + "accuracy_stderr": 0.01618058056575705, + "f1": 0.40606514595674226, + "f1_stderr": 
0.008349574016439362, + "main_score": 0.6316877918612408 + }, + "evaluation_time": 89.32 + }, + "validation": { + "es": { + "accuracy": 0.644204322200393, + "accuracy_stderr": 0.018306294524708992, + "f1": 0.4008335277417391, + "f1_stderr": 0.015864741206238393, + "main_score": 0.644204322200393 + }, + "evaluation_time": 80.09 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/MassiveIntentClassification.json b/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/MassiveIntentClassification.json new file mode 100644 index 0000000..16ace86 --- /dev/null +++ b/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/MassiveIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "31efe3c427b0bae9c22cbb560b8f15491cc6bed7", + "mteb_dataset_name": "MassiveIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.5029926025554808, + "accuracy_stderr": 0.0127299316323427, + "f1": 0.4743998239333547, + "f1_stderr": 0.014234461765030411, + "main_score": 0.5029926025554808 + }, + "evaluation_time": 63.7 + }, + "validation": { + "es": { + "accuracy": 0.5098868666994589, + "accuracy_stderr": 0.01210714080560374, + "f1": 0.4804865677543891, + "f1_stderr": 0.013410987834272919, + "main_score": 0.5098868666994589 + }, + "evaluation_time": 55.74 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/MassiveScenarioClassification.json b/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/MassiveScenarioClassification.json new file mode 100644 index 0000000..1b2abe4 --- /dev/null +++ b/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/MassiveScenarioClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "7d571f92784cd94a019292a1f45445077d0ef634", + "mteb_dataset_name": "MassiveScenarioClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + 
"accuracy": 0.5482178883658372, + "accuracy_stderr": 0.019817617388854396, + "f1": 0.5326008604217796, + "f1_stderr": 0.01830565953868972, + "main_score": 0.5482178883658372 + }, + "evaluation_time": 28.95 + }, + "validation": { + "es": { + "accuracy": 0.536694540088539, + "accuracy_stderr": 0.028431954052779713, + "f1": 0.526995563025894, + "f1_stderr": 0.027563486339162634, + "main_score": 0.536694540088539 + }, + "evaluation_time": 23.03 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/STS17.json b/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/STS17.json new file mode 100644 index 0000000..dca2df3 --- /dev/null +++ b/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/STS17.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "af5e6fb845001ecf41f4c1e033ce921939a2a68d", + "mteb_dataset_name": "STS17", + "mteb_version": "1.1.1", + "test": { + "es-es": { + "cos_sim": { + "pearson": 0.6571868805745334, + "spearman": 0.7074876873146115 + }, + "euclidean": { + "pearson": 0.6784231601234936, + "spearman": 0.7060150120314596 + }, + "manhattan": { + "pearson": 0.6838790384087841, + "spearman": 0.712420763445709 + } + }, + "evaluation_time": 3.11 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/STS22.json b/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/STS22.json new file mode 100644 index 0000000..c3ccb2a --- /dev/null +++ b/evaluation/embeddings_model/results/PlanTL-GOB-ES/roberta-base-bne/STS22.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "6d1ba47164174a496b7fa5d3569dae26a6813b80", + "mteb_dataset_name": "STS22", + "mteb_version": "1.1.1", + "test": { + "es": { + "cos_sim": { + "pearson": 0.14297354292597228, + "spearman": 0.48236753079813854 + }, + "euclidean": { + "pearson": 0.28919439095575666, + "spearman": 0.4845776660733576 + }, + "manhattan": { + "pearson": 0.29897180519935346, + 
"spearman": 0.48815426704531506 + } + }, + "evaluation_time": 74.71 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/AmazonReviewsClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/AmazonReviewsClassification.json new file mode 100644 index 0000000..a30f4a0 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/AmazonReviewsClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d", + "mteb_dataset_name": "AmazonReviewsClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.28186, + "accuracy_stderr": 0.017661041871871543, + "f1": 0.2806067493281096, + "f1_stderr": 0.01775108480910862, + "main_score": 0.28186 + }, + "evaluation_time": 93.01 + }, + "validation": { + "es": { + "accuracy": 0.28406, + "accuracy_stderr": 0.014107884320478386, + "f1": 0.28246198081890056, + "f1_stderr": 0.014232525443229387, + "main_score": 0.28406 + }, + "evaluation_time": 91.96 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/MTOPDomainClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/MTOPDomainClassification.json new file mode 100644 index 0000000..25b2fa9 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/MTOPDomainClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "d80d48c1eb48d3562165c59d59d0034df9fff0bf", + "mteb_dataset_name": "MTOPDomainClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.7927284856571047, + "accuracy_stderr": 0.014499662991338199, + "f1": 0.7831712189404422, + "f1_stderr": 0.011730917631352086, + 
"main_score": 0.7927284856571047 + }, + "evaluation_time": 26.76 + }, + "validation": { + "es": { + "accuracy": 0.8023575638506877, + "accuracy_stderr": 0.014858102808007794, + "f1": 0.8024051617881145, + "f1_stderr": 0.01452340761132967, + "main_score": 0.8023575638506877 + }, + "evaluation_time": 17.76 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/MTOPIntentClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/MTOPIntentClassification.json new file mode 100644 index 0000000..c3bcee0 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/MTOPIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba", + "mteb_dataset_name": "MTOPIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.5307538358905938, + "accuracy_stderr": 0.017212120729854646, + "f1": 0.3579310659827192, + "f1_stderr": 0.011572431982612487, + "main_score": 0.5307538358905938 + }, + "evaluation_time": 82.73 + }, + "validation": { + "es": { + "accuracy": 0.5364767518009168, + "accuracy_stderr": 0.0202500569378784, + "f1": 0.31238300055355966, + "f1_stderr": 0.012765541417361052, + "main_score": 0.5364767518009168 + }, + "evaluation_time": 74.18 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/MassiveIntentClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/MassiveIntentClassification.json new file mode 100644 index 0000000..0c34756 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/MassiveIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": 
"31efe3c427b0bae9c22cbb560b8f15491cc6bed7", + "mteb_dataset_name": "MassiveIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.5474781439139207, + "accuracy_stderr": 0.014695053508966507, + "f1": 0.5388416786451913, + "f1_stderr": 0.011391899035244148, + "main_score": 0.5474781439139207 + }, + "evaluation_time": 58.9 + }, + "validation": { + "es": { + "accuracy": 0.5647319232661092, + "accuracy_stderr": 0.01970785559946992, + "f1": 0.5528968514715037, + "f1_stderr": 0.011608052194429121, + "main_score": 0.5647319232661092 + }, + "evaluation_time": 56.98 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/MassiveScenarioClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/MassiveScenarioClassification.json new file mode 100644 index 0000000..108cd52 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/MassiveScenarioClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "7d571f92784cd94a019292a1f45445077d0ef634", + "mteb_dataset_name": "MassiveScenarioClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.6311365164761265, + "accuracy_stderr": 0.019017462978698926, + "f1": 0.6335648828399776, + "f1_stderr": 0.01624440039840478, + "main_score": 0.6311365164761265 + }, + "evaluation_time": 27.45 + }, + "validation": { + "es": { + "accuracy": 0.6377274963108706, + "accuracy_stderr": 0.02220489522288182, + "f1": 0.6427239311554347, + "f1_stderr": 0.021055866780710124, + "main_score": 0.6377274963108706 + }, + "evaluation_time": 21.66 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/STS17.json 
b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/STS17.json new file mode 100644 index 0000000..276d318 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/STS17.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "af5e6fb845001ecf41f4c1e033ce921939a2a68d", + "mteb_dataset_name": "STS17", + "mteb_version": "1.1.1", + "test": { + "es-es": { + "cos_sim": { + "pearson": 0.7935023671448772, + "spearman": 0.8138426859678183 + }, + "euclidean": { + "pearson": 0.798022745583715, + "spearman": 0.7969494579586819 + }, + "manhattan": { + "pearson": 0.7984973437956621, + "spearman": 0.798509079048555 + } + }, + "evaluation_time": 2.68 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/STS22.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/STS22.json new file mode 100644 index 0000000..ef242a8 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/STS22.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "6d1ba47164174a496b7fa5d3569dae26a6813b80", + "mteb_dataset_name": "STS22", + "mteb_version": "1.1.1", + "test": { + "es": { + "cos_sim": { + "pearson": 0.6113034593400181, + "spearman": 0.5948046751699049 + }, + "euclidean": { + "pearson": 0.6224913312842192, + "spearman": 0.5999849091787655 + }, + "manhattan": { + "pearson": 0.6249518846355359, + "spearman": 0.5997154342133152 + } + }, + "evaluation_time": 64.23 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/AmazonReviewsClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/AmazonReviewsClassification.json new file mode 100644 index 0000000..95e65dc --- /dev/null +++ 
b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/AmazonReviewsClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d", + "mteb_dataset_name": "AmazonReviewsClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.31126, + "accuracy_stderr": 0.021366525220540653, + "f1": 0.3083336989666412, + "f1_stderr": 0.021891648956010274, + "main_score": 0.31126 + }, + "evaluation_time": 107.25 + }, + "validation": { + "es": { + "accuracy": 0.31274, + "accuracy_stderr": 0.024424913510594046, + "f1": 0.3097240980379627, + "f1_stderr": 0.02527386766754536, + "main_score": 0.31274 + }, + "evaluation_time": 101.4 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/MTOPDomainClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/MTOPDomainClassification.json new file mode 100644 index 0000000..6eb42c6 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/MTOPDomainClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "d80d48c1eb48d3562165c59d59d0034df9fff0bf", + "mteb_dataset_name": "MTOPDomainClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.6983655770513677, + "accuracy_stderr": 0.01705774648629189, + "f1": 0.6954179269281322, + "f1_stderr": 0.014475827923814345, + "main_score": 0.6983655770513677 + }, + "evaluation_time": 28.81 + }, + "validation": { + "es": { + "accuracy": 0.7032743942370662, + "accuracy_stderr": 0.02680119186208919, + "f1": 0.7051810035522595, + "f1_stderr": 0.026870602586277877, + "main_score": 0.7032743942370662 + }, + "evaluation_time": 18.88 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/MTOPIntentClassification.json 
b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/MTOPIntentClassification.json new file mode 100644 index 0000000..bdf64bc --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/MTOPIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba", + "mteb_dataset_name": "MTOPIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.4206470980653769, + "accuracy_stderr": 0.016341812730608463, + "f1": 0.28080057864363395, + "f1_stderr": 0.009827048392585197, + "main_score": 0.4206470980653769 + }, + "evaluation_time": 91.18 + }, + "validation": { + "es": { + "accuracy": 0.43575638506876224, + "accuracy_stderr": 0.014179904031318599, + "f1": 0.2657820980345229, + "f1_stderr": 0.010098206033812574, + "main_score": 0.43575638506876224 + }, + "evaluation_time": 81.01 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/MassiveIntentClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/MassiveIntentClassification.json new file mode 100644 index 0000000..73f8916 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/MassiveIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "31efe3c427b0bae9c22cbb560b8f15491cc6bed7", + "mteb_dataset_name": "MassiveIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.4548419636852724, + "accuracy_stderr": 0.012759738965487887, + "f1": 0.44885529357908005, + "f1_stderr": 0.008829485114224506, + "main_score": 0.4548419636852724 + }, + "evaluation_time": 65.03 + }, + "validation": { + "es": { + "accuracy": 0.46281357599606493, + "accuracy_stderr": 0.019077805007935426, + "f1": 0.44830835897009375, + "f1_stderr": 0.016809451715923032, + 
"main_score": 0.46281357599606493 + }, + "evaluation_time": 57.19 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/MassiveScenarioClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/MassiveScenarioClassification.json new file mode 100644 index 0000000..ff71335 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/MassiveScenarioClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "7d571f92784cd94a019292a1f45445077d0ef634", + "mteb_dataset_name": "MassiveScenarioClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.5189979825151312, + "accuracy_stderr": 0.03456265894321546, + "f1": 0.5128912429896337, + "f1_stderr": 0.027600059301321533, + "main_score": 0.5189979825151312 + }, + "evaluation_time": 30.65 + }, + "validation": { + "es": { + "accuracy": 0.5210526315789473, + "accuracy_stderr": 0.031159244561808783, + "f1": 0.5181690893133215, + "f1_stderr": 0.024982169894008227, + "main_score": 0.5210526315789473 + }, + "evaluation_time": 24.03 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/STS17.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/STS17.json new file mode 100644 index 0000000..acd19ee --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/STS17.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "af5e6fb845001ecf41f4c1e033ce921939a2a68d", + "mteb_dataset_name": "STS17", + "mteb_version": "1.1.1", + "test": { + "es-es": { + "cos_sim": { + "pearson": 0.7289229131253915, + "spearman": 0.7471156922260873 + }, + "euclidean": { + "pearson": 0.7674848009173773, + "spearman": 0.7490253318999661 + }, + "manhattan": { + "pearson": 0.7632879736586534, + 
"spearman": 0.7413210498100508 + } + }, + "evaluation_time": 2.99 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/STS22.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/STS22.json new file mode 100644 index 0000000..d3e699b --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es/STS22.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "6d1ba47164174a496b7fa5d3569dae26a6813b80", + "mteb_dataset_name": "STS22", + "mteb_version": "1.1.1", + "test": { + "es": { + "cos_sim": { + "pearson": 0.5541274847731279, + "spearman": 0.5982212065438565 + }, + "euclidean": { + "pearson": 0.5984247086566415, + "spearman": 0.6007911312824832 + }, + "manhattan": { + "pearson": 0.6144051524166955, + "spearman": 0.600705222369792 + } + }, + "evaluation_time": 72.89 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/AmazonReviewsClassification.json b/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/AmazonReviewsClassification.json new file mode 100644 index 0000000..f32faba --- /dev/null +++ b/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/AmazonReviewsClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d", + "mteb_dataset_name": "AmazonReviewsClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.3508, + "accuracy_stderr": 0.02129976525692243, + "f1": 0.3443949403617718, + "f1_stderr": 0.01759914786553492, + "main_score": 0.3508 + }, + "evaluation_time": 96.97 + }, + "validation": { + "es": { + "accuracy": 0.34538, + "accuracy_stderr": 0.022516562792753245, + "f1": 0.3391582112075734, + "f1_stderr": 0.018063156348266843, + "main_score": 0.34538 + }, + "evaluation_time": 99.36 + } +} \ No newline at end of 
file diff --git a/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/MTOPDomainClassification.json b/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/MTOPDomainClassification.json new file mode 100644 index 0000000..da1f44d --- /dev/null +++ b/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/MTOPDomainClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "d80d48c1eb48d3562165c59d59d0034df9fff0bf", + "mteb_dataset_name": "MTOPDomainClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.8585723815877252, + "accuracy_stderr": 0.009637393496881178, + "f1": 0.8547711636074229, + "f1_stderr": 0.009801142326641072, + "main_score": 0.8585723815877252 + }, + "evaluation_time": 26.16 + }, + "validation": { + "es": { + "accuracy": 0.8573018991486576, + "accuracy_stderr": 0.013988831481303218, + "f1": 0.8568403693895226, + "f1_stderr": 0.014658470836623546, + "main_score": 0.8573018991486576 + }, + "evaluation_time": 17.41 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/MTOPIntentClassification.json b/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/MTOPIntentClassification.json new file mode 100644 index 0000000..d00263c --- /dev/null +++ b/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/MTOPIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba", + "mteb_dataset_name": "MTOPIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.6520013342228153, + "accuracy_stderr": 0.01046009626954406, + "f1": 0.4574064160214279, + "f1_stderr": 0.012462086626021643, + "main_score": 0.6520013342228153 + }, + "evaluation_time": 82.96 + }, + "validation": { + "es": { + "accuracy": 0.6493123772102162, + "accuracy_stderr": 0.01208335770659423, + "f1": 
0.4230617448088956, + "f1_stderr": 0.011546084342406499, + "main_score": 0.6493123772102162 + }, + "evaluation_time": 74.52 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/MassiveIntentClassification.json b/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/MassiveIntentClassification.json new file mode 100644 index 0000000..7cf8263 --- /dev/null +++ b/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/MassiveIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "31efe3c427b0bae9c22cbb560b8f15491cc6bed7", + "mteb_dataset_name": "MassiveIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.584969737726967, + "accuracy_stderr": 0.013962208387043002, + "f1": 0.5749275038392078, + "f1_stderr": 0.0109764564691608, + "main_score": 0.584969737726967 + }, + "evaluation_time": 58.33 + }, + "validation": { + "es": { + "accuracy": 0.5975405804230203, + "accuracy_stderr": 0.008188943933755735, + "f1": 0.5861008946633441, + "f1_stderr": 0.01052282467632772, + "main_score": 0.5975405804230203 + }, + "evaluation_time": 54.38 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/MassiveScenarioClassification.json b/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/MassiveScenarioClassification.json new file mode 100644 index 0000000..5a40c7d --- /dev/null +++ b/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/MassiveScenarioClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "7d571f92784cd94a019292a1f45445077d0ef634", + "mteb_dataset_name": "MassiveScenarioClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.6421318090114324, + "accuracy_stderr": 0.013160169593034413, + "f1": 0.6423408330542625, + "f1_stderr": 0.01289265042810299, + "main_score": 
0.6421318090114324 + }, + "evaluation_time": 26.8 + }, + "validation": { + "es": { + "accuracy": 0.6451549434333497, + "accuracy_stderr": 0.014303930132083758, + "f1": 0.6443104176023486, + "f1_stderr": 0.012370614510047168, + "main_score": 0.6451549434333497 + }, + "evaluation_time": 21.1 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/STS17.json b/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/STS17.json new file mode 100644 index 0000000..57cfc97 --- /dev/null +++ b/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/STS17.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "af5e6fb845001ecf41f4c1e033ce921939a2a68d", + "mteb_dataset_name": "STS17", + "mteb_version": "1.1.1", + "test": { + "es-es": { + "cos_sim": { + "pearson": 0.8569180483447304, + "spearman": 0.853683083252358 + }, + "euclidean": { + "pearson": 0.8479205681874118, + "spearman": 0.8421300986329777 + }, + "manhattan": { + "pearson": 0.8469377411674575, + "spearman": 0.8413845091113608 + } + }, + "evaluation_time": 2.72 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/STS22.json b/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/STS22.json new file mode 100644 index 0000000..bcba764 --- /dev/null +++ b/evaluation/embeddings_model/results/hiiamsid/sentence_similarity_spanish_es/STS22.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "6d1ba47164174a496b7fa5d3569dae26a6813b80", + "mteb_dataset_name": "STS22", + "mteb_version": "1.1.1", + "test": { + "es": { + "cos_sim": { + "pearson": 0.5178787163299071, + "spearman": 0.650708615141359 + }, + "euclidean": { + "pearson": 0.4552859487670042, + "spearman": 0.6101052727994778 + }, + "manhattan": { + "pearson": 0.47557787371752575, + "spearman": 0.6139638312284731 + } + }, + "evaluation_time": 57.11 + } +} \ No newline at end of file 
diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/AmazonReviewsClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/AmazonReviewsClassification.json new file mode 100644 index 0000000..9705040 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/AmazonReviewsClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d", + "mteb_dataset_name": "AmazonReviewsClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.42662000000000005, + "accuracy_stderr": 0.017049914955799626, + "f1": 0.4068365404065844, + "f1_stderr": 0.01382586222119599, + "main_score": 0.42662000000000005 + }, + "evaluation_time": 326.48 + }, + "validation": { + "es": { + "accuracy": 0.42425999999999997, + "accuracy_stderr": 0.016388788850918788, + "f1": 0.4056125153413965, + "f1_stderr": 0.012509405368921719, + "main_score": 0.42425999999999997 + }, + "evaluation_time": 353.78 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/MTOPDomainClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/MTOPDomainClassification.json new file mode 100644 index 0000000..a51b368 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/MTOPDomainClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "d80d48c1eb48d3562165c59d59d0034df9fff0bf", + "mteb_dataset_name": "MTOPDomainClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.8994663108739157, + "accuracy_stderr": 0.012936802201346857, + "f1": 0.8951163764548161, + "f1_stderr": 0.01312004703792324, + "main_score": 0.8994663108739157 + }, + "evaluation_time": 80.89 + }, + "validation": { + "es": { + "accuracy": 0.8940406024885397, + "accuracy_stderr": 0.01782618235059498, + "f1": 0.8940253789536395, + "f1_stderr": 0.017708036561476393, + 
"main_score": 0.8940406024885397 + }, + "evaluation_time": 52.53 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/MTOPIntentClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/MTOPIntentClassification.json new file mode 100644 index 0000000..8c6bccc --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/MTOPIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba", + "mteb_dataset_name": "MTOPIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.6684122748499, + "accuracy_stderr": 0.017744693197759932, + "f1": 0.4286434253640251, + "f1_stderr": 0.00929097964002798, + "main_score": 0.6684122748499 + }, + "evaluation_time": 219.75 + }, + "validation": { + "es": { + "accuracy": 0.6719056974459725, + "accuracy_stderr": 0.020700802200529514, + "f1": 0.43339130114497265, + "f1_stderr": 0.012370933242521186, + "main_score": 0.6719056974459725 + }, + "evaluation_time": 187.55 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/MassiveIntentClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/MassiveIntentClassification.json new file mode 100644 index 0000000..c92ab85 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/MassiveIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "31efe3c427b0bae9c22cbb560b8f15491cc6bed7", + "mteb_dataset_name": "MassiveIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.6468056489576328, + "accuracy_stderr": 0.014676114294591824, + "f1": 0.6305387527880884, + "f1_stderr": 0.012490480700675826, + "main_score": 0.6468056489576328 + }, + "evaluation_time": 160.24 + }, + "validation": { + "es": { + "accuracy": 0.6552877520905067, + 
"accuracy_stderr": 0.010104114597311994, + "f1": 0.6203891094243168, + "f1_stderr": 0.011213322080177559, + "main_score": 0.6552877520905067 + }, + "evaluation_time": 127.37 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/MassiveScenarioClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/MassiveScenarioClassification.json new file mode 100644 index 0000000..3108e4a --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/MassiveScenarioClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "7d571f92784cd94a019292a1f45445077d0ef634", + "mteb_dataset_name": "MassiveScenarioClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.6885003362474781, + "accuracy_stderr": 0.015955430017300504, + "f1": 0.6805282214017474, + "f1_stderr": 0.017638966212523616, + "main_score": 0.6885003362474781 + }, + "evaluation_time": 91.32 + }, + "validation": { + "es": { + "accuracy": 0.6896704377766847, + "accuracy_stderr": 0.019058772115803528, + "f1": 0.6783973796362204, + "f1_stderr": 0.021594319708095513, + "main_score": 0.6896704377766847 + }, + "evaluation_time": 92.43 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/STS17.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/STS17.json new file mode 100644 index 0000000..ec35c76 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/STS17.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "af5e6fb845001ecf41f4c1e033ce921939a2a68d", + "mteb_dataset_name": "STS17", + "mteb_version": "1.1.1", + "test": { + "es-es": { + "cos_sim": { + "pearson": 0.8794572056845176, + "spearman": 0.8742189027425435 + }, + "euclidean": { + "pearson": 0.8739718678516306, + "spearman": 0.8742364503055847 + }, + "manhattan": { + "pearson": 0.8742652537534602, + "spearman": 
0.8764501432578528 + } + }, + "evaluation_time": 11.11 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/STS22.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/STS22.json new file mode 100644 index 0000000..31b6078 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/STS22.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "6d1ba47164174a496b7fa5d3569dae26a6813b80", + "mteb_dataset_name": "STS22", + "mteb_version": "1.1.1", + "test": { + "es": { + "cos_sim": { + "pearson": 0.6194194051731156, + "spearman": 0.6823157523696245 + }, + "euclidean": { + "pearson": 0.6467776806801534, + "spearman": 0.6823157523696245 + }, + "manhattan": { + "pearson": 0.6482066635828536, + "spearman": 0.6832827783355593 + } + }, + "evaluation_time": 221.89 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AmazonReviewsClassification.json b/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AmazonReviewsClassification.json new file mode 100644 index 0000000..d519b5a --- /dev/null +++ b/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/AmazonReviewsClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d", + "mteb_dataset_name": "AmazonReviewsClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.39986, + "accuracy_stderr": 0.012456018625548055, + "f1": 0.3912505535737757, + "f1_stderr": 0.013236448747410179, + "main_score": 0.39986 + }, + "evaluation_time": 91.6 + }, + "validation": { + "es": { + "accuracy": 0.40171999999999997, + "accuracy_stderr": 0.017711962059579957, + "f1": 0.39407171808239566, + "f1_stderr": 0.013704627824214425, + "main_score": 0.40171999999999997 + }, + "evaluation_time": 92.84 + } +} \ No 
newline at end of file diff --git a/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPDomainClassification.json b/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPDomainClassification.json new file mode 100644 index 0000000..fe53568 --- /dev/null +++ b/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPDomainClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "d80d48c1eb48d3562165c59d59d0034df9fff0bf", + "mteb_dataset_name": "MTOPDomainClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.8696464309539692, + "accuracy_stderr": 0.010700641656603945, + "f1": 0.8685603104400267, + "f1_stderr": 0.00883607408807341, + "main_score": 0.8696464309539692 + }, + "evaluation_time": 24.41 + }, + "validation": { + "es": { + "accuracy": 0.8741322855271776, + "accuracy_stderr": 0.009896257341120424, + "f1": 0.8764287824251873, + "f1_stderr": 0.008328855669008938, + "main_score": 0.8741322855271776 + }, + "evaluation_time": 16.84 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPIntentClassification.json b/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPIntentClassification.json new file mode 100644 index 0000000..9bede5b --- /dev/null +++ b/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MTOPIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba", + "mteb_dataset_name": "MTOPIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.6659106070713809, + "accuracy_stderr": 0.01147955106551365, + "f1": 0.4556629396458189, + "f1_stderr": 0.008120826136162898, + "main_score": 0.6659106070713809 + }, + "evaluation_time": 
74.85 + }, + "validation": { + "es": { + "accuracy": 0.6698755730189914, + "accuracy_stderr": 0.01814723523803665, + "f1": 0.4371930462578926, + "f1_stderr": 0.010185798603599118, + "main_score": 0.6698755730189914 + }, + "evaluation_time": 66.31 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveIntentClassification.json b/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveIntentClassification.json new file mode 100644 index 0000000..69b8305 --- /dev/null +++ b/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "31efe3c427b0bae9c22cbb560b8f15491cc6bed7", + "mteb_dataset_name": "MassiveIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.6443174176193679, + "accuracy_stderr": 0.012155985792885286, + "f1": 0.6310097866043903, + "f1_stderr": 0.00951304551736477, + "main_score": 0.6443174176193679 + }, + "evaluation_time": 61.45 + }, + "validation": { + "es": { + "accuracy": 0.6472700442695524, + "accuracy_stderr": 0.015947131428677534, + "f1": 0.6369800505053479, + "f1_stderr": 0.012678240653370955, + "main_score": 0.6472700442695524 + }, + "evaluation_time": 54.1 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveScenarioClassification.json b/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveScenarioClassification.json new file mode 100644 index 0000000..dd71cdc --- /dev/null +++ b/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/MassiveScenarioClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "7d571f92784cd94a019292a1f45445077d0ef634", + 
"mteb_dataset_name": "MassiveScenarioClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.7041694687289846, + "accuracy_stderr": 0.018443289119139346, + "f1": 0.7106913080791534, + "f1_stderr": 0.017361087842520318, + "main_score": 0.7041694687289846 + }, + "evaluation_time": 25.3 + }, + "validation": { + "es": { + "accuracy": 0.7063453025086079, + "accuracy_stderr": 0.022839521428910776, + "f1": 0.7122182428764718, + "f1_stderr": 0.02260563584893285, + "main_score": 0.7063453025086079 + }, + "evaluation_time": 20.49 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STS17.json b/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STS17.json new file mode 100644 index 0000000..6fd0ca3 --- /dev/null +++ b/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STS17.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "af5e6fb845001ecf41f4c1e033ce921939a2a68d", + "mteb_dataset_name": "STS17", + "mteb_version": "1.1.1", + "test": { + "es-es": { + "cos_sim": { + "pearson": 0.8412540596043537, + "spearman": 0.8514401623242552 + }, + "euclidean": { + "pearson": 0.8559436342707457, + "spearman": 0.8383497843599057 + }, + "manhattan": { + "pearson": 0.8559579145030306, + "spearman": 0.8381179176150355 + } + }, + "evaluation_time": 2.99 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STS22.json b/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STS22.json new file mode 100644 index 0000000..54708d5 --- /dev/null +++ b/evaluation/embeddings_model/results/sentence-transformers/paraphrase-multilingual-mpnet-base-v2/STS22.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "6d1ba47164174a496b7fa5d3569dae26a6813b80", + "mteb_dataset_name": 
"STS22", + "mteb_version": "1.1.1", + "test": { + "es": { + "cos_sim": { + "pearson": 0.5750628871456058, + "spearman": 0.5991118690318434 + }, + "euclidean": { + "pearson": 0.5317953351885004, + "spearman": 0.5694079446385838 + }, + "manhattan": { + "pearson": 0.5358547184959347, + "spearman": 0.5708970324585654 + } + }, + "evaluation_time": 13.38 + } +} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index dab0ff7..1329869 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,4 +30,5 @@ black==23.9.1 isort==5.12.0 # Evaluation +mteb==1.1.1 # ragas==0.1.0rc1 From 50f8b4810f086f4d2f759e6ed022f9dd9b9b5210 Mon Sep 17 00:00:00 2001 From: Dario Lopez Padial Date: Thu, 8 Feb 2024 22:06:48 +0100 Subject: [PATCH 2/9] evaluating several spanish embedding models --- evaluation/embeddings_model/README.md | 3 +++ evaluation/embeddings_model/mteb_benchmark.py | 6 ++++- .../AmazonReviewsClassification.json | 25 +++++++++++++++++++ .../RoBERTalex/MTOPDomainClassification.json | 25 +++++++++++++++++++ .../RoBERTalex/MTOPIntentClassification.json | 25 +++++++++++++++++++ .../MassiveIntentClassification.json | 25 +++++++++++++++++++ .../MassiveScenarioClassification.json | 25 +++++++++++++++++++ .../PlanTL-GOB-ES/RoBERTalex/STS17.json | 22 ++++++++++++++++ .../PlanTL-GOB-ES/RoBERTalex/STS22.json | 22 ++++++++++++++++ .../AmazonReviewsClassification.json | 25 +++++++++++++++++++ .../MTOPDomainClassification.json | 25 +++++++++++++++++++ .../MTOPIntentClassification.json | 25 +++++++++++++++++++ .../MassiveIntentClassification.json | 25 +++++++++++++++++++ .../MassiveScenarioClassification.json | 25 +++++++++++++++++++ .../intfloat/multilingual-e5-base/STS17.json | 22 ++++++++++++++++ .../intfloat/multilingual-e5-base/STS22.json | 22 ++++++++++++++++ .../AmazonReviewsClassification.json | 25 +++++++++++++++++++ .../MTOPDomainClassification.json | 25 +++++++++++++++++++ .../MTOPIntentClassification.json | 25 +++++++++++++++++++ 
.../MassiveIntentClassification.json | 25 +++++++++++++++++++ .../MassiveScenarioClassification.json | 25 +++++++++++++++++++ .../multilingual-e5-large-instruct/STS17.json | 22 ++++++++++++++++ .../multilingual-e5-large-instruct/STS22.json | 22 ++++++++++++++++ .../AmazonReviewsClassification.json | 25 +++++++++++++++++++ .../MTOPDomainClassification.json | 25 +++++++++++++++++++ .../MTOPIntentClassification.json | 25 +++++++++++++++++++ .../MassiveIntentClassification.json | 25 +++++++++++++++++++ .../MassiveScenarioClassification.json | 25 +++++++++++++++++++ .../intfloat/multilingual-e5-small/STS17.json | 22 ++++++++++++++++ .../intfloat/multilingual-e5-small/STS22.json | 22 ++++++++++++++++ 30 files changed, 684 insertions(+), 1 deletion(-) create mode 100644 evaluation/embeddings_model/README.md create mode 100644 evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/AmazonReviewsClassification.json create mode 100644 evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/MTOPDomainClassification.json create mode 100644 evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/MTOPIntentClassification.json create mode 100644 evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/MassiveIntentClassification.json create mode 100644 evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/MassiveScenarioClassification.json create mode 100644 evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/STS17.json create mode 100644 evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/STS22.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-base/AmazonReviewsClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-base/MTOPDomainClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-base/MTOPIntentClassification.json create mode 100644 
evaluation/embeddings_model/results/intfloat/multilingual-e5-base/MassiveIntentClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-base/MassiveScenarioClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-base/STS17.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-base/STS22.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/AmazonReviewsClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/MTOPDomainClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/MTOPIntentClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/MassiveIntentClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/MassiveScenarioClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/STS17.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/STS22.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-small/AmazonReviewsClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-small/MTOPDomainClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-small/MTOPIntentClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-small/MassiveIntentClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-small/MassiveScenarioClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-small/STS17.json create mode 100644 
evaluation/embeddings_model/results/intfloat/multilingual-e5-small/STS22.json diff --git a/evaluation/embeddings_model/README.md b/evaluation/embeddings_model/README.md new file mode 100644 index 0000000..4d95b7f --- /dev/null +++ b/evaluation/embeddings_model/README.md @@ -0,0 +1,3 @@ +```` +python mteb_benchmark.py +```` diff --git a/evaluation/embeddings_model/mteb_benchmark.py b/evaluation/embeddings_model/mteb_benchmark.py index 8d3d855..fdc3f36 100644 --- a/evaluation/embeddings_model/mteb_benchmark.py +++ b/evaluation/embeddings_model/mteb_benchmark.py @@ -10,10 +10,14 @@ # model_name = "dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn" # model_name = "dariolopez/roberta-base-bne-finetuned-msmarco-qa-es" # model_name = "PlanTL-GOB-ES/roberta-base-bne" +# model_name = "PlanTL-GOB-ES/RoBERTalex" # model_name = "hiiamsid/sentence_similarity_spanish_es" -model_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2" +# model_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2" +# model_name = "intfloat/multilingual-e5-small" +# model_name = "intfloat/multilingual-e5-base" # model_name = "intfloat/multilingual-e5-large" +model_name = "intfloat/multilingual-e5-large-instruct" model = SentenceTransformer(model_name) diff --git a/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/AmazonReviewsClassification.json b/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/AmazonReviewsClassification.json new file mode 100644 index 0000000..dcfd204 --- /dev/null +++ b/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/AmazonReviewsClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d", + "mteb_dataset_name": "AmazonReviewsClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.28464, + "accuracy_stderr": 0.013201757458762826, + "f1": 0.28283860986221493, + "f1_stderr": 0.013275949214868197, + "main_score": 0.28464 + }, + 
"evaluation_time": 83.99 + }, + "validation": { + "es": { + "accuracy": 0.28562000000000004, + "accuracy_stderr": 0.012776681885372275, + "f1": 0.28401552345829, + "f1_stderr": 0.01318780132891389, + "main_score": 0.28562000000000004 + }, + "evaluation_time": 85.49 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/MTOPDomainClassification.json b/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/MTOPDomainClassification.json new file mode 100644 index 0000000..bcf07c6 --- /dev/null +++ b/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/MTOPDomainClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "d80d48c1eb48d3562165c59d59d0034df9fff0bf", + "mteb_dataset_name": "MTOPDomainClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.6472981987991995, + "accuracy_stderr": 0.017254988407739143, + "f1": 0.6427407924709827, + "f1_stderr": 0.015872706671016652, + "main_score": 0.6472981987991995 + }, + "evaluation_time": 22.91 + }, + "validation": { + "es": { + "accuracy": 0.6368696791093647, + "accuracy_stderr": 0.015657631466246456, + "f1": 0.6379678994326683, + "f1_stderr": 0.016513288566508744, + "main_score": 0.6368696791093647 + }, + "evaluation_time": 16.43 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/MTOPIntentClassification.json b/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/MTOPIntentClassification.json new file mode 100644 index 0000000..d8274cb --- /dev/null +++ b/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/MTOPIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba", + "mteb_dataset_name": "MTOPIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.5137424949966645, + "accuracy_stderr": 0.014183610624100198, + "f1": 0.3339230024195149, + "f1_stderr": 
0.006785519855578168, + "main_score": 0.5137424949966645 + }, + "evaluation_time": 62.33 + }, + "validation": { + "es": { + "accuracy": 0.5255402750491159, + "accuracy_stderr": 0.014134615951135193, + "f1": 0.31369491577935615, + "f1_stderr": 0.007614119469626184, + "main_score": 0.5255402750491159 + }, + "evaluation_time": 53.82 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/MassiveIntentClassification.json b/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/MassiveIntentClassification.json new file mode 100644 index 0000000..fda62b1 --- /dev/null +++ b/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/MassiveIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "31efe3c427b0bae9c22cbb560b8f15491cc6bed7", + "mteb_dataset_name": "MassiveIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.3995965030262273, + "accuracy_stderr": 0.013365742091501185, + "f1": 0.38066220585503113, + "f1_stderr": 0.012757862196520506, + "main_score": 0.3995965030262273 + }, + "evaluation_time": 44.15 + }, + "validation": { + "es": { + "accuracy": 0.4039350713231677, + "accuracy_stderr": 0.014115954305100325, + "f1": 0.389852650174955, + "f1_stderr": 0.011795009031701133, + "main_score": 0.4039350713231677 + }, + "evaluation_time": 39.45 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/MassiveScenarioClassification.json b/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/MassiveScenarioClassification.json new file mode 100644 index 0000000..007d7c2 --- /dev/null +++ b/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/MassiveScenarioClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "7d571f92784cd94a019292a1f45445077d0ef634", + "mteb_dataset_name": "MassiveScenarioClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.4341627437794216, + 
"accuracy_stderr": 0.016131330396949813, + "f1": 0.4153157741539995, + "f1_stderr": 0.015506374128159356, + "main_score": 0.4341627437794216 + }, + "evaluation_time": 24.83 + }, + "validation": { + "es": { + "accuracy": 0.42626660108214465, + "accuracy_stderr": 0.01728354373573784, + "f1": 0.417570735459108, + "f1_stderr": 0.01837268916532102, + "main_score": 0.42626660108214465 + }, + "evaluation_time": 21.49 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/STS17.json b/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/STS17.json new file mode 100644 index 0000000..062cdfc --- /dev/null +++ b/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/STS17.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "af5e6fb845001ecf41f4c1e033ce921939a2a68d", + "mteb_dataset_name": "STS17", + "mteb_version": "1.1.1", + "test": { + "es-es": { + "cos_sim": { + "pearson": 0.5898887137325016, + "spearman": 0.6056959324219369 + }, + "euclidean": { + "pearson": 0.6021018717990662, + "spearman": 0.5919850402560723 + }, + "manhattan": { + "pearson": 0.601682356181486, + "spearman": 0.5908126350359733 + } + }, + "evaluation_time": 3.04 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/STS22.json b/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/STS22.json new file mode 100644 index 0000000..9178ef5 --- /dev/null +++ b/evaluation/embeddings_model/results/PlanTL-GOB-ES/RoBERTalex/STS22.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "6d1ba47164174a496b7fa5d3569dae26a6813b80", + "mteb_dataset_name": "STS22", + "mteb_version": "1.1.1", + "test": { + "es": { + "cos_sim": { + "pearson": 0.1748871126981829, + "spearman": 0.5340230180445371 + }, + "euclidean": { + "pearson": 0.18634789050159703, + "spearman": 0.5355671756803463 + }, + "manhattan": { + "pearson": 0.18929028193878622, + "spearman": 0.534061934047722 + } + }, + "evaluation_time": 60.64 + } 
+} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/AmazonReviewsClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/AmazonReviewsClassification.json new file mode 100644 index 0000000..6728535 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/AmazonReviewsClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d", + "mteb_dataset_name": "AmazonReviewsClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.42444, + "accuracy_stderr": 0.012424105601611732, + "f1": 0.40080078518414153, + "f1_stderr": 0.013095015650956282, + "main_score": 0.42444 + }, + "evaluation_time": 88.7 + }, + "validation": { + "es": { + "accuracy": 0.42696000000000006, + "accuracy_stderr": 0.011949158966220176, + "f1": 0.4043513077526323, + "f1_stderr": 0.011728633483086935, + "main_score": 0.42696000000000006 + }, + "evaluation_time": 86.47 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/MTOPDomainClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/MTOPDomainClassification.json new file mode 100644 index 0000000..d85f342 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/MTOPDomainClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "d80d48c1eb48d3562165c59d59d0034df9fff0bf", + "mteb_dataset_name": "MTOPDomainClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.8963975983989325, + "accuracy_stderr": 0.011143157056689695, + "f1": 0.891074854397049, + "f1_stderr": 0.01216334073496693, + "main_score": 0.8963975983989325 + }, + "evaluation_time": 24.34 + }, + "validation": { + "es": { + "accuracy": 0.895350360183366, + "accuracy_stderr": 0.014765448679355385, + "f1": 0.8944226038689115, + "f1_stderr": 0.015212141325453472, + 
"main_score": 0.895350360183366 + }, + "evaluation_time": 17.01 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/MTOPIntentClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/MTOPIntentClassification.json new file mode 100644 index 0000000..6f61e8b --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/MTOPIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba", + "mteb_dataset_name": "MTOPIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.6016010673782521, + "accuracy_stderr": 0.02006896872458128, + "f1": 0.38053232747655114, + "f1_stderr": 0.01000607538587285, + "main_score": 0.6016010673782521 + }, + "evaluation_time": 60.01 + }, + "validation": { + "es": { + "accuracy": 0.6285527177472168, + "accuracy_stderr": 0.021711579573402386, + "f1": 0.4086835785780292, + "f1_stderr": 0.01667467832582759, + "main_score": 0.6285527177472168 + }, + "evaluation_time": 49.43 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/MassiveIntentClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/MassiveIntentClassification.json new file mode 100644 index 0000000..b70e588 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/MassiveIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "31efe3c427b0bae9c22cbb560b8f15491cc6bed7", + "mteb_dataset_name": "MassiveIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.6061869535978479, + "accuracy_stderr": 0.014569103077457903, + "f1": 0.5905725920494994, + "f1_stderr": 0.013704811949043527, + "main_score": 0.6061869535978479 + }, + "evaluation_time": 46.82 + }, + "validation": { + "es": { + "accuracy": 0.6116084604033449, + "accuracy_stderr": 
0.009530877595500524, + "f1": 0.5849156265348016, + "f1_stderr": 0.007343124300470493, + "main_score": 0.6116084604033449 + }, + "evaluation_time": 38.92 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/MassiveScenarioClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/MassiveScenarioClassification.json new file mode 100644 index 0000000..b0e3937 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/MassiveScenarioClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "7d571f92784cd94a019292a1f45445077d0ef634", + "mteb_dataset_name": "MassiveScenarioClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.6655346334902488, + "accuracy_stderr": 0.020357861908343676, + "f1": 0.6585247474609848, + "f1_stderr": 0.02222564054482553, + "main_score": 0.6655346334902488 + }, + "evaluation_time": 23.36 + }, + "validation": { + "es": { + "accuracy": 0.6621741269060502, + "accuracy_stderr": 0.02427316298763553, + "f1": 0.6529374865279445, + "f1_stderr": 0.026203010823487284, + "main_score": 0.6621741269060502 + }, + "evaluation_time": 20.42 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/STS17.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/STS17.json new file mode 100644 index 0000000..7ecc41d --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/STS17.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "af5e6fb845001ecf41f4c1e033ce921939a2a68d", + "mteb_dataset_name": "STS17", + "mteb_version": "1.1.1", + "test": { + "es-es": { + "cos_sim": { + "pearson": 0.881732190530417, + "spearman": 0.8726415906137802 + }, + "euclidean": { + "pearson": 0.875124185104797, + "spearman": 0.8726480201918054 + }, + "manhattan": { + "pearson": 0.8728974854518827, + "spearman": 0.8699213796500065 + } + }, + 
"evaluation_time": 3.05 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/STS22.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/STS22.json new file mode 100644 index 0000000..0ba5fa4 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-base/STS22.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "6d1ba47164174a496b7fa5d3569dae26a6813b80", + "mteb_dataset_name": "STS22", + "mteb_version": "1.1.1", + "test": { + "es": { + "cos_sim": { + "pearson": 0.6297172527772763, + "spearman": 0.6778932790265533 + }, + "euclidean": { + "pearson": 0.6544511364565571, + "spearman": 0.6778932790265533 + }, + "manhattan": { + "pearson": 0.6556987601004287, + "spearman": 0.676832267241606 + } + }, + "evaluation_time": 61.27 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/AmazonReviewsClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/AmazonReviewsClassification.json new file mode 100644 index 0000000..59fbd64 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/AmazonReviewsClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d", + "mteb_dataset_name": "AmazonReviewsClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.43838, + "accuracy_stderr": 0.022102027056358418, + "f1": 0.43282629896178965, + "f1_stderr": 0.018567250773313265, + "main_score": 0.43838 + }, + "evaluation_time": 253.87 + }, + "validation": { + "es": { + "accuracy": 0.43635999999999997, + "accuracy_stderr": 0.021848716209425214, + "f1": 0.43086772526608674, + "f1_stderr": 0.01810361413237288, + "main_score": 0.43635999999999997 + }, + "evaluation_time": 252.62 + } +} \ No newline at end of file diff --git 
a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/MTOPDomainClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/MTOPDomainClassification.json new file mode 100644 index 0000000..b4fe074 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/MTOPDomainClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "d80d48c1eb48d3562165c59d59d0034df9fff0bf", + "mteb_dataset_name": "MTOPDomainClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.926851234156104, + "accuracy_stderr": 0.008141834412829419, + "f1": 0.9228302424341528, + "f1_stderr": 0.00854088854571826, + "main_score": 0.926851234156104 + }, + "evaluation_time": 63.89 + }, + "validation": { + "es": { + "accuracy": 0.9279633267845447, + "accuracy_stderr": 0.011993404797067606, + "f1": 0.9276334671220227, + "f1_stderr": 0.011711168862854823, + "main_score": 0.9279633267845447 + }, + "evaluation_time": 42.66 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/MTOPIntentClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/MTOPIntentClassification.json new file mode 100644 index 0000000..174806d --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/MTOPIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba", + "mteb_dataset_name": "MTOPIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.7562374916611074, + "accuracy_stderr": 0.008647781413261522, + "f1": 0.5428329296539051, + "f1_stderr": 0.008997802468065093, + "main_score": 0.7562374916611074 + }, + "evaluation_time": 170.66 + }, + "validation": { + "es": { + "accuracy": 0.7672560576293386, + "accuracy_stderr": 0.011660521998195134, + "f1": 0.5170325525114795, + 
"f1_stderr": 0.013485250210225938, + "main_score": 0.7672560576293386 + }, + "evaluation_time": 149.6 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/MassiveIntentClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/MassiveIntentClassification.json new file mode 100644 index 0000000..2fcf1a7 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/MassiveIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "31efe3c427b0bae9c22cbb560b8f15491cc6bed7", + "mteb_dataset_name": "MassiveIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.7064559515803632, + "accuracy_stderr": 0.013008910418026038, + "f1": 0.691088863036923, + "f1_stderr": 0.011246689620791766, + "main_score": 0.7064559515803632 + }, + "evaluation_time": 116.37 + }, + "validation": { + "es": { + "accuracy": 0.716822429906542, + "accuracy_stderr": 0.010480821649285566, + "f1": 0.6974702958245047, + "f1_stderr": 0.009952681627827285, + "main_score": 0.716822429906542 + }, + "evaluation_time": 104.63 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/MassiveScenarioClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/MassiveScenarioClassification.json new file mode 100644 index 0000000..dd0c3bc --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/MassiveScenarioClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "7d571f92784cd94a019292a1f45445077d0ef634", + "mteb_dataset_name": "MassiveScenarioClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.7407868190988567, + "accuracy_stderr": 0.0162567570937746, + "f1": 0.7444976382087986, + "f1_stderr": 0.01654090629042849, + "main_score": 0.7407868190988567 + }, 
+ "evaluation_time": 67.57 + }, + "validation": { + "es": { + "accuracy": 0.7435317265125431, + "accuracy_stderr": 0.018487507836732888, + "f1": 0.7454619728615854, + "f1_stderr": 0.018304112374084507, + "main_score": 0.7435317265125431 + }, + "evaluation_time": 53.32 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/STS17.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/STS17.json new file mode 100644 index 0000000..56f7a8f --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/STS17.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "af5e6fb845001ecf41f4c1e033ce921939a2a68d", + "mteb_dataset_name": "STS17", + "mteb_version": "1.1.1", + "test": { + "es-es": { + "cos_sim": { + "pearson": 0.8839330755301231, + "spearman": 0.8808707609862002 + }, + "euclidean": { + "pearson": 0.8906739013006639, + "spearman": 0.8855879309960102 + }, + "manhattan": { + "pearson": 0.8909220675650926, + "spearman": 0.8870329620432645 + } + }, + "evaluation_time": 8.05 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/STS22.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/STS22.json new file mode 100644 index 0000000..a4f517d --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large-instruct/STS22.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "6d1ba47164174a496b7fa5d3569dae26a6813b80", + "mteb_dataset_name": "STS22", + "mteb_version": "1.1.1", + "test": { + "es": { + "cos_sim": { + "pearson": 0.617458109600269, + "spearman": 0.6794844295718688 + }, + "euclidean": { + "pearson": 0.5200672441820391, + "spearman": 0.6606894685619743 + }, + "manhattan": { + "pearson": 0.5153800338405959, + "spearman": 0.6604611123923422 + } + }, + "evaluation_time": 182.0 + } +} \ No newline at end of file diff --git 
a/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/AmazonReviewsClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/AmazonReviewsClassification.json new file mode 100644 index 0000000..176d73c --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/AmazonReviewsClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d", + "mteb_dataset_name": "AmazonReviewsClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.41286000000000006, + "accuracy_stderr": 0.008222432730037987, + "f1": 0.3828993292038569, + "f1_stderr": 0.0163931769693198, + "main_score": 0.41286000000000006 + }, + "evaluation_time": 46.18 + }, + "validation": { + "es": { + "accuracy": 0.41592, + "accuracy_stderr": 0.008035023335373707, + "f1": 0.38598275216578676, + "f1_stderr": 0.01500187757196051, + "main_score": 0.41592 + }, + "evaluation_time": 44.4 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/MTOPDomainClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/MTOPDomainClassification.json new file mode 100644 index 0000000..aa3c0aa --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/MTOPDomainClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "d80d48c1eb48d3562165c59d59d0034df9fff0bf", + "mteb_dataset_name": "MTOPDomainClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.8738492328218813, + "accuracy_stderr": 0.01140038549835036, + "f1": 0.8697003888366615, + "f1_stderr": 0.010289455988625433, + "main_score": 0.8738492328218813 + }, + "evaluation_time": 13.87 + }, + "validation": { + "es": { + "accuracy": 0.8747871643745906, + "accuracy_stderr": 0.016353099798337992, + "f1": 0.8740334440002367, + "f1_stderr": 0.015468252351754997, + "main_score": 0.8747871643745906 + }, 
+ "evaluation_time": 8.72 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/MTOPIntentClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/MTOPIntentClassification.json new file mode 100644 index 0000000..976e064 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/MTOPIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba", + "mteb_dataset_name": "MTOPIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.5578719146097397, + "accuracy_stderr": 0.02852213349340353, + "f1": 0.33306268129587935, + "f1_stderr": 0.00834922730212183, + "main_score": 0.5578719146097397 + }, + "evaluation_time": 29.62 + }, + "validation": { + "es": { + "accuracy": 0.5832351015062214, + "accuracy_stderr": 0.02775357703168613, + "f1": 0.3752971400492529, + "f1_stderr": 0.014946187517278737, + "main_score": 0.5832351015062214 + }, + "evaluation_time": 25.88 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/MassiveIntentClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/MassiveIntentClassification.json new file mode 100644 index 0000000..b7c5b8a --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/MassiveIntentClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "31efe3c427b0bae9c22cbb560b8f15491cc6bed7", + "mteb_dataset_name": "MassiveIntentClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.5793207800941493, + "accuracy_stderr": 0.01729531649260024, + "f1": 0.5660469779271278, + "f1_stderr": 0.016678182865944514, + "main_score": 0.5793207800941493 + }, + "evaluation_time": 22.97 + }, + "validation": { + "es": { + "accuracy": 0.5867191342843089, + "accuracy_stderr": 0.014864012472729702, + "f1": 
0.5583175484171372, + "f1_stderr": 0.012144069736693169, + "main_score": 0.5867191342843089 + }, + "evaluation_time": 20.91 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/MassiveScenarioClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/MassiveScenarioClassification.json new file mode 100644 index 0000000..77260f6 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/MassiveScenarioClassification.json @@ -0,0 +1,25 @@ +{ + "dataset_revision": "7d571f92784cd94a019292a1f45445077d0ef634", + "mteb_dataset_name": "MassiveScenarioClassification", + "mteb_version": "1.1.1", + "test": { + "es": { + "accuracy": 0.6315736381977135, + "accuracy_stderr": 0.023991228111496374, + "f1": 0.6187879288483872, + "f1_stderr": 0.029297826712910608, + "main_score": 0.6315736381977135 + }, + "evaluation_time": 13.73 + }, + "validation": { + "es": { + "accuracy": 0.6404328578455485, + "accuracy_stderr": 0.02103055418242199, + "f1": 0.6294436344078532, + "f1_stderr": 0.024175338470107617, + "main_score": 0.6404328578455485 + }, + "evaluation_time": 11.13 + } +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/STS17.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/STS17.json new file mode 100644 index 0000000..8e3a7c6 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/STS17.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "af5e6fb845001ecf41f4c1e033ce921939a2a68d", + "mteb_dataset_name": "STS17", + "mteb_version": "1.1.1", + "test": { + "es-es": { + "cos_sim": { + "pearson": 0.8594136782189745, + "spearman": 0.8527229300623234 + }, + "euclidean": { + "pearson": 0.8622721965774063, + "spearman": 0.8527294323049455 + }, + "manhattan": { + "pearson": 0.8630467713825977, + "spearman": 0.8562747186954948 + } + }, + "evaluation_time": 1.28 + } 
+} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/STS22.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/STS22.json new file mode 100644 index 0000000..ccbf2b7 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-small/STS22.json @@ -0,0 +1,22 @@ +{ + "dataset_revision": "6d1ba47164174a496b7fa5d3569dae26a6813b80", + "mteb_dataset_name": "STS22", + "mteb_version": "1.1.1", + "test": { + "es": { + "cos_sim": { + "pearson": 0.6120827059362152, + "spearman": 0.6703582596928995 + }, + "euclidean": { + "pearson": 0.6402875298590468, + "spearman": 0.6703582596928995 + }, + "manhattan": { + "pearson": 0.6423836472050369, + "spearman": 0.6680188939165381 + } + }, + "evaluation_time": 40.17 + } +} \ No newline at end of file From 7ffd34d4e053040cd70820ab4b4be8a4a99a15bd Mon Sep 17 00:00:00 2001 From: Bukosabino Date: Wed, 12 Jun 2024 19:22:59 +0200 Subject: [PATCH 3/9] Update using last version of mteb benchmark. 
--- evaluation/embeddings_model/mteb_benchmark.py | 17 ++++++++++++----- requirements.txt | 2 +- 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/evaluation/embeddings_model/mteb_benchmark.py b/evaluation/embeddings_model/mteb_benchmark.py index fdc3f36..973aca1 100644 --- a/evaluation/embeddings_model/mteb_benchmark.py +++ b/evaluation/embeddings_model/mteb_benchmark.py @@ -1,4 +1,4 @@ -from mteb import MTEB +import mteb from sentence_transformers import SentenceTransformer @@ -16,10 +16,17 @@ # model_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2" # model_name = "intfloat/multilingual-e5-small" # model_name = "intfloat/multilingual-e5-base" -# model_name = "intfloat/multilingual-e5-large" -model_name = "intfloat/multilingual-e5-large-instruct" +model_name = "intfloat/multilingual-e5-large" +# model_name = "intfloat/multilingual-e5-large-instruct" +try: + model = SentenceTransformer(model_name, device='cuda') + print("Loaded model embedding using GPU") +except: + model = SentenceTransformer(model_name, device='cpu') + print("Loaded model embedding using CPU") -model = SentenceTransformer(model_name) -evaluation = MTEB(task_langs=["es"]) +tasks = mteb.get_tasks(languages=["spa"]) # Spanish +print(tasks) +evaluation = mteb.MTEB(tasks=tasks) results = evaluation.run(model, output_folder=f"results/{model_name}") diff --git a/requirements.txt b/requirements.txt index 1329869..55e35a1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,5 +30,5 @@ black==23.9.1 isort==5.12.0 # Evaluation -mteb==1.1.1 +mteb==1.12.25 # ragas==0.1.0rc1 From 37f01a1da1558048bbfe1f08e68d35bf1d1b259e Mon Sep 17 00:00:00 2001 From: bukosabino Date: Tue, 18 Jun 2024 14:03:35 +0000 Subject: [PATCH 4/9] update embedding models benchmark results --- evaluation/embeddings_model/mteb_benchmark.py | 84 +++- .../AmazonReviewsClassification.json | 137 ++++++ .../BibleNLPBitextMining.json | 35 ++ .../CataloniaTweetClassification.json | 137 ++++++ 
.../MIRACLRetrieval.json | 158 ++++++ .../MLSUMClusteringS2S.v2.json | 57 +++ .../MTOPDomainClassification.json | 137 ++++++ .../MTOPIntentClassification.json | 137 ++++++ .../MassiveIntentClassification.json | 137 ++++++ .../MassiveScenarioClassification.json | 137 ++++++ .../MintakaRetrieval.json | 158 ++++++ .../MultiEURLEXMultilabelClassification.json | 73 +++ .../MultiHateClassification.json | 95 ++++ .../PawsX.json | 127 +++++ .../PublicHealthQA.json | 158 ++++++ .../SIB200Classification.json | 201 ++++++++ .../SIB200ClusteringS2S.json | 33 ++ .../STS17.json | 54 +++ .../STS22.json | 76 +++ .../STSBenchmarkMultilingualSTS.json | 55 +++ .../STSES.json | 32 ++ .../SpanishNewsClassification.json | 73 +++ .../SpanishNewsClusteringP2P.json | 23 + .../SpanishPassageRetrievalS2P.json | 158 ++++++ .../SpanishPassageRetrievalS2S.json | 158 ++++++ .../SpanishSentimentClassification.json | 181 +++++++ .../Tatoeba.json | 23 + .../TweetSentimentClassification.json | 73 +++ .../XMarket.json | 158 ++++++ .../XNLI.json | 127 +++++ .../XPQARetrieval.json | 455 ++++++++++++++++++ .../XQuADRetrieval.json | 158 ++++++ .../model_meta.json | 1 + .../AmazonReviewsClassification.json | 137 ++++++ .../BibleNLPBitextMining.json | 35 ++ .../CataloniaTweetClassification.json | 137 ++++++ .../MIRACLRetrieval.json | 158 ++++++ .../MLSUMClusteringS2S.v2.json | 57 +++ .../MTOPDomainClassification.json | 137 ++++++ .../MTOPIntentClassification.json | 137 ++++++ .../MassiveIntentClassification.json | 137 ++++++ .../MassiveScenarioClassification.json | 137 ++++++ .../MintakaRetrieval.json | 158 ++++++ .../MultiEURLEXMultilabelClassification.json | 73 +++ .../MultiHateClassification.json | 95 ++++ .../PawsX.json | 127 +++++ .../PublicHealthQA.json | 158 ++++++ .../SIB200Classification.json | 201 ++++++++ .../SIB200ClusteringS2S.json | 33 ++ .../STS17.json | 54 +++ .../STS22.json | 76 +++ .../STSBenchmarkMultilingualSTS.json | 55 +++ .../STSES.json | 32 ++ .../SpanishNewsClassification.json | 73 
+++ .../SpanishNewsClusteringP2P.json | 23 + .../SpanishPassageRetrievalS2P.json | 158 ++++++ .../SpanishPassageRetrievalS2S.json | 158 ++++++ .../SpanishSentimentClassification.json | 181 +++++++ .../Tatoeba.json | 23 + .../TweetSentimentClassification.json | 73 +++ .../XMarket.json | 158 ++++++ .../XNLI.json | 127 +++++ .../XPQARetrieval.json | 455 ++++++++++++++++++ .../XQuADRetrieval.json | 158 ++++++ .../model_meta.json | 1 + requirements.txt | 4 +- 66 files changed, 7528 insertions(+), 4 deletions(-) create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/AmazonReviewsClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/BibleNLPBitextMining.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/CataloniaTweetClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MIRACLRetrieval.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MLSUMClusteringS2S.v2.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MTOPDomainClassification.json create mode 
100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MTOPIntentClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MassiveIntentClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MassiveScenarioClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MintakaRetrieval.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MultiEURLEXMultilabelClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MultiHateClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/PawsX.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/PublicHealthQA.json create mode 100644 
evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SIB200Classification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SIB200ClusteringS2S.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/STS17.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/STS22.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/STSBenchmarkMultilingualSTS.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/STSES.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishNewsClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishNewsClusteringP2P.json create mode 100644 
evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishPassageRetrievalS2P.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishPassageRetrievalS2S.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishSentimentClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/Tatoeba.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/TweetSentimentClassification.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/XMarket.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/XNLI.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/XPQARetrieval.json create mode 100644 
evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/XQuADRetrieval.json create mode 100644 evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/model_meta.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/AmazonReviewsClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/BibleNLPBitextMining.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/CataloniaTweetClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MIRACLRetrieval.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MLSUMClusteringS2S.v2.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MTOPDomainClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MTOPIntentClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MassiveIntentClassification.json create mode 100644 
evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MassiveScenarioClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MintakaRetrieval.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MultiEURLEXMultilabelClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MultiHateClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/PawsX.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/PublicHealthQA.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SIB200Classification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SIB200ClusteringS2S.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/STS17.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/STS22.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/STSBenchmarkMultilingualSTS.json 
create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/STSES.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishNewsClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishNewsClusteringP2P.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishPassageRetrievalS2P.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishPassageRetrievalS2S.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishSentimentClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/Tatoeba.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/TweetSentimentClassification.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/XMarket.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/XNLI.json create mode 100644 
evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/XPQARetrieval.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/XQuADRetrieval.json create mode 100644 evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/model_meta.json diff --git a/evaluation/embeddings_model/mteb_benchmark.py b/evaluation/embeddings_model/mteb_benchmark.py index 973aca1..0af1720 100644 --- a/evaluation/embeddings_model/mteb_benchmark.py +++ b/evaluation/embeddings_model/mteb_benchmark.py @@ -7,7 +7,7 @@ # TODO: write results on model cards huggingface # Define the sentence-transformers model name -# model_name = "dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn" +model_name = "dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn" # model_name = "dariolopez/roberta-base-bne-finetuned-msmarco-qa-es" # model_name = "PlanTL-GOB-ES/roberta-base-bne" # model_name = "PlanTL-GOB-ES/RoBERTalex" @@ -16,7 +16,7 @@ # model_name = "sentence-transformers/paraphrase-multilingual-mpnet-base-v2" # model_name = "intfloat/multilingual-e5-small" # model_name = "intfloat/multilingual-e5-base" -model_name = "intfloat/multilingual-e5-large" +# model_name = "intfloat/multilingual-e5-large" # model_name = "intfloat/multilingual-e5-large-instruct" try: @@ -26,7 +26,87 @@ model = SentenceTransformer(model_name, device='cpu') print("Loaded model embedding using CPU") + +TASK_LIST_BITEXT_MINING = [ + "BibleNLPBitextMining", + # "FloresBitextMining", s2s, crosslingual 406 / 41412 pairs + # "NTREXBitextMining", s2s, crosslingual 62 / 1916 pairs + "Tatoeba", +] + +TASK_LIST_PAIR_CLASSIFICATION = [ + "PawsX", + "XNLI" +] + +TASK_LIST_MULTI_LABEL_CLASSIFICATION = [ + "MultiEURLEXMultilabelClassification" +] + 
+TASK_LIST_RETRIEVAL = [ + # "BelebeleRetrieval", + "MintakaRetrieval", + "MIRACLRetrieval", + # "MLQARetrieval", + # "MultiLongDocRetrieval", + "PublicHealthQA", + "XMarket", + "XPQARetrieval", + "XQuADRetrieval", + "SpanishPassageRetrievalS2P", + "SpanishPassageRetrievalS2S" +] + +TASK_LIST_CLASSIFICATION = [ + "AmazonReviewsClassification", + "CataloniaTweetClassification", + # "LanguageClassification", + "MassiveIntentClassification", + "MassiveScenarioClassification", + "MTOPDomainClassification", + "MTOPIntentClassification", + "MultiHateClassification", + # "MultilingualSentimentClassification", + "SIB200Classification", + "TweetSentimentClassification", + "SpanishNewsClassification", + "SpanishSentimentClassification" +] + +TASK_LIST_CLUSTERING = [ + # "MLSUMClusteringP2P.v2", + "SpanishNewsClusteringP2P", + "MLSUMClusteringS2S.v2", + "SIB200ClusteringS2S" +] + +TASK_LIST_RERANKING = [ + # "MIRACLReranking" +] + +TASK_LIST_STS = [ + "STS17", + "STS22", + "STSBenchmarkMultilingualSTS", + "STSES" +] + +TASK_LIST = ( + TASK_LIST_BITEXT_MINING + + TASK_LIST_PAIR_CLASSIFICATION + + TASK_LIST_MULTI_LABEL_CLASSIFICATION + + TASK_LIST_RETRIEVAL + + TASK_LIST_CLASSIFICATION + + TASK_LIST_CLUSTERING + + TASK_LIST_RERANKING + + TASK_LIST_STS +) + + tasks = mteb.get_tasks(languages=["spa"]) # Spanish print(tasks) +print(TASK_LIST) +tasks = mteb.get_tasks(tasks=TASK_LIST, languages=["spa"]) # Spanish filtered evaluation = mteb.MTEB(tasks=tasks) +# evaluation = mteb.MTEB(tasks=tasks, task_langs=["es"]) results = evaluation.run(model, output_folder=f"results/{model_name}") diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/AmazonReviewsClassification.json 
b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/AmazonReviewsClassification.json new file mode 100644 index 0000000..18666b3 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/AmazonReviewsClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d", + "evaluation_time": 19.33099603652954, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.28194, + "f1": 0.280623492354538, + "f1_weighted": 0.280623492354538, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.28194, + "scores_per_experiment": [ + { + "accuracy": 0.2994, + "f1": 0.29534152012468623, + "f1_weighted": 0.29534152012468623 + }, + { + "accuracy": 0.268, + "f1": 0.26734995399683475, + "f1_weighted": 0.26734995399683475 + }, + { + "accuracy": 0.281, + "f1": 0.2802469537333817, + "f1_weighted": 0.2802469537333817 + }, + { + "accuracy": 0.3104, + "f1": 0.3085188382500704, + "f1_weighted": 0.3085188382500704 + }, + { + "accuracy": 0.2806, + "f1": 0.27539549039545086, + "f1_weighted": 0.2753954903954508 + }, + { + "accuracy": 0.2652, + "f1": 0.26744700495838514, + "f1_weighted": 0.2674470049583851 + }, + { + "accuracy": 0.2842, + "f1": 0.2834296539567313, + "f1_weighted": 0.2834296539567313 + }, + { + "accuracy": 0.3018, + "f1": 0.304377989211265, + "f1_weighted": 0.304377989211265 + }, + { + "accuracy": 0.2818, + "f1": 0.27968809705983827, + "f1_weighted": 0.2796880970598383 + }, + { + "accuracy": 0.247, + "f1": 0.24443942185873563, + "f1_weighted": 0.2444394218587356 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.28412, + "f1": 0.28250040774091184, + "f1_weighted": 
0.2825004077409118, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.28412, + "scores_per_experiment": [ + { + "accuracy": 0.2844, + "f1": 0.2793668216015566, + "f1_weighted": 0.2793668216015566 + }, + { + "accuracy": 0.2798, + "f1": 0.27920750706321396, + "f1_weighted": 0.2792075070632139 + }, + { + "accuracy": 0.284, + "f1": 0.2817804187405256, + "f1_weighted": 0.2817804187405256 + }, + { + "accuracy": 0.3124, + "f1": 0.3102719681603968, + "f1_weighted": 0.31027196816039676 + }, + { + "accuracy": 0.2818, + "f1": 0.27633174533311694, + "f1_weighted": 0.27633174533311694 + }, + { + "accuracy": 0.2734, + "f1": 0.2757925698472424, + "f1_weighted": 0.2757925698472424 + }, + { + "accuracy": 0.2866, + "f1": 0.2863484779904451, + "f1_weighted": 0.28634847799044505 + }, + { + "accuracy": 0.297, + "f1": 0.2987542747538752, + "f1_weighted": 0.29875427475387517 + }, + { + "accuracy": 0.2872, + "f1": 0.28444249117467235, + "f1_weighted": 0.28444249117467235 + }, + { + "accuracy": 0.2546, + "f1": 0.25270780274407356, + "f1_weighted": 0.2527078027440735 + } + ] + } + ] + }, + "task_name": "AmazonReviewsClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/BibleNLPBitextMining.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/BibleNLPBitextMining.json new file mode 100644 index 0000000..f4f6124 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/BibleNLPBitextMining.json @@ -0,0 +1,35 @@ +{ + "dataset_revision": 
"264a18480c529d9e922483839b4b9758e690b762", + "evaluation_time": 1.3429553508758545, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "train": [ + { + "accuracy": 0.1875, + "f1": 0.1409097413003663, + "hf_subset": "eng_Latn-spa_Latn", + "languages": [ + "eng-Latn", + "spa-Latn" + ], + "main_score": 0.1409097413003663, + "precision": 0.1267657039141414, + "recall": 0.1875 + }, + { + "accuracy": 0.125, + "f1": 0.08871755733543418, + "hf_subset": "spa_Latn-eng_Latn", + "languages": [ + "spa-Latn", + "eng-Latn" + ], + "main_score": 0.08871755733543418, + "precision": 0.07890790119263284, + "recall": 0.125 + } + ] + }, + "task_name": "BibleNLPBitextMining" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/CataloniaTweetClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/CataloniaTweetClassification.json new file mode 100644 index 0000000..19b8ba8 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/CataloniaTweetClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "cf24d44e517efa534f048e5fc5981f399ed25bee", + "evaluation_time": 6.340556859970093, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.48369999999999996, + "f1": 0.49314220549288346, + "f1_weighted": 0.47738918261400326, + "hf_subset": "spanish", + "languages": [ + "spa-Latn" + ], + "main_score": 0.48369999999999996, + "scores_per_experiment": [ + { + "accuracy": 0.508, + "f1": 0.5239339940412745, + 
"f1_weighted": 0.5068278754089796 + }, + { + "accuracy": 0.5595, + "f1": 0.5662116415345994, + "f1_weighted": 0.5549628277570985 + }, + { + "accuracy": 0.404, + "f1": 0.4047880799953423, + "f1_weighted": 0.3959503836585128 + }, + { + "accuracy": 0.4405, + "f1": 0.45097274364264733, + "f1_weighted": 0.42405694706815533 + }, + { + "accuracy": 0.436, + "f1": 0.4416859694200371, + "f1_weighted": 0.4247120136944094 + }, + { + "accuracy": 0.502, + "f1": 0.5178034634220223, + "f1_weighted": 0.5004404007776233 + }, + { + "accuracy": 0.531, + "f1": 0.5443296988711129, + "f1_weighted": 0.5273419265492028 + }, + { + "accuracy": 0.4765, + "f1": 0.46929255516978013, + "f1_weighted": 0.4638450444273975 + }, + { + "accuracy": 0.4665, + "f1": 0.46983268345989443, + "f1_weighted": 0.46164094361273644 + }, + { + "accuracy": 0.513, + "f1": 0.5425712253721241, + "f1_weighted": 0.5141134631859173 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.47830000000000006, + "f1": 0.48867510346236365, + "f1_weighted": 0.47308684157353487, + "hf_subset": "spanish", + "languages": [ + "spa-Latn" + ], + "main_score": 0.47830000000000006, + "scores_per_experiment": [ + { + "accuracy": 0.4855, + "f1": 0.5008230844818334, + "f1_weighted": 0.48572150377224377 + }, + { + "accuracy": 0.5415, + "f1": 0.5514039692671057, + "f1_weighted": 0.53638802152537 + }, + { + "accuracy": 0.409, + "f1": 0.41284315995999094, + "f1_weighted": 0.4018208895400104 + }, + { + "accuracy": 0.443, + "f1": 0.4523266259795151, + "f1_weighted": 0.43007488967741986 + }, + { + "accuracy": 0.449, + "f1": 0.4524637222154739, + "f1_weighted": 0.4376239848783541 + }, + { + "accuracy": 0.4885, + "f1": 0.5091919915413213, + "f1_weighted": 0.4889408574644429 + }, + { + "accuracy": 0.526, + "f1": 0.5445752924542642, + "f1_weighted": 0.5241103554190435 + }, + { + "accuracy": 0.483, + "f1": 0.47387804262954386, + "f1_weighted": 0.4727274184486702 + }, + { + "accuracy": 0.4515, + "f1": 0.4553576513763769, + "f1_weighted": 
0.4461860175626584 + }, + { + "accuracy": 0.506, + "f1": 0.5338874947182114, + "f1_weighted": 0.5072744774471358 + } + ] + } + ] + }, + "task_name": "CataloniaTweetClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MIRACLRetrieval.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MIRACLRetrieval.json new file mode 100644 index 0000000..04bc9ad --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MIRACLRetrieval.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "d28a029f35c4ff7f616df47b0edf54e6882395e6", + "evaluation_time": 10.257708072662354, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.70137, + "map_at_1": 0.16905, + "map_at_10": 0.58275, + "map_at_100": 0.61653, + "map_at_1000": 0.61698, + "map_at_20": 0.60867, + "map_at_3": 0.3394, + "map_at_5": 0.44758, + "mrr_at_1": 0.6064814814814815, + "mrr_at_10": 0.7379102978640011, + "mrr_at_100": 0.7394134599050899, + "mrr_at_1000": 0.7394261409878874, + "mrr_at_20": 0.7390824695154755, + "mrr_at_3": 0.7127057613168719, + "mrr_at_5": 0.7296810699588472, + "nauc_map_at_1000_diff1": -0.007317651601829682, + "nauc_map_at_1000_max": 0.2490644015335197, + "nauc_map_at_1000_std": -0.029799169495302803, + "nauc_map_at_100_diff1": -0.007719721275631736, + "nauc_map_at_100_max": 0.24962546711484293, + "nauc_map_at_100_std": -0.029289444010632548, + "nauc_map_at_10_diff1": 0.01050331289218261, + 
"nauc_map_at_10_max": 0.23001133459075535, + "nauc_map_at_10_std": -0.08129029291793735, + "nauc_map_at_1_diff1": 0.2714552418104389, + "nauc_map_at_1_max": -0.086681874170395, + "nauc_map_at_1_std": -0.18115053894399663, + "nauc_map_at_20_diff1": -0.008721745782685883, + "nauc_map_at_20_max": 0.2474345966057929, + "nauc_map_at_20_std": -0.042495808831597315, + "nauc_map_at_3_diff1": 0.17544236349202122, + "nauc_map_at_3_max": -0.0076818264438188255, + "nauc_map_at_3_std": -0.17597340418326862, + "nauc_map_at_5_diff1": 0.1360436258070462, + "nauc_map_at_5_max": 0.06420219706097102, + "nauc_map_at_5_std": -0.1664891364714567, + "nauc_mrr_at_1000_diff1": 0.04666152249115081, + "nauc_mrr_at_1000_max": 0.2591105083594061, + "nauc_mrr_at_1000_std": 0.06181903773550218, + "nauc_mrr_at_100_diff1": 0.046556555911408504, + "nauc_mrr_at_100_max": 0.25914375659965, + "nauc_mrr_at_100_std": 0.06187048830309337, + "nauc_mrr_at_10_diff1": 0.045504359355399676, + "nauc_mrr_at_10_max": 0.2618509793244554, + "nauc_mrr_at_10_std": 0.06342587814363282, + "nauc_mrr_at_1_diff1": 0.06943843886477287, + "nauc_mrr_at_1_max": 0.2475828538025056, + "nauc_mrr_at_1_std": 0.04992812953323326, + "nauc_mrr_at_20_diff1": 0.046761001236736995, + "nauc_mrr_at_20_max": 0.2596880071750121, + "nauc_mrr_at_20_std": 0.06241501985573096, + "nauc_mrr_at_3_diff1": 0.050031134955902765, + "nauc_mrr_at_3_max": 0.2586528437889882, + "nauc_mrr_at_3_std": 0.06358375016338534, + "nauc_mrr_at_5_diff1": 0.05055951110438985, + "nauc_mrr_at_5_max": 0.25714361301822386, + "nauc_mrr_at_5_std": 0.05408001928168444, + "nauc_ndcg_at_1000_diff1": -0.016948633271396018, + "nauc_ndcg_at_1000_max": 0.28107444445965807, + "nauc_ndcg_at_1000_std": 0.0279148519204702, + "nauc_ndcg_at_100_diff1": -0.02435957839442378, + "nauc_ndcg_at_100_max": 0.28829357087340346, + "nauc_ndcg_at_100_std": 0.03556696125960638, + "nauc_ndcg_at_10_diff1": -0.006459655096910712, + "nauc_ndcg_at_10_max": 0.2709658157112583, + "nauc_ndcg_at_10_std": 
-0.05256393450839078, + "nauc_ndcg_at_1_diff1": 0.07419680913418852, + "nauc_ndcg_at_1_max": 0.2383064141353194, + "nauc_ndcg_at_1_std": 0.041193727938266936, + "nauc_ndcg_at_20_diff1": -0.03357636423471913, + "nauc_ndcg_at_20_max": 0.29169865632138653, + "nauc_ndcg_at_20_std": 0.005726012551283487, + "nauc_ndcg_at_3_diff1": 0.023928014078519768, + "nauc_ndcg_at_3_max": 0.20647955612036906, + "nauc_ndcg_at_3_std": 0.006816226632200675, + "nauc_ndcg_at_5_diff1": 0.03653646357739245, + "nauc_ndcg_at_5_max": 0.19295693716915094, + "nauc_ndcg_at_5_std": -0.04933692877658081, + "nauc_precision_at_1000_diff1": -0.23145976848291797, + "nauc_precision_at_1000_max": 0.32335549722148454, + "nauc_precision_at_1000_std": 0.2747536645535746, + "nauc_precision_at_100_diff1": -0.2386339296134605, + "nauc_precision_at_100_max": 0.34038373938743177, + "nauc_precision_at_100_std": 0.2834666513613567, + "nauc_precision_at_10_diff1": -0.2316321176806828, + "nauc_precision_at_10_max": 0.3753559847509191, + "nauc_precision_at_10_std": 0.17926475886086315, + "nauc_precision_at_1_diff1": 0.07419680913418852, + "nauc_precision_at_1_max": 0.2383064141353194, + "nauc_precision_at_1_std": 0.041193727938266936, + "nauc_precision_at_20_diff1": -0.24862792404465534, + "nauc_precision_at_20_max": 0.3666776339747446, + "nauc_precision_at_20_std": 0.2440853827750398, + "nauc_precision_at_3_diff1": -0.11004575626475498, + "nauc_precision_at_3_max": 0.30334316921249443, + "nauc_precision_at_3_std": 0.10553272173166008, + "nauc_precision_at_5_diff1": -0.15389688420874342, + "nauc_precision_at_5_max": 0.3174612505984659, + "nauc_precision_at_5_std": 0.10832962023166366, + "nauc_recall_at_1000_diff1": 0.25741813956054227, + "nauc_recall_at_1000_max": 0.9424639289678092, + "nauc_recall_at_1000_std": 0.8456886210049609, + "nauc_recall_at_100_diff1": -0.3565918324499385, + "nauc_recall_at_100_max": 0.5223473536390963, + "nauc_recall_at_100_std": 0.39930922783920736, + "nauc_recall_at_10_diff1": 
-0.05909315665301678, + "nauc_recall_at_10_max": 0.2398917972181278, + "nauc_recall_at_10_std": -0.15447629776799762, + "nauc_recall_at_1_diff1": 0.2714552418104389, + "nauc_recall_at_1_max": -0.086681874170395, + "nauc_recall_at_1_std": -0.18115053894399663, + "nauc_recall_at_20_diff1": -0.21469244169421278, + "nauc_recall_at_20_max": 0.34201246285418413, + "nauc_recall_at_20_std": -0.011186791513805465, + "nauc_recall_at_3_diff1": 0.1729133133676278, + "nauc_recall_at_3_max": -0.06789516703990331, + "nauc_recall_at_3_std": -0.20074860854858992, + "nauc_recall_at_5_diff1": 0.14693017707271636, + "nauc_recall_at_5_max": -0.028382631058508752, + "nauc_recall_at_5_std": -0.22696374998581995, + "ndcg_at_1": 0.60494, + "ndcg_at_10": 0.70137, + "ndcg_at_100": 0.75644, + "ndcg_at_1000": 0.76071, + "ndcg_at_20": 0.73653, + "ndcg_at_3": 0.60924, + "ndcg_at_5": 0.63114, + "precision_at_1": 0.60494, + "precision_at_10": 0.37932, + "precision_at_100": 0.04517, + "precision_at_1000": 0.00461, + "precision_at_20": 0.21049, + "precision_at_3": 0.5427, + "precision_at_5": 0.4963, + "recall_at_1": 0.16905, + "recall_at_10": 0.82848, + "recall_at_100": 0.97727, + "recall_at_1000": 0.99862, + "recall_at_20": 0.91252, + "recall_at_3": 0.39983, + "recall_at_5": 0.57486 + } + ] + }, + "task_name": "MIRACLRetrieval" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MLSUMClusteringS2S.v2.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MLSUMClusteringS2S.v2.json new file mode 100644 index 0000000..8a4440e --- /dev/null +++ 
b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MLSUMClusteringS2S.v2.json @@ -0,0 +1,57 @@ +{ + "dataset_revision": "b5d54f8f3b61ae17845046286940f03c6bc79bc7", + "evaluation_time": 40.5535101890564, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.42841628413893035, + "v_measure": 0.42841628413893035, + "v_measures": { + "Level 0": [ + 0.42755519643965423, + 0.43301120845251906, + 0.42718169071996565, + 0.42832579380278374, + 0.4299637836337959, + 0.4138175856266352, + 0.4217497681091122, + 0.4238357969561577, + 0.440584099183312, + 0.4381379184653672 + ] + } + } + ], + "validation": [ + { + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.4226473705786238, + "v_measure": 0.4226473705786238, + "v_measures": { + "Level 0": [ + 0.4149317661027187, + 0.4130850744525116, + 0.4343089533809689, + 0.4316431814373901, + 0.42492237447410663, + 0.4205523134743927, + 0.4177048056790333, + 0.42596600788192945, + 0.4234965139520203, + 0.41986271495116617 + ] + } + } + ] + }, + "task_name": "MLSUMClusteringS2S.v2" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MTOPDomainClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MTOPDomainClassification.json new file mode 100644 index 0000000..f759e0b --- /dev/null +++ 
b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MTOPDomainClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "d80d48c1eb48d3562165c59d59d0034df9fff0bf", + "evaluation_time": 5.220965147018433, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.7924616410940628, + "f1": 0.7830311218428887, + "f1_weighted": 0.7954731697974255, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.7924616410940628, + "scores_per_experiment": [ + { + "accuracy": 0.7978652434956638, + "f1": 0.7887713382627285, + "f1_weighted": 0.7975839797918298 + }, + { + "accuracy": 0.7558372248165444, + "f1": 0.7566919457596577, + "f1_weighted": 0.7592058267493151 + }, + { + "accuracy": 0.7931954636424283, + "f1": 0.7846917969501537, + "f1_weighted": 0.7966705493984981 + }, + { + "accuracy": 0.7928619079386258, + "f1": 0.77535984846614, + "f1_weighted": 0.7958374210467063 + }, + { + "accuracy": 0.8155436957971981, + "f1": 0.805039703166214, + "f1_weighted": 0.8184596692432305 + }, + { + "accuracy": 0.795530353569046, + "f1": 0.7859972090977675, + "f1_weighted": 0.7989343597248248 + }, + { + "accuracy": 0.8012008005336891, + "f1": 0.793073327045109, + "f1_weighted": 0.8039792125682577 + }, + { + "accuracy": 0.7815210140093396, + "f1": 0.7801552646424692, + "f1_weighted": 0.7872077541941248 + }, + { + "accuracy": 0.8022014676450967, + "f1": 0.784966044213446, + "f1_weighted": 0.8047052897884276 + }, + { + "accuracy": 0.7888592394929953, + "f1": 0.775564740825201, + "f1_weighted": 0.7921476354690412 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.8017681728880157, + "f1": 0.8018841837911677, + "f1_weighted": 0.8035736090827028, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.8017681728880157, + "scores_per_experiment": [ + { + "accuracy": 
0.808120497707924, + "f1": 0.8061635778695545, + "f1_weighted": 0.8068938148893604 + }, + { + "accuracy": 0.7635887360838245, + "f1": 0.7721588968975397, + "f1_weighted": 0.766774122532532 + }, + { + "accuracy": 0.8218729535036018, + "f1": 0.8239738055469129, + "f1_weighted": 0.8236519935538197 + }, + { + "accuracy": 0.793713163064833, + "f1": 0.7866845543696932, + "f1_weighted": 0.7967519576183322 + }, + { + "accuracy": 0.8146692861820564, + "f1": 0.8154271343015658, + "f1_weighted": 0.8158954171673858 + }, + { + "accuracy": 0.8068107400130976, + "f1": 0.8070897612519414, + "f1_weighted": 0.8074135351000312 + }, + { + "accuracy": 0.8153241650294696, + "f1": 0.8168128079368927, + "f1_weighted": 0.8172228557142128 + }, + { + "accuracy": 0.8028814669286182, + "f1": 0.8102729251431591, + "f1_weighted": 0.8073583645339524 + }, + { + "accuracy": 0.7950229207596594, + "f1": 0.7875804862127228, + "f1_weighted": 0.7957797192236828 + }, + { + "accuracy": 0.7956777996070727, + "f1": 0.7926778883816936, + "f1_weighted": 0.797994310493719 + } + ] + } + ] + }, + "task_name": "MTOPDomainClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MTOPIntentClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MTOPIntentClassification.json new file mode 100644 index 0000000..f424be8 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MTOPIntentClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba", + 
"evaluation_time": 30.784114122390747, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.5307538358905937, + "f1": 0.357687651215255, + "f1_weighted": 0.5810878782467117, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.5307538358905937, + "scores_per_experiment": [ + { + "accuracy": 0.5, + "f1": 0.34983407398313976, + "f1_weighted": 0.5493829950344079 + }, + { + "accuracy": 0.5446964643095397, + "f1": 0.35955115654340963, + "f1_weighted": 0.5984362126288763 + }, + { + "accuracy": 0.5416944629753169, + "f1": 0.3670630634303175, + "f1_weighted": 0.5915072683913849 + }, + { + "accuracy": 0.5116744496330887, + "f1": 0.3579615573818917, + "f1_weighted": 0.5600647192546024 + }, + { + "accuracy": 0.5493662441627751, + "f1": 0.3724615031857277, + "f1_weighted": 0.601397345181211 + }, + { + "accuracy": 0.5493662441627751, + "f1": 0.36071248217024, + "f1_weighted": 0.5983892433893151 + }, + { + "accuracy": 0.5370246831220814, + "f1": 0.3631966160961398, + "f1_weighted": 0.5853315330062412 + }, + { + "accuracy": 0.505003335557038, + "f1": 0.3282055066433297, + "f1_weighted": 0.5541596005318513 + }, + { + "accuracy": 0.5350233488992662, + "f1": 0.35643496505846284, + "f1_weighted": 0.584814614951082 + }, + { + "accuracy": 0.533689126084056, + "f1": 0.3614555876598911, + "f1_weighted": 0.5873952500981453 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.5355599214145383, + "f1": 0.31035764025478496, + "f1_weighted": 0.5842391472689267, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.5355599214145383, + "scores_per_experiment": [ + { + "accuracy": 0.5212835625409299, + "f1": 0.2971645185530584, + "f1_weighted": 0.5713918052161404 + }, + { + "accuracy": 0.5592665356908972, + "f1": 0.32945281662054515, + "f1_weighted": 0.607772114225599 + }, + { + "accuracy": 0.5468238375900458, + "f1": 0.3012293741690168, + "f1_weighted": 0.6018896138124327 + }, + { + "accuracy": 
0.49181401440733463, + "f1": 0.29686040263842517, + "f1_weighted": 0.5423300999716718 + }, + { + "accuracy": 0.5605762933857237, + "f1": 0.32604099720484886, + "f1_weighted": 0.6097359738888037 + }, + { + "accuracy": 0.5415848068107401, + "f1": 0.3154138835961403, + "f1_weighted": 0.5912396018393782 + }, + { + "accuracy": 0.5428945645055665, + "f1": 0.30410409518182807, + "f1_weighted": 0.5874473179992138 + }, + { + "accuracy": 0.5206286836935167, + "f1": 0.29679399579972066, + "f1_weighted": 0.5746672172802358 + }, + { + "accuracy": 0.5212835625409299, + "f1": 0.31645651233178623, + "f1_weighted": 0.5631378468960507 + }, + { + "accuracy": 0.5494433529796987, + "f1": 0.3200598064524798, + "f1_weighted": 0.5927798815597409 + } + ] + } + ] + }, + "task_name": "MTOPIntentClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MassiveIntentClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MassiveIntentClassification.json new file mode 100644 index 0000000..173b4a6 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MassiveIntentClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "4672e20407010da34463acc759c162ca9734bca6", + "evaluation_time": 17.413422346115112, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.5473100201748486, + "f1": 0.5388759211649171, + "f1_weighted": 0.5519049835098329, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 
0.5473100201748486, + "scores_per_experiment": [ + { + "accuracy": 0.5574983187626092, + "f1": 0.5492824703516187, + "f1_weighted": 0.5645344130206177 + }, + { + "accuracy": 0.5729657027572294, + "f1": 0.5537351000218513, + "f1_weighted": 0.574886714491984 + }, + { + "accuracy": 0.5521183591123067, + "f1": 0.5426610777227577, + "f1_weighted": 0.5497859402007603 + }, + { + "accuracy": 0.5295897780766644, + "f1": 0.5198597584160835, + "f1_weighted": 0.5317569678824489 + }, + { + "accuracy": 0.5622057834566241, + "f1": 0.538414255108816, + "f1_weighted": 0.565013513742218 + }, + { + "accuracy": 0.5450571620712845, + "f1": 0.5358992659739907, + "f1_weighted": 0.5496204295839354 + }, + { + "accuracy": 0.5295897780766644, + "f1": 0.5261625561873087, + "f1_weighted": 0.5380715657764722 + }, + { + "accuracy": 0.5268997982515131, + "f1": 0.5266104020681593, + "f1_weighted": 0.5316058382624554 + }, + { + "accuracy": 0.5416946872898454, + "f1": 0.5404662162401142, + "f1_weighted": 0.5476922357953111 + }, + { + "accuracy": 0.5554808338937458, + "f1": 0.5556681095584713, + "f1_weighted": 0.5660822163421263 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.5653221839645844, + "f1": 0.5533038433464216, + "f1_weighted": 0.5663488473056314, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.5653221839645844, + "scores_per_experiment": [ + { + "accuracy": 0.5764879488440728, + "f1": 0.5584165520957501, + "f1_weighted": 0.5794168650923822 + }, + { + "accuracy": 0.6000983767830792, + "f1": 0.5712908023060943, + "f1_weighted": 0.5989635997433759 + }, + { + "accuracy": 0.5696015740285293, + "f1": 0.5593159334936791, + "f1_weighted": 0.5629311913108553 + }, + { + "accuracy": 0.5533694048204624, + "f1": 0.5424829038576705, + "f1_weighted": 0.5509814686497979 + }, + { + "accuracy": 0.5863256271519921, + "f1": 0.5693034425073245, + "f1_weighted": 0.5876012568688049 + }, + { + "accuracy": 0.5666502705361535, + "f1": 0.5557656016404374, + "f1_weighted": 
0.5713941346089588 + }, + { + "accuracy": 0.5268076733890802, + "f1": 0.5377267916464388, + "f1_weighted": 0.5278118497653308 + }, + { + "accuracy": 0.5400885391047713, + "f1": 0.5345706905104906, + "f1_weighted": 0.539250437532577 + }, + { + "accuracy": 0.5636989670437776, + "f1": 0.5506111887793386, + "f1_weighted": 0.5664509396398614 + }, + { + "accuracy": 0.5700934579439252, + "f1": 0.5535545266269916, + "f1_weighted": 0.5786867298443702 + } + ] + } + ] + }, + "task_name": "MassiveIntentClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MassiveScenarioClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MassiveScenarioClassification.json new file mode 100644 index 0000000..72bbcb5 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MassiveScenarioClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "fad2c6e8459f9e1c45d9315f4953d921437d70f8", + "evaluation_time": 7.296406269073486, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.6322797579018158, + "f1": 0.6344795048082853, + "f1_weighted": 0.6338103481917446, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6322797579018158, + "scores_per_experiment": [ + { + "accuracy": 0.6378614660390047, + "f1": 0.6441211856912198, + "f1_weighted": 0.6410955238984352 + }, + { + "accuracy": 0.632817753866846, + "f1": 0.6323402142412589, + "f1_weighted": 0.6308114687408368 + }, + { + "accuracy": 
0.6012104909213181, + "f1": 0.6089764942173317, + "f1_weighted": 0.6015163208631067 + }, + { + "accuracy": 0.6163416274377942, + "f1": 0.6222382001506472, + "f1_weighted": 0.6164670375162038 + }, + { + "accuracy": 0.6624075319435104, + "f1": 0.658909985352963, + "f1_weighted": 0.6629982984107817 + }, + { + "accuracy": 0.620712844653665, + "f1": 0.6207595385090287, + "f1_weighted": 0.620861805081355 + }, + { + "accuracy": 0.6237390719569603, + "f1": 0.6282548397469074, + "f1_weighted": 0.6323356232139485 + }, + { + "accuracy": 0.6200403496973773, + "f1": 0.6251156284402956, + "f1_weighted": 0.6220243300959895 + }, + { + "accuracy": 0.6529926025554809, + "f1": 0.6469805447067548, + "f1_weighted": 0.6540026243049627 + }, + { + "accuracy": 0.6546738399462004, + "f1": 0.6570984170264453, + "f1_weighted": 0.6559904497918266 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.6385145105755042, + "f1": 0.643565913167307, + "f1_weighted": 0.639886171423558, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6385145105755042, + "scores_per_experiment": [ + { + "accuracy": 0.6522380718150517, + "f1": 0.6631096730907511, + "f1_weighted": 0.6555299589714599 + }, + { + "accuracy": 0.6335464830300049, + "f1": 0.6392684091366081, + "f1_weighted": 0.6324896555808442 + }, + { + "accuracy": 0.6064928676832267, + "f1": 0.6155976856542056, + "f1_weighted": 0.6076551216375206 + }, + { + "accuracy": 0.6246925725528775, + "f1": 0.634981691018799, + "f1_weighted": 0.6217053558393448 + }, + { + "accuracy": 0.6827348745696016, + "f1": 0.68610378464216, + "f1_weighted": 0.684861393573177 + }, + { + "accuracy": 0.6187899655681259, + "f1": 0.6216811416797512, + "f1_weighted": 0.6174504180586866 + }, + { + "accuracy": 0.6296114117068372, + "f1": 0.6329409613218124, + "f1_weighted": 0.6375050812589357 + }, + { + "accuracy": 0.6232169208066897, + "f1": 0.6301117689217044, + "f1_weighted": 0.6279577560113245 + }, + { + "accuracy": 0.6561731431382194, + "f1": 
0.6509461906233898, + "f1_weighted": 0.655488803389411 + }, + { + "accuracy": 0.6576487948844073, + "f1": 0.66091782558389, + "f1_weighted": 0.6582181699148762 + } + ] + } + ] + }, + "task_name": "MassiveScenarioClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MintakaRetrieval.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MintakaRetrieval.json new file mode 100644 index 0000000..7d0deea --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MintakaRetrieval.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "efa78cc2f74bbcd21eff2261f9e13aebe40b814e", + "evaluation_time": 5.37589168548584, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.16548, + "map_at_1": 0.07838, + "map_at_10": 0.13323, + "map_at_100": 0.14114, + "map_at_1000": 0.14253, + "map_at_20": 0.13731, + "map_at_3": 0.11496, + "map_at_5": 0.12503, + "mrr_at_1": 0.07838283828382839, + "mrr_at_10": 0.13323085284718922, + "mrr_at_100": 0.14114037716151875, + "mrr_at_1000": 0.14252930048507964, + "mrr_at_20": 0.13730922184476776, + "mrr_at_3": 0.11496149614961494, + "mrr_at_5": 0.12502750275027485, + "nauc_map_at_1000_diff1": 0.1666180871642933, + "nauc_map_at_1000_max": 0.1750662593898178, + "nauc_map_at_1000_std": 0.13889883579510792, + "nauc_map_at_100_diff1": 0.1662284349336637, + "nauc_map_at_100_max": 0.17529508815374154, + "nauc_map_at_100_std": 0.13910399333233373, + 
"nauc_map_at_10_diff1": 0.17001363721919535, + "nauc_map_at_10_max": 0.1797703674760386, + "nauc_map_at_10_std": 0.14022373698317853, + "nauc_map_at_1_diff1": 0.2816720992618195, + "nauc_map_at_1_max": 0.1798827660041875, + "nauc_map_at_1_std": 0.11059226026701093, + "nauc_map_at_20_diff1": 0.16866355796920554, + "nauc_map_at_20_max": 0.17782673427387416, + "nauc_map_at_20_std": 0.14044063214842467, + "nauc_map_at_3_diff1": 0.19223269902510867, + "nauc_map_at_3_max": 0.1737223392631852, + "nauc_map_at_3_std": 0.12906518563193872, + "nauc_map_at_5_diff1": 0.17914011218072062, + "nauc_map_at_5_max": 0.1803536954024063, + "nauc_map_at_5_std": 0.1365677627294623, + "nauc_mrr_at_1000_diff1": 0.1666180871642933, + "nauc_mrr_at_1000_max": 0.1750662593898178, + "nauc_mrr_at_1000_std": 0.13889883579510792, + "nauc_mrr_at_100_diff1": 0.1662284349336637, + "nauc_mrr_at_100_max": 0.17529508815374154, + "nauc_mrr_at_100_std": 0.13910399333233373, + "nauc_mrr_at_10_diff1": 0.17001363721919535, + "nauc_mrr_at_10_max": 0.1797703674760386, + "nauc_mrr_at_10_std": 0.14022373698317853, + "nauc_mrr_at_1_diff1": 0.2816720992618195, + "nauc_mrr_at_1_max": 0.1798827660041875, + "nauc_mrr_at_1_std": 0.11059226026701093, + "nauc_mrr_at_20_diff1": 0.16866355796920554, + "nauc_mrr_at_20_max": 0.17782673427387416, + "nauc_mrr_at_20_std": 0.14044063214842467, + "nauc_mrr_at_3_diff1": 0.19223269902510867, + "nauc_mrr_at_3_max": 0.1737223392631852, + "nauc_mrr_at_3_std": 0.12906518563193872, + "nauc_mrr_at_5_diff1": 0.17914011218072062, + "nauc_mrr_at_5_max": 0.1803536954024063, + "nauc_mrr_at_5_std": 0.1365677627294623, + "nauc_ndcg_at_1000_diff1": 0.1288208179044034, + "nauc_ndcg_at_1000_max": 0.16094091919385248, + "nauc_ndcg_at_1000_std": 0.14176634076444566, + "nauc_ndcg_at_100_diff1": 0.11591678293100564, + "nauc_ndcg_at_100_max": 0.15974291568493476, + "nauc_ndcg_at_100_std": 0.1432967333785893, + "nauc_ndcg_at_10_diff1": 0.13337055165704745, + "nauc_ndcg_at_10_max": 0.18285450472033085, 
+ "nauc_ndcg_at_10_std": 0.15121808142977453, + "nauc_ndcg_at_1_diff1": 0.2816720992618195, + "nauc_ndcg_at_1_max": 0.1798827660041875, + "nauc_ndcg_at_1_std": 0.11059226026701093, + "nauc_ndcg_at_20_diff1": 0.13050844522353452, + "nauc_ndcg_at_20_max": 0.17719835319494498, + "nauc_ndcg_at_20_std": 0.15212794739793384, + "nauc_ndcg_at_3_diff1": 0.1709003542265605, + "nauc_ndcg_at_3_max": 0.173088647270922, + "nauc_ndcg_at_3_std": 0.13216158922258986, + "nauc_ndcg_at_5_diff1": 0.1511732347509773, + "nauc_ndcg_at_5_max": 0.18424322924075145, + "nauc_ndcg_at_5_std": 0.14470622170262354, + "nauc_precision_at_1000_diff1": -0.007594878436877857, + "nauc_precision_at_1000_max": 0.050120943214360156, + "nauc_precision_at_1000_std": 0.12949352973846803, + "nauc_precision_at_100_diff1": 0.006049101139922704, + "nauc_precision_at_100_max": 0.10771914623251887, + "nauc_precision_at_100_std": 0.1415370123735149, + "nauc_precision_at_10_diff1": 0.06188379038677309, + "nauc_precision_at_10_max": 0.18927920012064922, + "nauc_precision_at_10_std": 0.1729541508587404, + "nauc_precision_at_1_diff1": 0.2816720992618195, + "nauc_precision_at_1_max": 0.1798827660041875, + "nauc_precision_at_1_std": 0.11059226026701093, + "nauc_precision_at_20_diff1": 0.059239789747611515, + "nauc_precision_at_20_max": 0.17430027631771922, + "nauc_precision_at_20_std": 0.17466469009954885, + "nauc_precision_at_3_diff1": 0.12431006484606857, + "nauc_precision_at_3_max": 0.17189746067646125, + "nauc_precision_at_3_std": 0.13862203074968843, + "nauc_precision_at_5_diff1": 0.09469154579773151, + "nauc_precision_at_5_max": 0.1932157547161934, + "nauc_precision_at_5_std": 0.16181434192703428, + "nauc_recall_at_1000_diff1": -0.007594878436880864, + "nauc_recall_at_1000_max": 0.05012094321435784, + "nauc_recall_at_1000_std": 0.1294935297384662, + "nauc_recall_at_100_diff1": 0.006049101139922711, + "nauc_recall_at_100_max": 0.10771914623251896, + "nauc_recall_at_100_std": 0.14153701237351518, + 
"nauc_recall_at_10_diff1": 0.061883790386772934, + "nauc_recall_at_10_max": 0.18927920012064897, + "nauc_recall_at_10_std": 0.17295415085874052, + "nauc_recall_at_1_diff1": 0.2816720992618195, + "nauc_recall_at_1_max": 0.1798827660041875, + "nauc_recall_at_1_std": 0.11059226026701093, + "nauc_recall_at_20_diff1": 0.059239789747611564, + "nauc_recall_at_20_max": 0.17430027631771944, + "nauc_recall_at_20_std": 0.174664690099549, + "nauc_recall_at_3_diff1": 0.12431006484606853, + "nauc_recall_at_3_max": 0.17189746067646117, + "nauc_recall_at_3_std": 0.1386220307496883, + "nauc_recall_at_5_diff1": 0.09469154579773162, + "nauc_recall_at_5_max": 0.19321575471619337, + "nauc_recall_at_5_std": 0.16181434192703414, + "ndcg_at_1": 0.07838, + "ndcg_at_10": 0.16548, + "ndcg_at_100": 0.20996, + "ndcg_at_1000": 0.2575, + "ndcg_at_20": 0.18037, + "ndcg_at_3": 0.12736, + "ndcg_at_5": 0.14552, + "precision_at_1": 0.07838, + "precision_at_10": 0.02694, + "precision_at_100": 0.00492, + "precision_at_1000": 0.00089, + "precision_at_20": 0.01642, + "precision_at_3": 0.05446, + "precision_at_5": 0.0415, + "recall_at_1": 0.07838, + "recall_at_10": 0.26939, + "recall_at_100": 0.49175, + "recall_at_1000": 0.89068, + "recall_at_20": 0.32838, + "recall_at_3": 0.16337, + "recall_at_5": 0.20751 + } + ] + }, + "task_name": "MintakaRetrieval" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MultiEURLEXMultilabelClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MultiEURLEXMultilabelClassification.json new file mode 100644 index 0000000..9771a56 --- /dev/null +++ 
b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MultiEURLEXMultilabelClassification.json @@ -0,0 +1,73 @@ +{ + "dataset_revision": "2aea5a6dc8fdcfeca41d0fb963c0a338930bde5c", + "evaluation_time": 47.347028732299805, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.05144, + "f1": 0.30854148594046094, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "lrap": 0.4650435355820052, + "main_score": 0.05144, + "scores_per_experiment": [ + { + "accuracy": 0.0514, + "f1": 0.34212080815080365, + "lrap": 0.47483762169311533 + }, + { + "accuracy": 0.0382, + "f1": 0.277417442217232, + "lrap": 0.4388064920634813 + }, + { + "accuracy": 0.0388, + "f1": 0.281562344618226, + "lrap": 0.45648888888888195 + }, + { + "accuracy": 0.0354, + "f1": 0.3320053542737601, + "lrap": 0.4714609126984103 + }, + { + "accuracy": 0.0514, + "f1": 0.29519834612565227, + "lrap": 0.4699347142857095 + }, + { + "accuracy": 0.0314, + "f1": 0.2626970321439598, + "lrap": 0.4198859920634814 + }, + { + "accuracy": 0.0468, + "f1": 0.36741263289154896, + "lrap": 0.4922116190476201 + }, + { + "accuracy": 0.0572, + "f1": 0.31901196133552734, + "lrap": 0.47289268650793037 + }, + { + "accuracy": 0.1286, + "f1": 0.3184666205055383, + "lrap": 0.48906795238094963 + }, + { + "accuracy": 0.0352, + "f1": 0.2895223171423608, + "lrap": 0.46484847619047176 + } + ] + } + ] + }, + "task_name": "MultiEURLEXMultilabelClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MultiHateClassification.json 
b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MultiHateClassification.json new file mode 100644 index 0000000..107538c --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/MultiHateClassification.json @@ -0,0 +1,95 @@ +{ + "dataset_revision": "8f95949846bb9e33c6aaf730ccfdb8fe6bcfb7a9", + "evaluation_time": 1.9376027584075928, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.5578, + "ap": 0.32008693899430757, + "ap_weighted": 0.32008693899430757, + "f1": 0.5193445200440681, + "f1_weighted": 0.5685733932661492, + "hf_subset": "spa", + "languages": [ + "spa-Latn" + ], + "main_score": 0.5578, + "scores_per_experiment": [ + { + "accuracy": 0.53, + "ap": 0.33357146837559204, + "ap_weighted": 0.33357146837559204, + "f1": 0.5228988261280905, + "f1_weighted": 0.5466470141587385 + }, + { + "accuracy": 0.591, + "ap": 0.2989592272685056, + "ap_weighted": 0.2989592272685056, + "f1": 0.5068908685592894, + "f1_weighted": 0.5899817296435749 + }, + { + "accuracy": 0.566, + "ap": 0.3391732909379968, + "ap_weighted": 0.3391732909379968, + "f1": 0.549027187218142, + "f1_weighted": 0.5847225872954516 + }, + { + "accuracy": 0.527, + "ap": 0.31435896090265025, + "ap_weighted": 0.31435896090265025, + "f1": 0.5094780659954226, + "f1_weighted": 0.5473031933703835 + }, + { + "accuracy": 0.505, + "ap": 0.3266889927083788, + "ap_weighted": 0.3266889927083788, + "f1": 0.5013975976429704, + "f1_weighted": 0.5186891289567123 + }, + { + "accuracy": 0.559, + "ap": 0.3113112261197368, + "ap_weighted": 0.3113112261197368, + "f1": 0.5211929136551221, + "f1_weighted": 0.5760871884903896 + }, + { + "accuracy": 0.63, + "ap": 
0.3321251062383138, + "ap_weighted": 0.3321251062383138, + "f1": 0.5652194349262754, + "f1_weighted": 0.633692156558606 + }, + { + "accuracy": 0.494, + "ap": 0.31802265500794913, + "ap_weighted": 0.31802265500794913, + "f1": 0.48968065708153813, + "f1_weighted": 0.5088360039373254 + }, + { + "accuracy": 0.563, + "ap": 0.2820445151033386, + "ap_weighted": 0.2820445151033386, + "f1": 0.45265462506841814, + "f1_weighted": 0.5529239189928844 + }, + { + "accuracy": 0.613, + "ap": 0.34461394728061395, + "ap_weighted": 0.34461394728061395, + "f1": 0.575005024165412, + "f1_weighted": 0.626851011257425 + } + ] + } + ] + }, + "task_name": "MultiHateClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/PawsX.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/PawsX.json new file mode 100644 index 0000000..ec6a279 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/PawsX.json @@ -0,0 +1,127 @@ +{ + "dataset_revision": "8a04d940a42cd40658986fdd8e3da561533a3646", + "evaluation_time": 3.692772150039673, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "cosine": { + "accuracy": 0.6065, + "accuracy_threshold": 0.960740864276886, + "ap": 0.5995508319223644, + "f1": 0.6249555634553857, + "f1_threshold": 0.6940346956253052, + "precision": 0.46117523609653727, + "recall": 0.9691289966923925 + }, + "dot": { + "accuracy": 0.5585, + "accuracy_threshold": 135.313232421875, + "ap": 0.4940203240642238, + "f1": 0.6245250431778929, + 
"f1_threshold": 50.725486755371094, + "precision": 0.45472837022132795, + "recall": 0.9966923925027563 + }, + "euclidean": { + "accuracy": 0.609, + "accuracy_threshold": 2.660865306854248, + "ap": 0.6015051251246853, + "f1": 0.6240057845263919, + "f1_threshold": 7.785566329956055, + "precision": 0.464228079612695, + "recall": 0.9514884233737596 + }, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6015684593563027, + "manhattan": { + "accuracy": 0.609, + "accuracy_threshold": 58.04786682128906, + "ap": 0.6015684593563027, + "f1": 0.624548736462094, + "f1_threshold": 172.89822387695312, + "precision": 0.4643048845947397, + "recall": 0.9536934950385888 + }, + "max": { + "accuracy": 0.609, + "ap": 0.6015684593563027, + "f1": 0.6249555634553857 + }, + "similarity": { + "accuracy": 0.6065, + "accuracy_threshold": 0.960740864276886, + "ap": 0.5995508319223645, + "f1": 0.6249555634553857, + "f1_threshold": 0.6940346360206604, + "precision": 0.46117523609653727, + "recall": 0.9691289966923925 + } + } + ], + "validation": [ + { + "cosine": { + "accuracy": 0.62, + "accuracy_threshold": 0.9686229825019836, + "ap": 0.5675064413010118, + "f1": 0.6046010064701653, + "f1_threshold": 0.5832871794700623, + "precision": 0.434625322997416, + "recall": 0.9929161747343566 + }, + "dot": { + "accuracy": 0.58, + "accuracy_threshold": 161.220703125, + "ap": 0.46692153543214066, + "f1": 0.6065934065934067, + "f1_threshold": 70.09696960449219, + "precision": 0.4397238449283059, + "recall": 0.9775678866587958 + }, + "euclidean": { + "accuracy": 0.624, + "accuracy_threshold": 2.9109959602355957, + "ap": 0.5704933965685662, + "f1": 0.6045135035146134, + "f1_threshold": 8.24219799041748, + "precision": 0.4401939655172414, + "recall": 0.9645808736717828 + }, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.5704933965685662, + "manhattan": { + "accuracy": 0.624, + "accuracy_threshold": 63.25823211669922, + "ap": 0.5703471366686128, + "f1": 
0.604737231680237, + "f1_threshold": 181.4298095703125, + "precision": 0.4404312668463612, + "recall": 0.9645808736717828 + }, + "max": { + "accuracy": 0.624, + "ap": 0.5704933965685662, + "f1": 0.6065934065934067 + }, + "similarity": { + "accuracy": 0.62, + "accuracy_threshold": 0.9686229825019836, + "ap": 0.5677406947820186, + "f1": 0.6046010064701653, + "f1_threshold": 0.5832871794700623, + "precision": 0.434625322997416, + "recall": 0.9929161747343566 + } + } + ] + }, + "task_name": "PawsX" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/PublicHealthQA.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/PublicHealthQA.json new file mode 100644 index 0000000..a72e03b --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/PublicHealthQA.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "main", + "evaluation_time": 0.7530865669250488, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "spanish", + "languages": [ + "spa-Latn" + ], + "main_score": 0.62516, + "map_at_1": 0.45062, + "map_at_10": 0.5699, + "map_at_100": 0.57783, + "map_at_1000": 0.57809, + "map_at_20": 0.57517, + "map_at_3": 0.54527, + "map_at_5": 0.56348, + "mrr_at_1": 0.4506172839506173, + "mrr_at_10": 0.5698951597099745, + "mrr_at_100": 0.5778253563340834, + "mrr_at_1000": 0.5780904721143345, + "mrr_at_20": 0.5751654124339309, + "mrr_at_3": 0.5452674897119342, + "mrr_at_5": 0.563477366255144, + "nauc_map_at_1000_diff1": 0.47158147504561854, + 
"nauc_map_at_1000_max": 0.2846252256211228, + "nauc_map_at_1000_std": -0.21398561397890511, + "nauc_map_at_100_diff1": 0.47104672454519475, + "nauc_map_at_100_max": 0.2841477443626458, + "nauc_map_at_100_std": -0.21417979733131212, + "nauc_map_at_10_diff1": 0.47043147764363746, + "nauc_map_at_10_max": 0.28322066619786423, + "nauc_map_at_10_std": -0.21342137225556201, + "nauc_map_at_1_diff1": 0.5159930047869917, + "nauc_map_at_1_max": 0.2671616904412584, + "nauc_map_at_1_std": -0.20762998522300108, + "nauc_map_at_20_diff1": 0.47069509555394007, + "nauc_map_at_20_max": 0.28588555619594264, + "nauc_map_at_20_std": -0.21384775925275834, + "nauc_map_at_3_diff1": 0.46561689640154785, + "nauc_map_at_3_max": 0.2906911425092011, + "nauc_map_at_3_std": -0.21235416952194414, + "nauc_map_at_5_diff1": 0.46514977434868937, + "nauc_map_at_5_max": 0.2836836333775643, + "nauc_map_at_5_std": -0.21985593354752533, + "nauc_mrr_at_1000_diff1": 0.47158147504561854, + "nauc_mrr_at_1000_max": 0.2846252256211228, + "nauc_mrr_at_1000_std": -0.21398561397890511, + "nauc_mrr_at_100_diff1": 0.47104672454519475, + "nauc_mrr_at_100_max": 0.2841477443626458, + "nauc_mrr_at_100_std": -0.21417979733131212, + "nauc_mrr_at_10_diff1": 0.47043147764363746, + "nauc_mrr_at_10_max": 0.28322066619786423, + "nauc_mrr_at_10_std": -0.21342137225556201, + "nauc_mrr_at_1_diff1": 0.5159930047869917, + "nauc_mrr_at_1_max": 0.2671616904412584, + "nauc_mrr_at_1_std": -0.20762998522300108, + "nauc_mrr_at_20_diff1": 0.47069509555394007, + "nauc_mrr_at_20_max": 0.28588555619594264, + "nauc_mrr_at_20_std": -0.21384775925275834, + "nauc_mrr_at_3_diff1": 0.46561689640154785, + "nauc_mrr_at_3_max": 0.2906911425092011, + "nauc_mrr_at_3_std": -0.21235416952194414, + "nauc_mrr_at_5_diff1": 0.46514977434868937, + "nauc_mrr_at_5_max": 0.2836836333775643, + "nauc_mrr_at_5_std": -0.21985593354752533, + "nauc_ndcg_at_1000_diff1": 0.46520956699217336, + "nauc_ndcg_at_1000_max": 0.28578110100765497, + "nauc_ndcg_at_1000_std": 
-0.21542629280527723, + "nauc_ndcg_at_100_diff1": 0.4544320564375849, + "nauc_ndcg_at_100_max": 0.2765855030819246, + "nauc_ndcg_at_100_std": -0.2191233107644375, + "nauc_ndcg_at_10_diff1": 0.4600379545453733, + "nauc_ndcg_at_10_max": 0.28339731362943643, + "nauc_ndcg_at_10_std": -0.21435398420974108, + "nauc_ndcg_at_1_diff1": 0.5159930047869917, + "nauc_ndcg_at_1_max": 0.2671616904412584, + "nauc_ndcg_at_1_std": -0.20762998522300108, + "nauc_ndcg_at_20_diff1": 0.45768823580250156, + "nauc_ndcg_at_20_max": 0.29427860373536735, + "nauc_ndcg_at_20_std": -0.21519079253726803, + "nauc_ndcg_at_3_diff1": 0.4482292106322508, + "nauc_ndcg_at_3_max": 0.29608612195325246, + "nauc_ndcg_at_3_std": -0.2136759699400792, + "nauc_ndcg_at_5_diff1": 0.4467652516742521, + "nauc_ndcg_at_5_max": 0.2853455348552161, + "nauc_ndcg_at_5_std": -0.22896542035186454, + "nauc_precision_at_1000_diff1": 1.0, + "nauc_precision_at_1000_max": 1.0, + "nauc_precision_at_1000_std": 1.0, + "nauc_precision_at_100_diff1": -0.1217695285604106, + "nauc_precision_at_100_max": -0.20715474459372657, + "nauc_precision_at_100_std": -0.4136878064861392, + "nauc_precision_at_10_diff1": 0.4202599107548976, + "nauc_precision_at_10_max": 0.28040455468536307, + "nauc_precision_at_10_std": -0.216843775559926, + "nauc_precision_at_1_diff1": 0.5159930047869917, + "nauc_precision_at_1_max": 0.2671616904412584, + "nauc_precision_at_1_std": -0.20762998522300108, + "nauc_precision_at_20_diff1": 0.3689365293735076, + "nauc_precision_at_20_max": 0.36795708230651636, + "nauc_precision_at_20_std": -0.220404400545685, + "nauc_precision_at_3_diff1": 0.38993073497721403, + "nauc_precision_at_3_max": 0.31316408024217934, + "nauc_precision_at_3_std": -0.21799331652477838, + "nauc_precision_at_5_diff1": 0.37377813182159025, + "nauc_precision_at_5_max": 0.2906700534996669, + "nauc_precision_at_5_std": -0.2690050089055706, + "nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + 
"nauc_recall_at_100_diff1": -0.12176952856041803, + "nauc_recall_at_100_max": -0.20715474459373226, + "nauc_recall_at_100_std": -0.4136878064861405, + "nauc_recall_at_10_diff1": 0.4202599107548977, + "nauc_recall_at_10_max": 0.28040455468536213, + "nauc_recall_at_10_std": -0.21684377555992648, + "nauc_recall_at_1_diff1": 0.5159930047869917, + "nauc_recall_at_1_max": 0.2671616904412584, + "nauc_recall_at_1_std": -0.20762998522300108, + "nauc_recall_at_20_diff1": 0.3689365293735081, + "nauc_recall_at_20_max": 0.3679570823065182, + "nauc_recall_at_20_std": -0.22040440054568552, + "nauc_recall_at_3_diff1": 0.389930734977214, + "nauc_recall_at_3_max": 0.31316408024217973, + "nauc_recall_at_3_std": -0.21799331652477866, + "nauc_recall_at_5_diff1": 0.3737781318215906, + "nauc_recall_at_5_max": 0.29067005349966724, + "nauc_recall_at_5_std": -0.26900500890557033, + "ndcg_at_1": 0.45062, + "ndcg_at_10": 0.62516, + "ndcg_at_100": 0.66255, + "ndcg_at_1000": 0.66704, + "ndcg_at_20": 0.64405, + "ndcg_at_3": 0.57642, + "ndcg_at_5": 0.60936, + "precision_at_1": 0.45062, + "precision_at_10": 0.07963, + "precision_at_100": 0.00969, + "precision_at_1000": 0.001, + "precision_at_20": 0.04352, + "precision_at_3": 0.22222, + "precision_at_5": 0.14938, + "recall_at_1": 0.45062, + "recall_at_10": 0.7963, + "recall_at_100": 0.96914, + "recall_at_1000": 1.0, + "recall_at_20": 0.87037, + "recall_at_3": 0.66667, + "recall_at_5": 0.74691 + } + ] + }, + "task_name": "PublicHealthQA" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SIB200Classification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SIB200Classification.json new file mode 100644 index 
0000000..6dba627 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SIB200Classification.json @@ -0,0 +1,201 @@ +{ + "dataset_revision": "a74d7350ea12af010cfb1c21e34f1f81fd2e615b", + "evaluation_time": 3.262220621109009, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.6549019607843137, + "f1": 0.6473752909208815, + "f1_weighted": 0.6554681173330721, + "hf_subset": "spa_Latn", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6549019607843137, + "scores_per_experiment": [ + { + "accuracy": 0.6666666666666666, + "f1": 0.6549941884755778, + "f1_weighted": 0.6617558291899887 + }, + { + "accuracy": 0.6323529411764706, + "f1": 0.6123855390109261, + "f1_weighted": 0.6347141000649773 + }, + { + "accuracy": 0.6127450980392157, + "f1": 0.6246561862134277, + "f1_weighted": 0.6064500045642957 + }, + { + "accuracy": 0.6617647058823529, + "f1": 0.6493336781993205, + "f1_weighted": 0.6627044240215596 + }, + { + "accuracy": 0.7205882352941176, + "f1": 0.7160408454711865, + "f1_weighted": 0.7162979579704671 + }, + { + "accuracy": 0.6764705882352942, + "f1": 0.6581556714741864, + "f1_weighted": 0.6819555187826845 + }, + { + "accuracy": 0.6372549019607843, + "f1": 0.6447547289281906, + "f1_weighted": 0.6454451716971885 + }, + { + "accuracy": 0.6911764705882353, + "f1": 0.687053019138875, + "f1_weighted": 0.6946380980551722 + }, + { + "accuracy": 0.6176470588235294, + "f1": 0.603054630025092, + "f1_weighted": 0.6242638312627237 + }, + { + "accuracy": 0.6323529411764706, + "f1": 0.6233244222720316, + "f1_weighted": 0.6264562377216636 + } + ] + } + ], + "train": [ + { + "accuracy": 0.6766048502139801, + "f1": 0.6730148976315004, + "f1_weighted": 0.6768781685716406, + "hf_subset": "spa_Latn", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6766048502139801, + 
"scores_per_experiment": [ + { + "accuracy": 0.7218259629101283, + "f1": 0.7096241869171067, + "f1_weighted": 0.7243468603850782 + }, + { + "accuracy": 0.637660485021398, + "f1": 0.6322725805214832, + "f1_weighted": 0.6340740075369343 + }, + { + "accuracy": 0.637660485021398, + "f1": 0.6340193152552757, + "f1_weighted": 0.6351654481786452 + }, + { + "accuracy": 0.7104136947218259, + "f1": 0.6997441819805666, + "f1_weighted": 0.7114017490043526 + }, + { + "accuracy": 0.7089871611982882, + "f1": 0.7087450299940781, + "f1_weighted": 0.7077087016570865 + }, + { + "accuracy": 0.6590584878744651, + "f1": 0.6586609555636654, + "f1_weighted": 0.6589452635100681 + }, + { + "accuracy": 0.6490727532097005, + "f1": 0.6510391573951563, + "f1_weighted": 0.6512125029833539 + }, + { + "accuracy": 0.6761768901569187, + "f1": 0.6791424066154649, + "f1_weighted": 0.6809032115523276 + }, + { + "accuracy": 0.6747503566333809, + "f1": 0.6705490818265639, + "f1_weighted": 0.678368178255033 + }, + { + "accuracy": 0.6904422253922967, + "f1": 0.686352080245643, + "f1_weighted": 0.686655762653527 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.6343434343434343, + "f1": 0.6337263258873838, + "f1_weighted": 0.6370742410309489, + "hf_subset": "spa_Latn", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6343434343434343, + "scores_per_experiment": [ + { + "accuracy": 0.6464646464646465, + "f1": 0.6357399463281815, + "f1_weighted": 0.6461240957497641 + }, + { + "accuracy": 0.5858585858585859, + "f1": 0.582592166089703, + "f1_weighted": 0.594939124371378 + }, + { + "accuracy": 0.6262626262626263, + "f1": 0.6323107609958087, + "f1_weighted": 0.6196099084398837 + }, + { + "accuracy": 0.6464646464646465, + "f1": 0.6389465437732292, + "f1_weighted": 0.6498543892629494 + }, + { + "accuracy": 0.7070707070707071, + "f1": 0.7174958691320408, + "f1_weighted": 0.698349092068304 + }, + { + "accuracy": 0.6363636363636364, + "f1": 0.6291789859385678, + "f1_weighted": 0.6449218007089405 + }, + { + 
"accuracy": 0.6060606060606061, + "f1": 0.6147293870759344, + "f1_weighted": 0.6272045557870559 + }, + { + "accuracy": 0.7474747474747475, + "f1": 0.7424752429655993, + "f1_weighted": 0.7521718740774285 + }, + { + "accuracy": 0.48484848484848486, + "f1": 0.48783165440089077, + "f1_weighted": 0.4885935226088679 + }, + { + "accuracy": 0.6565656565656566, + "f1": 0.6559627021738823, + "f1_weighted": 0.6489740472349168 + } + ] + } + ] + }, + "task_name": "SIB200Classification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SIB200ClusteringS2S.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SIB200ClusteringS2S.json new file mode 100644 index 0000000..5f1edd6 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SIB200ClusteringS2S.json @@ -0,0 +1,33 @@ +{ + "dataset_revision": "a74d7350ea12af010cfb1c21e34f1f81fd2e615b", + "evaluation_time": 3.3481369018554688, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "spa_Latn", + "languages": [ + "spa-Latn" + ], + "main_score": 0.3347573603718645, + "v_measure": 0.3347573603718645, + "v_measures": { + "Level 0": [ + 0.33232210901839193, + 0.3807657067739453, + 0.3942063018233329, + 0.33511763993202287, + 0.26342836283940013, + 0.3137164192723059, + 0.3088622803490379, + 0.39538861954826193, + 0.2971551648766469, + 0.32661099928529896 + ] + } + } + ] + }, + "task_name": "SIB200ClusteringS2S" +} \ No newline at end of file diff --git 
a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/STS17.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/STS17.json new file mode 100644 index 0000000..29366e9 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/STS17.json @@ -0,0 +1,54 @@ +{ + "dataset_revision": "faeb762787bd10488a50c8b5be4a3b82e411949c", + "evaluation_time": 0.25136542320251465, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "cosine_pearson": 0.22959516885296977, + "cosine_spearman": 0.23167578806693545, + "euclidean_pearson": 0.10837657120741692, + "euclidean_spearman": 0.08825319601913272, + "hf_subset": "es-en", + "languages": [ + "spa-Latn", + "eng-Latn" + ], + "main_score": 0.23167578806693545, + "manhattan_pearson": 0.1142681417620011, + "manhattan_spearman": 0.09487233949321199, + "pearson": [ + 0.2295951618757698, + 0.0002511814478693034 + ], + "spearman": [ + 0.23167578806693545, + 0.00021975060438111495 + ] + }, + { + "cosine_pearson": 0.7935023680206318, + "cosine_spearman": 0.8138426859678183, + "euclidean_pearson": 0.7980227367337552, + "euclidean_spearman": 0.7969480808062653, + "hf_subset": "es-es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.8138426859678183, + "manhattan_pearson": 0.7984973545712435, + "manhattan_spearman": 0.7985076992010667, + "pearson": [ + 0.793502390329609, + 2.0521961377378995e-55 + ], + "spearman": [ + 0.8138307290156093, + 2.1254480551563648e-60 + ] + } + ] + }, + "task_name": "STS17" +} \ No newline at end of file diff --git 
a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/STS22.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/STS22.json new file mode 100644 index 0000000..1c2f73f --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/STS22.json @@ -0,0 +1,76 @@ +{ + "dataset_revision": "de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3", + "evaluation_time": 7.090377330780029, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "cosine_pearson": 0.4978428022843198, + "cosine_spearman": 0.49970798735740846, + "euclidean_pearson": 0.47855189277615345, + "euclidean_spearman": 0.46710726362543586, + "hf_subset": "es-it", + "languages": [ + "spa-Latn", + "ita-Latn" + ], + "main_score": 0.49970798735740846, + "manhattan_pearson": 0.47263176205371293, + "manhattan_spearman": 0.4603589488973792, + "pearson": [ + 0.4978427820565887, + 4.702290589537785e-15 + ], + "spearman": [ + 0.49970798735740846, + 3.58566353821529e-15 + ] + }, + { + "cosine_pearson": 0.6113034394291524, + "cosine_spearman": 0.5948046751699049, + "euclidean_pearson": 0.6224912903737604, + "euclidean_spearman": 0.5999849091787655, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.5948046751699049, + "manhattan_pearson": 0.6249518586407526, + "manhattan_spearman": 0.5997154342133152, + "pearson": [ + 0.6113034350762023, + 2.147357704769111e-22 + ], + "spearman": [ + 0.5948046751699049, + 5.275280471575169e-21 + ] + }, + { + "cosine_pearson": 0.5729968513539743, + "cosine_spearman": 0.5961517692922045, + 
"euclidean_pearson": 0.5524678872903522, + "euclidean_spearman": 0.5738666494630229, + "hf_subset": "es-en", + "languages": [ + "spa-Latn", + "eng-Latn" + ], + "main_score": 0.5961517692922045, + "manhattan_pearson": 0.5510976716945734, + "manhattan_spearman": 0.5712828986498969, + "pearson": [ + 0.5729968496232489, + 5.09849190169218e-34 + ], + "spearman": [ + 0.5961517692922045, + 2.3298019568771317e-37 + ] + } + ] + }, + "task_name": "STS22" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/STSBenchmarkMultilingualSTS.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/STSBenchmarkMultilingualSTS.json new file mode 100644 index 0000000..62d99aa --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/STSBenchmarkMultilingualSTS.json @@ -0,0 +1,55 @@ +{ + "dataset_revision": "29afa2569dcedaaa2fe6a3dcfebab33d28b82e8c", + "evaluation_time": 1.376321792602539, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "dev": [ + { + "cosine_pearson": 0.7769719941808816, + "cosine_spearman": 0.7724973718736371, + "euclidean_pearson": 0.7684806932586751, + "euclidean_spearman": 0.7663576836758872, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.7724973718736371, + "manhattan_pearson": 0.768090051301334, + "manhattan_spearman": 0.7662266035142532, + "pearson": [ + 0.7769719931083154, + 2.2670762650358534e-303 + ], + "spearman": [ + 0.772496969738128, + 9.999126889041934e-298 + ] + } + ], + "test": [ + { + "cosine_pearson": 
0.7006059243671393, + "cosine_spearman": 0.6948053245000386, + "euclidean_pearson": 0.7125291832724886, + "euclidean_spearman": 0.7005614540032093, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6948053245000386, + "manhattan_pearson": 0.712317433441208, + "manhattan_spearman": 0.700282023368011, + "pearson": [ + 0.7006059236409815, + 4.4738224965820465e-204 + ], + "spearman": [ + 0.6948027666163249, + 2.3576449306562938e-199 + ] + } + ] + }, + "task_name": "STSBenchmarkMultilingualSTS" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/STSES.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/STSES.json new file mode 100644 index 0000000..707572f --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/STSES.json @@ -0,0 +1,32 @@ +{ + "dataset_revision": "0912bb6c9393c76d62a7c5ee81c4c817ff47c9f4", + "evaluation_time": 0.17313575744628906, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "cosine_pearson": 0.7001671775851761, + "cosine_spearman": 0.6040795444089487, + "euclidean_pearson": 0.6787237943760047, + "euclidean_spearman": 0.5883632992222803, + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6040795444089487, + "manhattan_pearson": 0.6785953303160526, + "manhattan_spearman": 0.5896540053487216, + "pearson": [ + 0.7001671694881852, + 3.7556323066349675e-24 + ], + "spearman": [ + 0.6040795444089487, + 8.706249242673398e-17 + ] + } + ] + }, + "task_name": "STSES" +} \ No 
newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishNewsClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishNewsClassification.json new file mode 100644 index 0000000..3f80152 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishNewsClassification.json @@ -0,0 +1,73 @@ +{ + "dataset_revision": "0086c197b914690a9dace258a19398890a05299a", + "evaluation_time": 16.999836444854736, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "train": [ + { + "accuracy": 0.81318359375, + "f1": 0.8134390749666117, + "f1_weighted": 0.8133446474464803, + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.81318359375, + "scores_per_experiment": [ + { + "accuracy": 0.78466796875, + "f1": 0.7855770552922136, + "f1_weighted": 0.785439811955572 + }, + { + "accuracy": 0.80517578125, + "f1": 0.805069831062872, + "f1_weighted": 0.804944201400515 + }, + { + "accuracy": 0.82275390625, + "f1": 0.8223607927817761, + "f1_weighted": 0.8223114917829379 + }, + { + "accuracy": 0.81640625, + "f1": 0.8168763771949509, + "f1_weighted": 0.8167803150251276 + }, + { + "accuracy": 0.83203125, + "f1": 0.8312633389456011, + "f1_weighted": 0.8311878732960509 + }, + { + "accuracy": 0.8330078125, + "f1": 0.8337427653368202, + "f1_weighted": 0.8336408451725623 + }, + { + "accuracy": 0.7978515625, + "f1": 0.7964574464560225, + "f1_weighted": 0.7963524718504411 + }, + { + "accuracy": 0.80517578125, + "f1": 0.8053529783047013, + "f1_weighted": 
0.8052739917156719 + }, + { + "accuracy": 0.8056640625, + "f1": 0.8063842834033986, + "f1_weighted": 0.8063134171149237 + }, + { + "accuracy": 0.8291015625, + "f1": 0.8313058808877599, + "f1_weighted": 0.8312020551510007 + } + ] + } + ] + }, + "task_name": "SpanishNewsClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishNewsClusteringP2P.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishNewsClusteringP2P.json new file mode 100644 index 0000000..faf598a --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishNewsClusteringP2P.json @@ -0,0 +1,23 @@ +{ + "dataset_revision": "bf8ca8ddc5b7da4f7004720ddf99bbe0483480e6", + "evaluation_time": 5.457086563110352, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.379918321557151, + "v_measure": 0.379918321557151, + "v_measure_std": 0.0, + "v_measures": [ + 0.379918321557151 + ] + } + ] + }, + "task_name": "SpanishNewsClusteringP2P" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishPassageRetrievalS2P.json 
b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishPassageRetrievalS2P.json new file mode 100644 index 0000000..1a3f2ea --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishPassageRetrievalS2P.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "9cddf2ce5209ade52c2115ccfa00eb22c6d3a837", + "evaluation_time": 35.20808529853821, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.40022, + "map_at_1": 0.12136, + "map_at_10": 0.25995, + "map_at_100": 0.3106, + "map_at_1000": 0.31641, + "map_at_20": 0.2854, + "map_at_3": 0.18748, + "map_at_5": 0.21553, + "mrr_at_1": 0.47904191616766467, + "mrr_at_10": 0.622077274023382, + "mrr_at_100": 0.6248658217916299, + "mrr_at_1000": 0.6249538018486397, + "mrr_at_20": 0.6246897034400976, + "mrr_at_3": 0.5778443113772455, + "mrr_at_5": 0.6062874251497007, + "nauc_map_at_1000_diff1": 0.17481769624369123, + "nauc_map_at_1000_max": 0.3038725948186323, + "nauc_map_at_1000_std": -0.10255554615150143, + "nauc_map_at_100_diff1": 0.17267789022765032, + "nauc_map_at_100_max": 0.30700987451871603, + "nauc_map_at_100_std": -0.09714179825987225, + "nauc_map_at_10_diff1": 0.20069185450488808, + "nauc_map_at_10_max": 0.2662622780653073, + "nauc_map_at_10_std": -0.1192241774184681, + "nauc_map_at_1_diff1": 0.21289250974315124, + "nauc_map_at_1_max": 0.12545110111780092, + "nauc_map_at_1_std": -0.1028752945199612, + "nauc_map_at_20_diff1": 0.18606474885693716, + "nauc_map_at_20_max": 0.30038825477851966, + "nauc_map_at_20_std": -0.09869602619346368, + "nauc_map_at_3_diff1": 0.288987762214622, + "nauc_map_at_3_max": 
0.2788428478588407, + "nauc_map_at_3_std": -0.18753651558639997, + "nauc_map_at_5_diff1": 0.25365664689228573, + "nauc_map_at_5_max": 0.21627409685900337, + "nauc_map_at_5_std": -0.17479013360925522, + "nauc_mrr_at_1000_diff1": 0.2767563064886639, + "nauc_mrr_at_1000_max": 0.3016028919572398, + "nauc_mrr_at_1000_std": 0.008084154687729094, + "nauc_mrr_at_100_diff1": 0.27644928469934205, + "nauc_mrr_at_100_max": 0.3017657479961963, + "nauc_mrr_at_100_std": 0.008361985169690419, + "nauc_mrr_at_10_diff1": 0.2762944123995444, + "nauc_mrr_at_10_max": 0.30435276967992586, + "nauc_mrr_at_10_std": 0.01057816582217488, + "nauc_mrr_at_1_diff1": 0.24300894056136213, + "nauc_mrr_at_1_max": 0.23854462968592624, + "nauc_mrr_at_1_std": 0.0010662939945230519, + "nauc_mrr_at_20_diff1": 0.27615981047608074, + "nauc_mrr_at_20_max": 0.30180205524691694, + "nauc_mrr_at_20_std": 0.008434891705626278, + "nauc_mrr_at_3_diff1": 0.29970498033313375, + "nauc_mrr_at_3_max": 0.3082515222217264, + "nauc_mrr_at_3_std": -0.01427786597442777, + "nauc_mrr_at_5_diff1": 0.2835610483976153, + "nauc_mrr_at_5_max": 0.3156942547629527, + "nauc_mrr_at_5_std": 0.023324496606770606, + "nauc_ndcg_at_1000_diff1": 0.1230370771663956, + "nauc_ndcg_at_1000_max": 0.3465409739859415, + "nauc_ndcg_at_1000_std": -0.015264740677426664, + "nauc_ndcg_at_100_diff1": 0.10504091124494593, + "nauc_ndcg_at_100_max": 0.3549998620853102, + "nauc_ndcg_at_100_std": 0.000900074880563761, + "nauc_ndcg_at_10_diff1": 0.18184339234723795, + "nauc_ndcg_at_10_max": 0.303154240769196, + "nauc_ndcg_at_10_std": -0.05671756678632068, + "nauc_ndcg_at_1_diff1": 0.21205148612363173, + "nauc_ndcg_at_1_max": 0.21842581119008506, + "nauc_ndcg_at_1_std": -0.0011402042839973113, + "nauc_ndcg_at_20_diff1": 0.13949476386628923, + "nauc_ndcg_at_20_max": 0.3621434264946678, + "nauc_ndcg_at_20_std": -0.0019422089588749924, + "nauc_ndcg_at_3_diff1": 0.26403147714353276, + "nauc_ndcg_at_3_max": 0.342162987622103, + "nauc_ndcg_at_3_std": 
-0.13658159674390835, + "nauc_ndcg_at_5_diff1": 0.2593409224049024, + "nauc_ndcg_at_5_max": 0.26918192892855786, + "nauc_ndcg_at_5_std": -0.13317302867254738, + "nauc_precision_at_1000_diff1": -0.0903314599449656, + "nauc_precision_at_1000_max": -0.11162239348110206, + "nauc_precision_at_1000_std": -0.0452609161716946, + "nauc_precision_at_100_diff1": -0.17051373584965346, + "nauc_precision_at_100_max": -0.0007441012823367824, + "nauc_precision_at_100_std": -0.008528749137334576, + "nauc_precision_at_10_diff1": -0.061994342754566154, + "nauc_precision_at_10_max": 0.3935671457024858, + "nauc_precision_at_10_std": 0.11604480905525386, + "nauc_precision_at_1_diff1": 0.21205148612363173, + "nauc_precision_at_1_max": 0.21842581119008506, + "nauc_precision_at_1_std": -0.0011402042839973113, + "nauc_precision_at_20_diff1": -0.15542018080679257, + "nauc_precision_at_20_max": 0.3759474689244075, + "nauc_precision_at_20_std": 0.17339314166252864, + "nauc_precision_at_3_diff1": 0.21121253716337912, + "nauc_precision_at_3_max": 0.4035339702892788, + "nauc_precision_at_3_std": -0.12403982837384787, + "nauc_precision_at_5_diff1": 0.16181069342910576, + "nauc_precision_at_5_max": 0.27344424540782064, + "nauc_precision_at_5_std": -0.05295984523428073, + "nauc_recall_at_1000_diff1": -0.8608952413060788, + "nauc_recall_at_1000_max": 0.555520517679306, + "nauc_recall_at_1000_std": 0.9437397187521263, + "nauc_recall_at_100_diff1": -0.18152050536218237, + "nauc_recall_at_100_max": 0.336589092154205, + "nauc_recall_at_100_std": 0.24167673741113904, + "nauc_recall_at_10_diff1": 0.0487798975940694, + "nauc_recall_at_10_max": 0.23350189467684582, + "nauc_recall_at_10_std": -0.01309123061688193, + "nauc_recall_at_1_diff1": 0.21289250974315124, + "nauc_recall_at_1_max": 0.12545110111780092, + "nauc_recall_at_1_std": -0.1028752945199612, + "nauc_recall_at_20_diff1": 0.007304562844697699, + "nauc_recall_at_20_max": 0.3398333384897412, + "nauc_recall_at_20_std": 0.10588618878858275, + 
"nauc_recall_at_3_diff1": 0.26275810712557457, + "nauc_recall_at_3_max": 0.3045447078390836, + "nauc_recall_at_3_std": -0.18955942785599803, + "nauc_recall_at_5_diff1": 0.1707017771444248, + "nauc_recall_at_5_max": 0.178586250643481, + "nauc_recall_at_5_std": -0.1145911964807851, + "ndcg_at_1": 0.48503, + "ndcg_at_10": 0.40022, + "ndcg_at_100": 0.53557, + "ndcg_at_1000": 0.57595, + "ndcg_at_20": 0.45258, + "ndcg_at_3": 0.37121, + "ndcg_at_5": 0.35305, + "precision_at_1": 0.48503, + "precision_at_10": 0.19341, + "precision_at_100": 0.04455, + "precision_at_1000": 0.00571, + "precision_at_20": 0.13653, + "precision_at_3": 0.31737, + "precision_at_5": 0.2491, + "recall_at_1": 0.12136, + "recall_at_10": 0.44246, + "recall_at_100": 0.81212, + "recall_at_1000": 0.97224, + "recall_at_20": 0.5674, + "recall_at_3": 0.22785, + "recall_at_5": 0.2958 + } + ] + }, + "task_name": "SpanishPassageRetrievalS2P" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishPassageRetrievalS2S.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishPassageRetrievalS2S.json new file mode 100644 index 0000000..36c287d --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishPassageRetrievalS2S.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "9cddf2ce5209ade52c2115ccfa00eb22c6d3a837", + "evaluation_time": 0.7334530353546143, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 
0.551, + "map_at_1": 0.12515, + "map_at_10": 0.35802, + "map_at_100": 0.48558, + "map_at_1000": 0.48563, + "map_at_20": 0.43003, + "map_at_3": 0.22535, + "map_at_5": 0.27007, + "mrr_at_1": 0.6047904191616766, + "mrr_at_10": 0.7216424294268606, + "mrr_at_100": 0.7234745177330595, + "mrr_at_1000": 0.7234745177330595, + "mrr_at_20": 0.723202334826146, + "mrr_at_3": 0.6906187624750498, + "mrr_at_5": 0.7118762475049899, + "nauc_map_at_1000_diff1": 0.05381975978203124, + "nauc_map_at_1000_max": -0.19068423396832654, + "nauc_map_at_1000_std": -0.318257756392083, + "nauc_map_at_100_diff1": 0.053660730261866574, + "nauc_map_at_100_max": -0.19118672303129786, + "nauc_map_at_100_std": -0.3184305389838363, + "nauc_map_at_10_diff1": 0.11344249401458821, + "nauc_map_at_10_max": -0.19522550756005838, + "nauc_map_at_10_std": -0.32914847643995837, + "nauc_map_at_1_diff1": 0.27443974339637023, + "nauc_map_at_1_max": -0.17730212949483137, + "nauc_map_at_1_std": -0.2564473616774583, + "nauc_map_at_20_diff1": 0.07930117769894833, + "nauc_map_at_20_max": -0.1802930168633578, + "nauc_map_at_20_std": -0.34583445173277255, + "nauc_map_at_3_diff1": 0.1881039453144911, + "nauc_map_at_3_max": -0.21030530881003967, + "nauc_map_at_3_std": -0.3099423754014957, + "nauc_map_at_5_diff1": 0.13170493687990498, + "nauc_map_at_5_max": -0.22908434427600463, + "nauc_map_at_5_std": -0.2981294832568133, + "nauc_mrr_at_1000_diff1": 0.11994067882449629, + "nauc_mrr_at_1000_max": -0.2102138980193067, + "nauc_mrr_at_1000_std": -0.24007590496891254, + "nauc_mrr_at_100_diff1": 0.11994067882449629, + "nauc_mrr_at_100_max": -0.2102138980193067, + "nauc_mrr_at_100_std": -0.24007590496891254, + "nauc_mrr_at_10_diff1": 0.11692622787343909, + "nauc_mrr_at_10_max": -0.20493498023299586, + "nauc_mrr_at_10_std": -0.23649905102789953, + "nauc_mrr_at_1_diff1": 0.1392121458261825, + "nauc_mrr_at_1_max": -0.18821664201771995, + "nauc_mrr_at_1_std": -0.23329026091174443, + "nauc_mrr_at_20_diff1": 0.11994229681720686, + 
"nauc_mrr_at_20_max": -0.20909892915426817, + "nauc_mrr_at_20_std": -0.23912599304119025, + "nauc_mrr_at_3_diff1": 0.14360335792728132, + "nauc_mrr_at_3_max": -0.24284187245732874, + "nauc_mrr_at_3_std": -0.28189423457107926, + "nauc_mrr_at_5_diff1": 0.11293581117999407, + "nauc_mrr_at_5_max": -0.2026229103940088, + "nauc_mrr_at_5_std": -0.22453018134673355, + "nauc_ndcg_at_1000_diff1": 0.030303135163982697, + "nauc_ndcg_at_1000_max": -0.17876886662009528, + "nauc_ndcg_at_1000_std": -0.25509604138907327, + "nauc_ndcg_at_100_diff1": 0.029390635648117548, + "nauc_ndcg_at_100_max": -0.18163088201640037, + "nauc_ndcg_at_100_std": -0.25606555052731217, + "nauc_ndcg_at_10_diff1": 0.09151368894356164, + "nauc_ndcg_at_10_max": -0.20017709824965096, + "nauc_ndcg_at_10_std": -0.3149725548245281, + "nauc_ndcg_at_1_diff1": 0.1392121458261825, + "nauc_ndcg_at_1_max": -0.18821664201771995, + "nauc_ndcg_at_1_std": -0.23329026091174443, + "nauc_ndcg_at_20_diff1": 0.10151580946084861, + "nauc_ndcg_at_20_max": -0.135153697771884, + "nauc_ndcg_at_20_std": -0.33030260707323034, + "nauc_ndcg_at_3_diff1": 0.05727923248440769, + "nauc_ndcg_at_3_max": -0.21088672483400434, + "nauc_ndcg_at_3_std": -0.25654855527181897, + "nauc_ndcg_at_5_diff1": 0.03021623749759961, + "nauc_ndcg_at_5_max": -0.22122249648860148, + "nauc_ndcg_at_5_std": -0.23768798267938876, + "nauc_precision_at_1000_diff1": -0.23066915579705854, + "nauc_precision_at_1000_max": 0.007942446381149305, + "nauc_precision_at_1000_std": 0.15358317028033047, + "nauc_precision_at_100_diff1": -0.2314080939510637, + "nauc_precision_at_100_max": 0.005548991016028222, + "nauc_precision_at_100_std": 0.1527800869226724, + "nauc_precision_at_10_diff1": -0.18645235016756934, + "nauc_precision_at_10_max": -0.041786369257374556, + "nauc_precision_at_10_std": -0.05408901372784425, + "nauc_precision_at_1_diff1": 0.1392121458261825, + "nauc_precision_at_1_max": -0.18821664201771995, + "nauc_precision_at_1_std": -0.23329026091174443, + 
"nauc_precision_at_20_diff1": -0.1824512495901429, + "nauc_precision_at_20_max": 0.015557822145490806, + "nauc_precision_at_20_std": -0.006436038570984637, + "nauc_precision_at_3_diff1": -0.04841996689899454, + "nauc_precision_at_3_max": -0.16381568438347885, + "nauc_precision_at_3_std": -0.2043982036955689, + "nauc_precision_at_5_diff1": -0.16389239283775092, + "nauc_precision_at_5_max": -0.1291472834819096, + "nauc_precision_at_5_std": -0.0877660608324423, + "nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + "nauc_recall_at_100_diff1": -0.5648868145417875, + "nauc_recall_at_100_max": -1.7381069047170103, + "nauc_recall_at_100_std": -0.5648868145417875, + "nauc_recall_at_10_diff1": 0.1905455492088472, + "nauc_recall_at_10_max": -0.09788611816291132, + "nauc_recall_at_10_std": -0.3173481431102901, + "nauc_recall_at_1_diff1": 0.27443974339637023, + "nauc_recall_at_1_max": -0.17730212949483137, + "nauc_recall_at_1_std": -0.2564473616774583, + "nauc_recall_at_20_diff1": 0.17802835114290502, + "nauc_recall_at_20_max": 0.025903791507722793, + "nauc_recall_at_20_std": -0.3122636344963216, + "nauc_recall_at_3_diff1": 0.21951984901917115, + "nauc_recall_at_3_max": -0.21908808161280197, + "nauc_recall_at_3_std": -0.33443767759447246, + "nauc_recall_at_5_diff1": 0.13958728725002917, + "nauc_recall_at_5_max": -0.2296194351994532, + "nauc_recall_at_5_std": -0.2643356526944423, + "ndcg_at_1": 0.60479, + "ndcg_at_10": 0.551, + "ndcg_at_100": 0.71621, + "ndcg_at_1000": 0.71643, + "ndcg_at_20": 0.6151, + "ndcg_at_3": 0.52548, + "ndcg_at_5": 0.50862, + "precision_at_1": 0.60479, + "precision_at_10": 0.3497, + "precision_at_100": 0.07713, + "precision_at_1000": 0.00772, + "precision_at_20": 0.26078, + "precision_at_3": 0.47505, + "precision_at_5": 0.40359, + "recall_at_1": 0.12515, + "recall_at_10": 0.55869, + "recall_at_100": 0.99925, + "recall_at_1000": 1.0, + "recall_at_20": 0.74321, + "recall_at_3": 0.26642, + "recall_at_5": 
0.34408 + } + ] + }, + "task_name": "SpanishPassageRetrievalS2S" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishSentimentClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishSentimentClassification.json new file mode 100644 index 0000000..230164a --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/SpanishSentimentClassification.json @@ -0,0 +1,181 @@ +{ + "dataset_revision": "2a6e340e4b59b7c0a78c03a0b79ac27e1b4a2662", + "evaluation_time": 2.603635787963867, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.6378378378378378, + "ap": 0.8673434606289703, + "ap_weighted": 0.8673434606289703, + "f1": 0.5625422611160673, + "f1_weighted": 0.6764645416856906, + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6378378378378378, + "scores_per_experiment": [ + { + "accuracy": 0.6925675675675675, + "ap": 0.8833830104321907, + "ap_weighted": 0.8833830104321907, + "f1": 0.611822858872188, + "f1_weighted": 0.7266597779056166 + }, + { + "accuracy": 0.5236486486486487, + "ap": 0.8423788772149428, + "ap_weighted": 0.8423788772149428, + "f1": 0.4739270183399509, + "f1_weighted": 0.5788341943758847 + }, + { + "accuracy": 0.7027027027027027, + "ap": 0.8937099073414112, + "ap_weighted": 0.8937099073414112, + "f1": 0.6289035787554137, + "f1_weighted": 0.7362477590423795 + }, + { + "accuracy": 0.4797297297297297, + "ap": 0.8364921978843598, + "ap_weighted": 0.8364921978843598, 
+ "f1": 0.443010752688172, + "f1_weighted": 0.5357744841615809 + }, + { + "accuracy": 0.7195945945945946, + "ap": 0.8943937070707305, + "ap_weighted": 0.8943937070707305, + "f1": 0.6402664909583424, + "f1_weighted": 0.7498420010170505 + }, + { + "accuracy": 0.6824324324324325, + "ap": 0.8951728721372588, + "ap_weighted": 0.8951728721372588, + "f1": 0.6174448660837046, + "f1_weighted": 0.7197203803374402 + }, + { + "accuracy": 0.46959459459459457, + "ap": 0.8277804929193993, + "ap_weighted": 0.8277804929193993, + "f1": 0.428915514592934, + "f1_weighted": 0.5277811267488687 + }, + { + "accuracy": 0.706081081081081, + "ap": 0.8861302938849931, + "ap_weighted": 0.8861302938849931, + "f1": 0.6229299363057326, + "f1_weighted": 0.7377861938371493 + }, + { + "accuracy": 0.6756756756756757, + "ap": 0.8472636413210183, + "ap_weighted": 0.8472636413210183, + "f1": 0.5509481668773704, + "f1_weighted": 0.704458946936823 + }, + { + "accuracy": 0.7263513513513513, + "ap": 0.8667296060833996, + "ap_weighted": 0.8667296060833996, + "f1": 0.6072534276868642, + "f1_weighted": 0.7475405524941128 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.6666666666666667, + "ap": 0.8781254155069786, + "ap_weighted": 0.8781254155069786, + "f1": 0.5945033703651762, + "f1_weighted": 0.7007789379342009, + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6666666666666667, + "scores_per_experiment": [ + { + "accuracy": 0.7619047619047619, + "ap": 0.9052695789059426, + "ap_weighted": 0.9052695789059426, + "f1": 0.6810884522407488, + "f1_weighted": 0.7848391200526575 + }, + { + "accuracy": 0.5170068027210885, + "ap": 0.8498085936102465, + "ap_weighted": 0.8498085936102465, + "f1": 0.47838472687290723, + "f1_weighted": 0.5701121570123376 + }, + { + "accuracy": 0.7006802721088435, + "ap": 0.8925144119949315, + "ap_weighted": 0.8925144119949315, + "f1": 0.6279337321675104, + "f1_weighted": 0.7342555982356127 + }, + { + "accuracy": 0.5986394557823129, + "ap": 
0.8826132381327186, + "ap_weighted": 0.8826132381327186, + "f1": 0.5550710511465654, + "f1_weighted": 0.6450492781116962 + }, + { + "accuracy": 0.782312925170068, + "ap": 0.9153536875023309, + "ap_weighted": 0.9153536875023309, + "f1": 0.7057057057057057, + "f1_weighted": 0.8027415170272313 + }, + { + "accuracy": 0.6122448979591837, + "ap": 0.8688705234159779, + "ap_weighted": 0.8688705234159779, + "f1": 0.5517573423206548, + "f1_weighted": 0.6581706346476963 + }, + { + "accuracy": 0.4421768707482993, + "ap": 0.8173947264856356, + "ap_weighted": 0.8173947264856356, + "f1": 0.40444664031620553, + "f1_weighted": 0.5013215562905003 + }, + { + "accuracy": 0.7414965986394558, + "ap": 0.8898572603085912, + "ap_weighted": 0.8898572603085912, + "f1": 0.6505255255255256, + "f1_weighted": 0.7657555514698371 + }, + { + "accuracy": 0.7142857142857143, + "ap": 0.8585073946161457, + "ap_weighted": 0.8585073946161457, + "f1": 0.5897129186602871, + "f1_weighted": 0.7358168147641831 + }, + { + "accuracy": 0.7959183673469388, + "ap": 0.9010647400972673, + "ap_weighted": 0.9010647400972673, + "f1": 0.7004076086956521, + "f1_weighted": 0.8097271517302573 + } + ] + } + ] + }, + "task_name": "SpanishSentimentClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/Tatoeba.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/Tatoeba.json new file mode 100644 index 0000000..848ed16 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/Tatoeba.json @@ -0,0 +1,23 @@ +{ + 
"dataset_revision": "69e8f12da6e31d59addadda9a9c8a2e601a0e282", + "evaluation_time": 0.6245768070220947, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.198, + "f1": 0.15162606837606837, + "hf_subset": "spa-eng", + "languages": [ + "spa-Latn", + "eng-Latn" + ], + "main_score": 0.15162606837606837, + "precision": 0.1370073685086843, + "recall": 0.198 + } + ] + }, + "task_name": "Tatoeba" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/TweetSentimentClassification.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/TweetSentimentClassification.json new file mode 100644 index 0000000..767bbca --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/TweetSentimentClassification.json @@ -0,0 +1,73 @@ +{ + "dataset_revision": "d522bb117c32f5e0207344f69f7075fc9941168b", + "evaluation_time": 0.829547643661499, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.408203125, + "f1": 0.4058319047307838, + "f1_weighted": 0.4057056459457991, + "hf_subset": "spanish", + "languages": [ + "spa-Latn" + ], + "main_score": 0.408203125, + "scores_per_experiment": [ + { + "accuracy": 0.4609375, + "f1": 0.45288778578934014, + "f1_weighted": 0.45280548264882786 + }, + { + "accuracy": 0.39453125, + "f1": 0.39248541653011726, + "f1_weighted": 0.3924736519507439 + }, + { + "accuracy": 0.4375, + "f1": 0.43509787314135134, + "f1_weighted": 0.43475696772656686 + }, + { + "accuracy": 
0.31640625, + "f1": 0.317093349012368, + "f1_weighted": 0.3170830836209331 + }, + { + "accuracy": 0.421875, + "f1": 0.4220059622402184, + "f1_weighted": 0.4219832239935702 + }, + { + "accuracy": 0.3203125, + "f1": 0.32164384967696696, + "f1_weighted": 0.32143491442268585 + }, + { + "accuracy": 0.42578125, + "f1": 0.415601055295463, + "f1_weighted": 0.41513298339152493 + }, + { + "accuracy": 0.42578125, + "f1": 0.42581351551872054, + "f1_weighted": 0.4258369712464528 + }, + { + "accuracy": 0.390625, + "f1": 0.3902731635289775, + "f1_weighted": 0.39006588282115173 + }, + { + "accuracy": 0.48828125, + "f1": 0.4854170765743146, + "f1_weighted": 0.48548329763553383 + } + ] + } + ] + }, + "task_name": "TweetSentimentClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/XMarket.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/XMarket.json new file mode 100644 index 0000000..845a5e5 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/XMarket.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "dfe57acff5b62c23732a7b7d3e3fb84ff501708b", + "evaluation_time": 29.802857160568237, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.11391, + "map_at_1": 0.02953, + "map_at_10": 0.05741, + "map_at_100": 0.06996, + "map_at_1000": 0.07476, + "map_at_20": 0.0628, + "map_at_3": 0.04403, + "map_at_5": 0.05028, + "mrr_at_1": 0.12251748251748251, + "mrr_at_10": 
0.17076512376512368, + "mrr_at_100": 0.1790410148691893, + "mrr_at_1000": 0.17984465856537252, + "mrr_at_20": 0.17515042581287782, + "mrr_at_3": 0.1527272727272726, + "mrr_at_5": 0.16239160839160838, + "nauc_map_at_1000_diff1": 0.1612100704855169, + "nauc_map_at_1000_max": 0.2935086748048516, + "nauc_map_at_1000_std": 0.14196019240502306, + "nauc_map_at_100_diff1": 0.17137211337365993, + "nauc_map_at_100_max": 0.27526210470780915, + "nauc_map_at_100_std": 0.11680638928725205, + "nauc_map_at_10_diff1": 0.19565626017151083, + "nauc_map_at_10_max": 0.23122757014384315, + "nauc_map_at_10_std": 0.05229074194568603, + "nauc_map_at_1_diff1": 0.3380436578435557, + "nauc_map_at_1_max": 0.203110079901594, + "nauc_map_at_1_std": -0.01270166033547901, + "nauc_map_at_20_diff1": 0.1860085536684612, + "nauc_map_at_20_max": 0.24798651950929965, + "nauc_map_at_20_std": 0.07661820524437718, + "nauc_map_at_3_diff1": 0.2330144510282287, + "nauc_map_at_3_max": 0.19406460035016085, + "nauc_map_at_3_std": 0.0017753342923682548, + "nauc_map_at_5_diff1": 0.2144721297196232, + "nauc_map_at_5_max": 0.21085079754329356, + "nauc_map_at_5_std": 0.02566698641579549, + "nauc_mrr_at_1000_diff1": 0.14770886373984224, + "nauc_mrr_at_1000_max": 0.31963158775534845, + "nauc_mrr_at_1000_std": 0.18819227964655413, + "nauc_mrr_at_100_diff1": 0.14773415277079638, + "nauc_mrr_at_100_max": 0.319537217611964, + "nauc_mrr_at_100_std": 0.18816529744849286, + "nauc_mrr_at_10_diff1": 0.1501181595103948, + "nauc_mrr_at_10_max": 0.3169222675278736, + "nauc_mrr_at_10_std": 0.18283082063334768, + "nauc_mrr_at_1_diff1": 0.18115468499232248, + "nauc_mrr_at_1_max": 0.3397806889136678, + "nauc_mrr_at_1_std": 0.1658503619370513, + "nauc_mrr_at_20_diff1": 0.14868805134120933, + "nauc_mrr_at_20_max": 0.31850158331326023, + "nauc_mrr_at_20_std": 0.1861520033534345, + "nauc_mrr_at_3_diff1": 0.15804128644402232, + "nauc_mrr_at_3_max": 0.3185594808762246, + "nauc_mrr_at_3_std": 0.17179034282213967, + "nauc_mrr_at_5_diff1": 
0.1540857979701909, + "nauc_mrr_at_5_max": 0.3186283714720409, + "nauc_mrr_at_5_std": 0.17757103527771706, + "nauc_ndcg_at_1000_diff1": 0.10999158815134912, + "nauc_ndcg_at_1000_max": 0.3610184754788345, + "nauc_ndcg_at_1000_std": 0.25750094926754286, + "nauc_ndcg_at_100_diff1": 0.1307037793828278, + "nauc_ndcg_at_100_max": 0.33135261637163355, + "nauc_ndcg_at_100_std": 0.21695871606129236, + "nauc_ndcg_at_10_diff1": 0.12977081830271198, + "nauc_ndcg_at_10_max": 0.30950250227500997, + "nauc_ndcg_at_10_std": 0.17877518671002396, + "nauc_ndcg_at_1_diff1": 0.18115468499232248, + "nauc_ndcg_at_1_max": 0.3397806889136678, + "nauc_ndcg_at_1_std": 0.1658503619370513, + "nauc_ndcg_at_20_diff1": 0.13541782706417002, + "nauc_ndcg_at_20_max": 0.3121702527593361, + "nauc_ndcg_at_20_std": 0.18428559196571043, + "nauc_ndcg_at_3_diff1": 0.1428900017782782, + "nauc_ndcg_at_3_max": 0.31374537603377817, + "nauc_ndcg_at_3_std": 0.1687037297645673, + "nauc_ndcg_at_5_diff1": 0.13753995570708116, + "nauc_ndcg_at_5_max": 0.3123389689639103, + "nauc_ndcg_at_5_std": 0.1752292209633786, + "nauc_precision_at_1000_diff1": -0.003825415502444535, + "nauc_precision_at_1000_max": 0.35037050754482557, + "nauc_precision_at_1000_std": 0.37288817394489954, + "nauc_precision_at_100_diff1": -0.0132324108021303, + "nauc_precision_at_100_max": 0.40650763331706635, + "nauc_precision_at_100_std": 0.43041129699965003, + "nauc_precision_at_10_diff1": 0.033258397314156196, + "nauc_precision_at_10_max": 0.3782404718127721, + "nauc_precision_at_10_std": 0.31297025649638965, + "nauc_precision_at_1_diff1": 0.18115468499232248, + "nauc_precision_at_1_max": 0.3397806889136678, + "nauc_precision_at_1_std": 0.1658503619370513, + "nauc_precision_at_20_diff1": 0.02701840684484534, + "nauc_precision_at_20_max": 0.3955754328490449, + "nauc_precision_at_20_std": 0.36081012062446083, + "nauc_precision_at_3_diff1": 0.10120861831508274, + "nauc_precision_at_3_max": 0.3468428373617635, + "nauc_precision_at_3_std": 
0.21715860028934014, + "nauc_precision_at_5_diff1": 0.07110599666506763, + "nauc_precision_at_5_max": 0.3660227973444863, + "nauc_precision_at_5_std": 0.26126822517518505, + "nauc_recall_at_1000_diff1": 0.0342446660161427, + "nauc_recall_at_1000_max": 0.3098589528746264, + "nauc_recall_at_1000_std": 0.26470617700102994, + "nauc_recall_at_100_diff1": 0.08358166475596025, + "nauc_recall_at_100_max": 0.2595311563939461, + "nauc_recall_at_100_std": 0.1953398756643907, + "nauc_recall_at_10_diff1": 0.13016954503726516, + "nauc_recall_at_10_max": 0.2002695011248594, + "nauc_recall_at_10_std": 0.07209450481962243, + "nauc_recall_at_1_diff1": 0.3380436578435557, + "nauc_recall_at_1_max": 0.203110079901594, + "nauc_recall_at_1_std": -0.01270166033547901, + "nauc_recall_at_20_diff1": 0.11671393287554412, + "nauc_recall_at_20_max": 0.22006499378322855, + "nauc_recall_at_20_std": 0.11081240526690561, + "nauc_recall_at_3_diff1": 0.18545848499760495, + "nauc_recall_at_3_max": 0.15898831872374544, + "nauc_recall_at_3_std": -0.0040334668773936, + "nauc_recall_at_5_diff1": 0.16462629048779595, + "nauc_recall_at_5_max": 0.1814579572416329, + "nauc_recall_at_5_std": 0.036938541398785274, + "ndcg_at_1": 0.12252, + "ndcg_at_10": 0.11391, + "ndcg_at_100": 0.14273, + "ndcg_at_1000": 0.18983, + "ndcg_at_20": 0.11946, + "ndcg_at_3": 0.11073, + "ndcg_at_5": 0.11109, + "precision_at_1": 0.12252, + "precision_at_10": 0.06459, + "precision_at_100": 0.02492, + "precision_at_1000": 0.00813, + "precision_at_20": 0.0499, + "precision_at_3": 0.09091, + "precision_at_5": 0.08022, + "recall_at_1": 0.02953, + "recall_at_10": 0.10379, + "recall_at_100": 0.2451, + "recall_at_1000": 0.47167, + "recall_at_20": 0.13715, + "recall_at_3": 0.05671, + "recall_at_5": 0.07438 + } + ] + }, + "task_name": "XMarket" +} \ No newline at end of file diff --git 
a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/XNLI.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/XNLI.json new file mode 100644 index 0000000..0c11e83 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/XNLI.json @@ -0,0 +1,127 @@ +{ + "dataset_revision": "09698e0180d87dc247ca447d3a1248b931ac0cdb", + "evaluation_time": 2.118696928024292, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "cosine": { + "accuracy": 0.5699633699633699, + "accuracy_threshold": 0.5124573707580566, + "ap": 0.5700439595753928, + "f1": 0.6686868686868687, + "f1_threshold": 0.23125973343849182, + "precision": 0.5100154083204931, + "recall": 0.9706744868035191 + }, + "dot": { + "accuracy": 0.5721611721611721, + "accuracy_threshold": 77.91481018066406, + "ap": 0.5793703625227221, + "f1": 0.6697530864197531, + "f1_threshold": 34.103485107421875, + "precision": 0.5158478605388273, + "recall": 0.9545454545454546 + }, + "euclidean": { + "accuracy": 0.5699633699633699, + "accuracy_threshold": 8.91073226928711, + "ap": 0.5654540110303955, + "f1": 0.6673458991339787, + "f1_threshold": 13.936699867248535, + "precision": 0.5113192818110851, + "recall": 0.9604105571847508 + }, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.5793703625227221, + "manhattan": { + "accuracy": 0.5692307692307692, + "accuracy_threshold": 197.28347778320312, + "ap": 0.5650076642576558, + "f1": 0.6666666666666666, + "f1_threshold": 357.9945068359375, + "precision": 0.5011070110701107, + 
"recall": 0.9956011730205279 + }, + "max": { + "accuracy": 0.5721611721611721, + "ap": 0.5793703625227221, + "f1": 0.6697530864197531 + }, + "similarity": { + "accuracy": 0.5699633699633699, + "accuracy_threshold": 0.5124572515487671, + "ap": 0.5700439595753928, + "f1": 0.6686868686868687, + "f1_threshold": 0.23125974833965302, + "precision": 0.5100154083204931, + "recall": 0.9706744868035191 + } + } + ], + "validation": [ + { + "cosine": { + "accuracy": 0.5846153846153846, + "accuracy_threshold": 0.5614842176437378, + "ap": 0.5884265584962705, + "f1": 0.6730286288297337, + "f1_threshold": 0.21053043007850647, + "precision": 0.5118411000763942, + "recall": 0.9824046920821115 + }, + "dot": { + "accuracy": 0.5794871794871795, + "accuracy_threshold": 68.42404174804688, + "ap": 0.5759231659121954, + "f1": 0.6742268041237113, + "f1_threshold": 33.66102600097656, + "precision": 0.519872813990461, + "recall": 0.9589442815249267 + }, + "euclidean": { + "accuracy": 0.5868131868131868, + "accuracy_threshold": 10.251142501831055, + "ap": 0.5884237060443422, + "f1": 0.6744540375825291, + "f1_threshold": 14.045129776000977, + "precision": 0.515928515928516, + "recall": 0.9736070381231672 + }, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.5884265584962705, + "manhattan": { + "accuracy": 0.5846153846153846, + "accuracy_threshold": 223.50860595703125, + "ap": 0.5883653597378161, + "f1": 0.6739926739926742, + "f1_threshold": 293.31536865234375, + "precision": 0.5240032546786005, + "recall": 0.9442815249266863 + }, + "max": { + "accuracy": 0.5868131868131868, + "ap": 0.5884265584962705, + "f1": 0.6744540375825291 + }, + "similarity": { + "accuracy": 0.5846153846153846, + "accuracy_threshold": 0.5614842772483826, + "ap": 0.5884265584962705, + "f1": 0.6730286288297337, + "f1_threshold": 0.21053044497966766, + "precision": 0.5118411000763942, + "recall": 0.9824046920821115 + } + } + ] + }, + "task_name": "XNLI" +} \ No newline at end of file diff --git 
a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/XPQARetrieval.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/XPQARetrieval.json new file mode 100644 index 0000000..e96b495 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/XPQARetrieval.json @@ -0,0 +1,455 @@ +{ + "dataset_revision": "c99d599f0a6ab9b85b065da6f9d94f9cf731679f", + "evaluation_time": 7.174119472503662, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "spa-spa", + "languages": [ + "spa-Latn", + "spa-Latn" + ], + "main_score": 0.47322, + "map_at_1": 0.22295, + "map_at_10": 0.40335, + "map_at_100": 0.42221, + "map_at_1000": 0.42367, + "map_at_20": 0.41329, + "map_at_3": 0.34133, + "map_at_5": 0.38119, + "mrr_at_1": 0.4501891551071879, + "mrr_at_10": 0.5211253227646667, + "mrr_at_100": 0.5290161261617519, + "mrr_at_1000": 0.5295424087587944, + "mrr_at_20": 0.5256492100785624, + "mrr_at_3": 0.4966372425388818, + "mrr_at_5": 0.5094997898276583, + "nauc_map_at_1000_diff1": 0.4101300415264544, + "nauc_map_at_1000_max": 0.37552565548606176, + "nauc_map_at_1000_std": -0.07702316122775409, + "nauc_map_at_100_diff1": 0.4101394997614877, + "nauc_map_at_100_max": 0.3754238092460309, + "nauc_map_at_100_std": -0.07723144825145725, + "nauc_map_at_10_diff1": 0.40945293327691584, + "nauc_map_at_10_max": 0.37110677129893177, + "nauc_map_at_10_std": -0.07577667098165636, + "nauc_map_at_1_diff1": 0.5274445560072095, + "nauc_map_at_1_max": 0.1965668991827117, + "nauc_map_at_1_std": -0.05052385384218404, 
+ "nauc_map_at_20_diff1": 0.40862277782741113, + "nauc_map_at_20_max": 0.37311467911577373, + "nauc_map_at_20_std": -0.07951447502669706, + "nauc_map_at_3_diff1": 0.4430434497696989, + "nauc_map_at_3_max": 0.3182774844227132, + "nauc_map_at_3_std": -0.07141442487327278, + "nauc_map_at_5_diff1": 0.412434766273287, + "nauc_map_at_5_max": 0.3573352744434764, + "nauc_map_at_5_std": -0.080376702247322, + "nauc_mrr_at_1000_diff1": 0.47989501169146337, + "nauc_mrr_at_1000_max": 0.41714615241819575, + "nauc_mrr_at_1000_std": -0.08742719435502701, + "nauc_mrr_at_100_diff1": 0.47969230900246124, + "nauc_mrr_at_100_max": 0.41693307488340536, + "nauc_mrr_at_100_std": -0.08774741433139747, + "nauc_mrr_at_10_diff1": 0.48108491650672824, + "nauc_mrr_at_10_max": 0.41902441785596, + "nauc_mrr_at_10_std": -0.08575159409041404, + "nauc_mrr_at_1_diff1": 0.49410638229645926, + "nauc_mrr_at_1_max": 0.398398699821759, + "nauc_mrr_at_1_std": -0.10047731518590312, + "nauc_mrr_at_20_diff1": 0.4796296309866593, + "nauc_mrr_at_20_max": 0.41847147284285363, + "nauc_mrr_at_20_std": -0.08730559906350552, + "nauc_mrr_at_3_diff1": 0.48342880489210754, + "nauc_mrr_at_3_max": 0.4163389383900365, + "nauc_mrr_at_3_std": -0.08767224151075226, + "nauc_mrr_at_5_diff1": 0.48226960152029325, + "nauc_mrr_at_5_max": 0.41927281988714654, + "nauc_mrr_at_5_std": -0.08813609421859124, + "nauc_ndcg_at_1000_diff1": 0.41979007959246667, + "nauc_ndcg_at_1000_max": 0.40144496490911463, + "nauc_ndcg_at_1000_std": -0.07138146711022608, + "nauc_ndcg_at_100_diff1": 0.4147666703858517, + "nauc_ndcg_at_100_max": 0.3993279944829789, + "nauc_ndcg_at_100_std": -0.06982731536027592, + "nauc_ndcg_at_10_diff1": 0.4154351515106638, + "nauc_ndcg_at_10_max": 0.39501853595505076, + "nauc_ndcg_at_10_std": -0.0724325825297142, + "nauc_ndcg_at_1_diff1": 0.49410638229645926, + "nauc_ndcg_at_1_max": 0.398398699821759, + "nauc_ndcg_at_1_std": -0.10047731518590312, + "nauc_ndcg_at_20_diff1": 0.4114042087846038, + "nauc_ndcg_at_20_max": 
0.3955829899135673, + "nauc_ndcg_at_20_std": -0.08208744834316112, + "nauc_ndcg_at_3_diff1": 0.42798869597127287, + "nauc_ndcg_at_3_max": 0.3835753000305652, + "nauc_ndcg_at_3_std": -0.08122146912041701, + "nauc_ndcg_at_5_diff1": 0.41962852668014117, + "nauc_ndcg_at_5_max": 0.3824675744134876, + "nauc_ndcg_at_5_std": -0.07960432722700808, + "nauc_precision_at_1000_diff1": -0.06674767698096729, + "nauc_precision_at_1000_max": 0.2387161568891472, + "nauc_precision_at_1000_std": -0.040664007336274656, + "nauc_precision_at_100_diff1": 0.006519265628793583, + "nauc_precision_at_100_max": 0.31348844781438967, + "nauc_precision_at_100_std": -0.026246318041910004, + "nauc_precision_at_10_diff1": 0.10178340683733093, + "nauc_precision_at_10_max": 0.40556915175274, + "nauc_precision_at_10_std": -0.06413130989021472, + "nauc_precision_at_1_diff1": 0.49410638229645926, + "nauc_precision_at_1_max": 0.398398699821759, + "nauc_precision_at_1_std": -0.10047731518590312, + "nauc_precision_at_20_diff1": 0.07101036588496402, + "nauc_precision_at_20_max": 0.3753557289126776, + "nauc_precision_at_20_std": -0.07719146163168399, + "nauc_precision_at_3_diff1": 0.21011215576983416, + "nauc_precision_at_3_max": 0.4192847349757771, + "nauc_precision_at_3_std": -0.0907666376019511, + "nauc_precision_at_5_diff1": 0.13738755338130057, + "nauc_precision_at_5_max": 0.4242196709404185, + "nauc_precision_at_5_std": -0.09153549222238518, + "nauc_recall_at_1000_diff1": 0.0765115116948963, + "nauc_recall_at_1000_max": 0.31318106760751296, + "nauc_recall_at_1000_std": 0.4367049461960615, + "nauc_recall_at_100_diff1": 0.27382405151119255, + "nauc_recall_at_100_max": 0.3316078113185145, + "nauc_recall_at_100_std": 0.016250386117522903, + "nauc_recall_at_10_diff1": 0.34289171850004774, + "nauc_recall_at_10_max": 0.3634719241576307, + "nauc_recall_at_10_std": -0.04741750985695728, + "nauc_recall_at_1_diff1": 0.5274445560072095, + "nauc_recall_at_1_max": 0.1965668991827117, + "nauc_recall_at_1_std": 
-0.05052385384218404, + "nauc_recall_at_20_diff1": 0.31379968029408434, + "nauc_recall_at_20_max": 0.3525445199450759, + "nauc_recall_at_20_std": -0.0753015439149771, + "nauc_recall_at_3_diff1": 0.40693539518041355, + "nauc_recall_at_3_max": 0.3001684849471086, + "nauc_recall_at_3_std": -0.054494141512651924, + "nauc_recall_at_5_diff1": 0.3645810929456298, + "nauc_recall_at_5_max": 0.3513950278337877, + "nauc_recall_at_5_std": -0.0627715350414618, + "ndcg_at_1": 0.45019, + "ndcg_at_10": 0.47322, + "ndcg_at_100": 0.54098, + "ndcg_at_1000": 0.56788, + "ndcg_at_20": 0.49855, + "ndcg_at_3": 0.42268, + "ndcg_at_5": 0.43544, + "precision_at_1": 0.45019, + "precision_at_10": 0.13644, + "precision_at_100": 0.02, + "precision_at_1000": 0.00239, + "precision_at_20": 0.07831, + "precision_at_3": 0.29929, + "precision_at_5": 0.22446, + "recall_at_1": 0.22295, + "recall_at_10": 0.55071, + "recall_at_100": 0.80637, + "recall_at_1000": 0.9777, + "recall_at_20": 0.62642, + "recall_at_3": 0.38264, + "recall_at_5": 0.4644 + }, + { + "hf_subset": "eng-spa", + "languages": [ + "eng-Latn", + "spa-Latn" + ], + "main_score": 0.13385, + "map_at_1": 0.05149, + "map_at_10": 0.09885, + "map_at_100": 0.11075, + "map_at_1000": 0.11377, + "map_at_20": 0.10424, + "map_at_3": 0.08193, + "map_at_5": 0.09061, + "mrr_at_1": 0.11979823455233292, + "mrr_at_10": 0.16552573109950158, + "mrr_at_100": 0.17596011554661672, + "mrr_at_1000": 0.17750161430957156, + "mrr_at_20": 0.170262595009217, + "mrr_at_3": 0.1506935687263556, + "mrr_at_5": 0.15838587641866328, + "nauc_map_at_1000_diff1": 0.16981528540290114, + "nauc_map_at_1000_max": 0.30772907026819696, + "nauc_map_at_1000_std": 0.06996377514441718, + "nauc_map_at_100_diff1": 0.16914196971802495, + "nauc_map_at_100_max": 0.30822166251495964, + "nauc_map_at_100_std": 0.0705457715265207, + "nauc_map_at_10_diff1": 0.1734493912307137, + "nauc_map_at_10_max": 0.2987504235446724, + "nauc_map_at_10_std": 0.04827439874608506, + "nauc_map_at_1_diff1": 
0.23657369017721755, + "nauc_map_at_1_max": 0.2663012051953149, + "nauc_map_at_1_std": 0.04082743739831973, + "nauc_map_at_20_diff1": 0.17241063377535423, + "nauc_map_at_20_max": 0.3020872044283822, + "nauc_map_at_20_std": 0.05851582565569818, + "nauc_map_at_3_diff1": 0.17982392342641051, + "nauc_map_at_3_max": 0.2996971469435361, + "nauc_map_at_3_std": 0.042589003199420904, + "nauc_map_at_5_diff1": 0.18318810505920227, + "nauc_map_at_5_max": 0.2966254390185695, + "nauc_map_at_5_std": 0.034192638545391035, + "nauc_mrr_at_1000_diff1": 0.17094138992946248, + "nauc_mrr_at_1000_max": 0.2804583298970962, + "nauc_mrr_at_1000_std": 0.055981250174288386, + "nauc_mrr_at_100_diff1": 0.17064773780581935, + "nauc_mrr_at_100_max": 0.280748550823589, + "nauc_mrr_at_100_std": 0.056695841645784575, + "nauc_mrr_at_10_diff1": 0.17372819189022465, + "nauc_mrr_at_10_max": 0.2810724427306018, + "nauc_mrr_at_10_std": 0.05284205824758423, + "nauc_mrr_at_1_diff1": 0.21758167334615755, + "nauc_mrr_at_1_max": 0.3069026835894448, + "nauc_mrr_at_1_std": 0.0478026717389974, + "nauc_mrr_at_20_diff1": 0.17177842378189506, + "nauc_mrr_at_20_max": 0.27983899186652517, + "nauc_mrr_at_20_std": 0.0533175831931301, + "nauc_mrr_at_3_diff1": 0.1771304622382114, + "nauc_mrr_at_3_max": 0.28335439678166774, + "nauc_mrr_at_3_std": 0.04219571756300347, + "nauc_mrr_at_5_diff1": 0.176734890915563, + "nauc_mrr_at_5_max": 0.2821942542465157, + "nauc_mrr_at_5_std": 0.04526980278930942, + "nauc_ndcg_at_1000_diff1": 0.1528483614481676, + "nauc_ndcg_at_1000_max": 0.30157280776011763, + "nauc_ndcg_at_1000_std": 0.10525573360389297, + "nauc_ndcg_at_100_diff1": 0.14625509164791295, + "nauc_ndcg_at_100_max": 0.3150508118623774, + "nauc_ndcg_at_100_std": 0.1275832647065197, + "nauc_ndcg_at_10_diff1": 0.159156288898909, + "nauc_ndcg_at_10_max": 0.2884380197302731, + "nauc_ndcg_at_10_std": 0.06351891235022641, + "nauc_ndcg_at_1_diff1": 0.21758167334615755, + "nauc_ndcg_at_1_max": 0.3069026835894448, + "nauc_ndcg_at_1_std": 
0.0478026717389974, + "nauc_ndcg_at_20_diff1": 0.1574551771609007, + "nauc_ndcg_at_20_max": 0.29317203050909424, + "nauc_ndcg_at_20_std": 0.08028619194495099, + "nauc_ndcg_at_3_diff1": 0.15945207120650862, + "nauc_ndcg_at_3_max": 0.2967069680004616, + "nauc_ndcg_at_3_std": 0.04544196344801241, + "nauc_ndcg_at_5_diff1": 0.17343682589991075, + "nauc_ndcg_at_5_max": 0.28845397283056806, + "nauc_ndcg_at_5_std": 0.03917223225119734, + "nauc_precision_at_1000_diff1": 0.01746136495011916, + "nauc_precision_at_1000_max": 0.06342231184287896, + "nauc_precision_at_1000_std": 0.009858347460776922, + "nauc_precision_at_100_diff1": 0.07075311744343227, + "nauc_precision_at_100_max": 0.2890964230978766, + "nauc_precision_at_100_std": 0.17009374737798322, + "nauc_precision_at_10_diff1": 0.12217513923170542, + "nauc_precision_at_10_max": 0.30392946276087024, + "nauc_precision_at_10_std": 0.07025046782683812, + "nauc_precision_at_1_diff1": 0.21758167334615755, + "nauc_precision_at_1_max": 0.3069026835894448, + "nauc_precision_at_1_std": 0.0478026717389974, + "nauc_precision_at_20_diff1": 0.1216945475642686, + "nauc_precision_at_20_max": 0.3056596780065624, + "nauc_precision_at_20_std": 0.10637671532145981, + "nauc_precision_at_3_diff1": 0.14180016196899226, + "nauc_precision_at_3_max": 0.31481581505681605, + "nauc_precision_at_3_std": 0.03476354845371766, + "nauc_precision_at_5_diff1": 0.15118688912997819, + "nauc_precision_at_5_max": 0.3074429275028567, + "nauc_precision_at_5_std": 0.028108137412478647, + "nauc_recall_at_1000_diff1": 0.09988531302703038, + "nauc_recall_at_1000_max": 0.34174741084787874, + "nauc_recall_at_1000_std": 0.3375294593491094, + "nauc_recall_at_100_diff1": 0.08854916708885169, + "nauc_recall_at_100_max": 0.31355763064238507, + "nauc_recall_at_100_std": 0.25180204304550924, + "nauc_recall_at_10_diff1": 0.11954698865615596, + "nauc_recall_at_10_max": 0.2508693699223439, + "nauc_recall_at_10_std": 0.08682896743252057, + "nauc_recall_at_1_diff1": 
0.23657369017721755, + "nauc_recall_at_1_max": 0.2663012051953149, + "nauc_recall_at_1_std": 0.04082743739831973, + "nauc_recall_at_20_diff1": 0.11629268398696745, + "nauc_recall_at_20_max": 0.2586496388218271, + "nauc_recall_at_20_std": 0.11745436015111511, + "nauc_recall_at_3_diff1": 0.13711838511402943, + "nauc_recall_at_3_max": 0.2817766693259193, + "nauc_recall_at_3_std": 0.05804627963743468, + "nauc_recall_at_5_diff1": 0.1509496030640576, + "nauc_recall_at_5_max": 0.26444819092065547, + "nauc_recall_at_5_std": 0.040460115320471925, + "ndcg_at_1": 0.1198, + "ndcg_at_10": 0.13385, + "ndcg_at_100": 0.19486, + "ndcg_at_1000": 0.26706, + "ndcg_at_20": 0.15087, + "ndcg_at_3": 0.11293, + "ndcg_at_5": 0.11691, + "precision_at_1": 0.1198, + "precision_at_10": 0.04212, + "precision_at_100": 0.01005, + "precision_at_1000": 0.00216, + "precision_at_20": 0.02781, + "precision_at_3": 0.08449, + "precision_at_5": 0.06406, + "recall_at_1": 0.05149, + "recall_at_10": 0.16873, + "recall_at_100": 0.41591, + "recall_at_1000": 0.88081, + "recall_at_20": 0.22064, + "recall_at_3": 0.10069, + "recall_at_5": 0.12739 + }, + { + "hf_subset": "spa-eng", + "languages": [ + "spa-Latn", + "eng-Latn" + ], + "main_score": 0.16581, + "map_at_1": 0.06641, + "map_at_10": 0.13193, + "map_at_100": 0.14525, + "map_at_1000": 0.1481, + "map_at_20": 0.13866, + "map_at_3": 0.10835, + "map_at_5": 0.1216, + "mrr_at_1": 0.13871374527112232, + "mrr_at_10": 0.18474098760984003, + "mrr_at_100": 0.1933663622003425, + "mrr_at_1000": 0.19485295220322132, + "mrr_at_20": 0.1891762101512471, + "mrr_at_3": 0.16771752837326603, + "mrr_at_5": 0.17730138713745267, + "nauc_map_at_1000_diff1": 0.2891887685272858, + "nauc_map_at_1000_max": 0.47853851134798403, + "nauc_map_at_1000_std": 0.20873339748832107, + "nauc_map_at_100_diff1": 0.28947651543002956, + "nauc_map_at_100_max": 0.4794783827331946, + "nauc_map_at_100_std": 0.20950516894892232, + "nauc_map_at_10_diff1": 0.30195552654124935, + "nauc_map_at_10_max": 
0.47381317779055, + "nauc_map_at_10_std": 0.19419961468682473, + "nauc_map_at_1_diff1": 0.46638165026779765, + "nauc_map_at_1_max": 0.4364581818642576, + "nauc_map_at_1_std": 0.16437258413883196, + "nauc_map_at_20_diff1": 0.29600515993120535, + "nauc_map_at_20_max": 0.47802267872540644, + "nauc_map_at_20_std": 0.20353882869230172, + "nauc_map_at_3_diff1": 0.3452384329285874, + "nauc_map_at_3_max": 0.4715464111442569, + "nauc_map_at_3_std": 0.1855030780037972, + "nauc_map_at_5_diff1": 0.31905888089261797, + "nauc_map_at_5_max": 0.477507299235446, + "nauc_map_at_5_std": 0.18919119557595196, + "nauc_mrr_at_1000_diff1": 0.2818982326137024, + "nauc_mrr_at_1000_max": 0.45359425772588446, + "nauc_mrr_at_1000_std": 0.19255982258474652, + "nauc_mrr_at_100_diff1": 0.2812184650449626, + "nauc_mrr_at_100_max": 0.4533152709680096, + "nauc_mrr_at_100_std": 0.19266008390455602, + "nauc_mrr_at_10_diff1": 0.28347904073723174, + "nauc_mrr_at_10_max": 0.45044798607325703, + "nauc_mrr_at_10_std": 0.18757180411432384, + "nauc_mrr_at_1_diff1": 0.3665550615873501, + "nauc_mrr_at_1_max": 0.4933519633982474, + "nauc_mrr_at_1_std": 0.1761590076396144, + "nauc_mrr_at_20_diff1": 0.2832370201289105, + "nauc_mrr_at_20_max": 0.4528454372571194, + "nauc_mrr_at_20_std": 0.1900889314392444, + "nauc_mrr_at_3_diff1": 0.30710871117244004, + "nauc_mrr_at_3_max": 0.45584478974277526, + "nauc_mrr_at_3_std": 0.18154966286023694, + "nauc_mrr_at_5_diff1": 0.29529231845231974, + "nauc_mrr_at_5_max": 0.4551362117825395, + "nauc_mrr_at_5_std": 0.18449461426787872, + "nauc_ndcg_at_1000_diff1": 0.246646102912717, + "nauc_ndcg_at_1000_max": 0.4584226089190682, + "nauc_ndcg_at_1000_std": 0.22699628988837844, + "nauc_ndcg_at_100_diff1": 0.23889777962839034, + "nauc_ndcg_at_100_max": 0.4696824117848245, + "nauc_ndcg_at_100_std": 0.24355474358275378, + "nauc_ndcg_at_10_diff1": 0.2684094103395798, + "nauc_ndcg_at_10_max": 0.4504802123018303, + "nauc_ndcg_at_10_std": 0.19436346355443865, + "nauc_ndcg_at_1_diff1": 
0.3665550615873501, + "nauc_ndcg_at_1_max": 0.4933519633982474, + "nauc_ndcg_at_1_std": 0.1761590076396144, + "nauc_ndcg_at_20_diff1": 0.2589042767029927, + "nauc_ndcg_at_20_max": 0.4623167805413833, + "nauc_ndcg_at_20_std": 0.21651409853253759, + "nauc_ndcg_at_3_diff1": 0.29389652495039104, + "nauc_ndcg_at_3_max": 0.4667709876582448, + "nauc_ndcg_at_3_std": 0.19235169275488417, + "nauc_ndcg_at_5_diff1": 0.29447611777864413, + "nauc_ndcg_at_5_max": 0.46122316214352477, + "nauc_ndcg_at_5_std": 0.18941743271000738, + "nauc_precision_at_1000_diff1": -0.034163402598399825, + "nauc_precision_at_1000_max": 0.10439223693214769, + "nauc_precision_at_1000_std": 0.040716469671998386, + "nauc_precision_at_100_diff1": 0.04711547518790777, + "nauc_precision_at_100_max": 0.385673257959861, + "nauc_precision_at_100_std": 0.2483444613470521, + "nauc_precision_at_10_diff1": 0.11548193113711476, + "nauc_precision_at_10_max": 0.4364049272711339, + "nauc_precision_at_10_std": 0.1987121496971378, + "nauc_precision_at_1_diff1": 0.3665550615873501, + "nauc_precision_at_1_max": 0.4933519633982474, + "nauc_precision_at_1_std": 0.1761590076396144, + "nauc_precision_at_20_diff1": 0.10140633745451463, + "nauc_precision_at_20_max": 0.435649937632033, + "nauc_precision_at_20_std": 0.23431867133087333, + "nauc_precision_at_3_diff1": 0.187942460921644, + "nauc_precision_at_3_max": 0.4826146046287366, + "nauc_precision_at_3_std": 0.20429048758997462, + "nauc_precision_at_5_diff1": 0.1511221061862531, + "nauc_precision_at_5_max": 0.4689696542313204, + "nauc_precision_at_5_std": 0.1978982973450797, + "nauc_recall_at_1000_diff1": 0.1777987108247898, + "nauc_recall_at_1000_max": 0.48081949699445925, + "nauc_recall_at_1000_std": 0.4304494879997491, + "nauc_recall_at_100_diff1": 0.1472238012644601, + "nauc_recall_at_100_max": 0.45113055825696863, + "nauc_recall_at_100_std": 0.33058730696466804, + "nauc_recall_at_10_diff1": 0.21559675427748481, + "nauc_recall_at_10_max": 0.4047939350647471, + 
"nauc_recall_at_10_std": 0.19575867678423667, + "nauc_recall_at_1_diff1": 0.46638165026779765, + "nauc_recall_at_1_max": 0.4364581818642576, + "nauc_recall_at_1_std": 0.16437258413883196, + "nauc_recall_at_20_diff1": 0.19405383057599995, + "nauc_recall_at_20_max": 0.4285385318705685, + "nauc_recall_at_20_std": 0.2453460527069173, + "nauc_recall_at_3_diff1": 0.30332968962760914, + "nauc_recall_at_3_max": 0.42728152251094464, + "nauc_recall_at_3_std": 0.182092587644656, + "nauc_recall_at_5_diff1": 0.2633905782535538, + "nauc_recall_at_5_max": 0.4278864250162255, + "nauc_recall_at_5_std": 0.1873340043639545, + "ndcg_at_1": 0.13871, + "ndcg_at_10": 0.16581, + "ndcg_at_100": 0.22607, + "ndcg_at_1000": 0.29341, + "ndcg_at_20": 0.18513, + "ndcg_at_3": 0.13839, + "ndcg_at_5": 0.14627, + "precision_at_1": 0.13871, + "precision_at_10": 0.05195, + "precision_at_100": 0.01103, + "precision_at_1000": 0.00217, + "precision_at_20": 0.03335, + "precision_at_3": 0.10256, + "precision_at_5": 0.07995, + "recall_at_1": 0.06641, + "recall_at_10": 0.20689, + "recall_at_100": 0.44493, + "recall_at_1000": 0.88377, + "recall_at_20": 0.26587, + "recall_at_3": 0.12823, + "recall_at_5": 0.16114 + } + ] + }, + "task_name": "XPQARetrieval" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/XQuADRetrieval.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/XQuADRetrieval.json new file mode 100644 index 0000000..6c03805 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/XQuADRetrieval.json @@ 
-0,0 +1,158 @@ +{ + "dataset_revision": "51adfef1c1287aab1d2d91b5bead9bcfb9c68583", + "evaluation_time": 1.5398015975952148, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "validation": [ + { + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.81996, + "map_at_1": 0.69426, + "map_at_10": 0.78093, + "map_at_100": 0.78341, + "map_at_1000": 0.78344, + "map_at_20": 0.7828, + "map_at_3": 0.76647, + "map_at_5": 0.77466, + "mrr_at_1": 0.6942567567567568, + "mrr_at_10": 0.7809269090519098, + "mrr_at_100": 0.7834057642532574, + "mrr_at_1000": 0.7834406039241898, + "mrr_at_20": 0.7827964137392358, + "mrr_at_3": 0.7664695945945951, + "mrr_at_5": 0.7746621621621632, + "nauc_map_at_1000_diff1": 0.682361430556322, + "nauc_map_at_1000_max": 0.5987455232553652, + "nauc_map_at_1000_std": 0.1772064138044479, + "nauc_map_at_100_diff1": 0.6823622349748709, + "nauc_map_at_100_max": 0.5987999694453191, + "nauc_map_at_100_std": 0.17726577977481656, + "nauc_map_at_10_diff1": 0.6811910996505279, + "nauc_map_at_10_max": 0.6017949199843236, + "nauc_map_at_10_std": 0.18003233778196964, + "nauc_map_at_1_diff1": 0.7126079025170393, + "nauc_map_at_1_max": 0.5367549909069829, + "nauc_map_at_1_std": 0.11457848335910233, + "nauc_map_at_20_diff1": 0.6821182916911805, + "nauc_map_at_20_max": 0.5996575558910002, + "nauc_map_at_20_std": 0.17833042230511117, + "nauc_map_at_3_diff1": 0.6770826242196856, + "nauc_map_at_3_max": 0.6061538214452007, + "nauc_map_at_3_std": 0.17848750874741787, + "nauc_map_at_5_diff1": 0.6775773048235915, + "nauc_map_at_5_max": 0.6070061871516373, + "nauc_map_at_5_std": 0.18520158007017573, + "nauc_mrr_at_1000_diff1": 0.682361430556322, + "nauc_mrr_at_1000_max": 0.5987455232553652, + "nauc_mrr_at_1000_std": 0.1772064138044479, + "nauc_mrr_at_100_diff1": 0.6823622349748709, + "nauc_mrr_at_100_max": 0.5987999694453191, + "nauc_mrr_at_100_std": 0.17726577977481656, + "nauc_mrr_at_10_diff1": 0.6811910996505279, + 
"nauc_mrr_at_10_max": 0.6017949199843236, + "nauc_mrr_at_10_std": 0.18003233778196964, + "nauc_mrr_at_1_diff1": 0.7126079025170393, + "nauc_mrr_at_1_max": 0.5367549909069829, + "nauc_mrr_at_1_std": 0.11457848335910233, + "nauc_mrr_at_20_diff1": 0.6821182916911805, + "nauc_mrr_at_20_max": 0.5996575558910002, + "nauc_mrr_at_20_std": 0.17833042230511117, + "nauc_mrr_at_3_diff1": 0.6770826242196856, + "nauc_mrr_at_3_max": 0.6061538214452007, + "nauc_mrr_at_3_std": 0.17848750874741787, + "nauc_mrr_at_5_diff1": 0.6775773048235915, + "nauc_mrr_at_5_max": 0.6070061871516373, + "nauc_mrr_at_5_std": 0.18520158007017573, + "nauc_ndcg_at_1000_diff1": 0.6785754642491914, + "nauc_ndcg_at_1000_max": 0.6087848290724543, + "nauc_ndcg_at_1000_std": 0.1891955615960067, + "nauc_ndcg_at_100_diff1": 0.6785903767510317, + "nauc_ndcg_at_100_max": 0.610172463196193, + "nauc_ndcg_at_100_std": 0.19103978580765613, + "nauc_ndcg_at_10_diff1": 0.6724031346447155, + "nauc_ndcg_at_10_max": 0.6268815971907553, + "nauc_ndcg_at_10_std": 0.20860309293305795, + "nauc_ndcg_at_1_diff1": 0.7126079025170393, + "nauc_ndcg_at_1_max": 0.5367549909069829, + "nauc_ndcg_at_1_std": 0.11457848335910233, + "nauc_ndcg_at_20_diff1": 0.6769201863671689, + "nauc_ndcg_at_20_max": 0.6184069159328102, + "nauc_ndcg_at_20_std": 0.20214797348019276, + "nauc_ndcg_at_3_diff1": 0.6630539009829314, + "nauc_ndcg_at_3_max": 0.6353436004116547, + "nauc_ndcg_at_3_std": 0.20543586115633475, + "nauc_ndcg_at_5_diff1": 0.6623986780411254, + "nauc_ndcg_at_5_max": 0.6387947419517314, + "nauc_ndcg_at_5_std": 0.22075268144907895, + "nauc_precision_at_1000_diff1": 1.0, + "nauc_precision_at_1000_max": 1.0, + "nauc_precision_at_1000_std": 1.0, + "nauc_precision_at_100_diff1": 0.6814575739502011, + "nauc_precision_at_100_max": 0.9074134048855836, + "nauc_precision_at_100_std": 0.6032706496077938, + "nauc_precision_at_10_diff1": 0.6035201453464304, + "nauc_precision_at_10_max": 0.8510671696238781, + "nauc_precision_at_10_std": 
0.4696112604147858, + "nauc_precision_at_1_diff1": 0.7126079025170393, + "nauc_precision_at_1_max": 0.5367549909069829, + "nauc_precision_at_1_std": 0.11457848335910233, + "nauc_precision_at_20_diff1": 0.6387732048550764, + "nauc_precision_at_20_max": 0.8791975405034081, + "nauc_precision_at_20_std": 0.5661507482817274, + "nauc_precision_at_3_diff1": 0.6020384014625442, + "nauc_precision_at_3_max": 0.7631652002016415, + "nauc_precision_at_3_std": 0.3234730924741548, + "nauc_precision_at_5_diff1": 0.5784653632603379, + "nauc_precision_at_5_max": 0.8141749373855437, + "nauc_precision_at_5_std": 0.42015965008045586, + "nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + "nauc_recall_at_100_diff1": 0.6814575739502193, + "nauc_recall_at_100_max": 0.9074134048855995, + "nauc_recall_at_100_std": 0.6032706496078215, + "nauc_recall_at_10_diff1": 0.6035201453464307, + "nauc_recall_at_10_max": 0.851067169623881, + "nauc_recall_at_10_std": 0.4696112604147857, + "nauc_recall_at_1_diff1": 0.7126079025170393, + "nauc_recall_at_1_max": 0.5367549909069829, + "nauc_recall_at_1_std": 0.11457848335910233, + "nauc_recall_at_20_diff1": 0.6387732048550856, + "nauc_recall_at_20_max": 0.8791975405034081, + "nauc_recall_at_20_std": 0.5661507482817295, + "nauc_recall_at_3_diff1": 0.6020384014625456, + "nauc_recall_at_3_max": 0.7631652002016434, + "nauc_recall_at_3_std": 0.32347309247415523, + "nauc_recall_at_5_diff1": 0.5784653632603376, + "nauc_recall_at_5_max": 0.8141749373855456, + "nauc_recall_at_5_std": 0.420159650080459, + "ndcg_at_1": 0.69426, + "ndcg_at_10": 0.81996, + "ndcg_at_100": 0.83165, + "ndcg_at_1000": 0.83235, + "ndcg_at_20": 0.82679, + "ndcg_at_3": 0.79001, + "ndcg_at_5": 0.80487, + "precision_at_1": 0.69426, + "precision_at_10": 0.09409, + "precision_at_100": 0.00995, + "precision_at_1000": 0.001, + "precision_at_20": 0.0484, + "precision_at_3": 0.28604, + "precision_at_5": 0.17889, + "recall_at_1": 0.69426, + 
"recall_at_10": 0.94088, + "recall_at_100": 0.99493, + "recall_at_1000": 1.0, + "recall_at_20": 0.96791, + "recall_at_3": 0.85811, + "recall_at_5": 0.89443 + } + ] + }, + "task_name": "XQuADRetrieval" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/model_meta.json b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/model_meta.json new file mode 100644 index 0000000..b4d7392 --- /dev/null +++ b/evaluation/embeddings_model/results/dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn/e95950ddcdb6e290f3b40cb449aa9214645067b6/model_meta.json @@ -0,0 +1 @@ +{"name": "dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn", "revision": "e95950ddcdb6e290f3b40cb449aa9214645067b6", "release_date": null, "languages": [], "n_parameters": null, "memory_usage": null, "max_tokens": null, "embed_dim": null, "license": null, "open_source": null, "similarity_fn_name": null, "framework": ["Sentence Transformers"], "loader": null} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/AmazonReviewsClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/AmazonReviewsClassification.json new file mode 100644 index 0000000..bf7c070 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/AmazonReviewsClassification.json @@ -0,0 +1,137 @@ +{ + 
"dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d", + "evaluation_time": 28.858722448349, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.42702, + "f1": 0.4071323003968025, + "f1_weighted": 0.40713230039680237, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.42702, + "scores_per_experiment": [ + { + "accuracy": 0.4424, + "f1": 0.4081103611816884, + "f1_weighted": 0.4081103611816883 + }, + { + "accuracy": 0.4394, + "f1": 0.4193673747607381, + "f1_weighted": 0.41936737476073804 + }, + { + "accuracy": 0.4524, + "f1": 0.4178417859287327, + "f1_weighted": 0.41784178592873267 + }, + { + "accuracy": 0.4328, + "f1": 0.43066570602606546, + "f1_weighted": 0.4306657060260654 + }, + { + "accuracy": 0.418, + "f1": 0.39849358192686346, + "f1_weighted": 0.39849358192686346 + }, + { + "accuracy": 0.4264, + "f1": 0.40572640063745374, + "f1_weighted": 0.4057264006374538 + }, + { + "accuracy": 0.419, + "f1": 0.4041658526203714, + "f1_weighted": 0.4041658526203714 + }, + { + "accuracy": 0.4218, + "f1": 0.4120876479014086, + "f1_weighted": 0.41208764790140856 + }, + { + "accuracy": 0.4306, + "f1": 0.39639273202273795, + "f1_weighted": 0.396392732022738 + }, + { + "accuracy": 0.3874, + "f1": 0.37847156096196394, + "f1_weighted": 0.378471560961964 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.4245, + "f1": 0.40573866420994165, + "f1_weighted": 0.40573866420994176, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.4245, + "scores_per_experiment": [ + { + "accuracy": 0.4362, + "f1": 0.4046647362461015, + "f1_weighted": 0.40466473624610155 + }, + { + "accuracy": 0.4346, + "f1": 0.41676401455719275, + "f1_weighted": 0.4167640145571928 + }, + { + "accuracy": 0.4586, + "f1": 0.4246623380704193, + "f1_weighted": 0.42466233807041937 + }, + { + "accuracy": 0.4212, + "f1": 0.41960024687627595, + "f1_weighted": 0.41960024687627595 + }, + { + "accuracy": 0.4138, + "f1": 
0.3947515879151939, + "f1_weighted": 0.39475158791519394 + }, + { + "accuracy": 0.427, + "f1": 0.4088668999283883, + "f1_weighted": 0.40886689992838826 + }, + { + "accuracy": 0.4204, + "f1": 0.40584372454838025, + "f1_weighted": 0.4058437245483803 + }, + { + "accuracy": 0.4166, + "f1": 0.4091252945946275, + "f1_weighted": 0.40912529459462754 + }, + { + "accuracy": 0.4246, + "f1": 0.3880922083648488, + "f1_weighted": 0.3880922083648488 + }, + { + "accuracy": 0.392, + "f1": 0.38501559099798877, + "f1_weighted": 0.38501559099798877 + } + ] + } + ] + }, + "task_name": "AmazonReviewsClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/BibleNLPBitextMining.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/BibleNLPBitextMining.json new file mode 100644 index 0000000..c2702c8 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/BibleNLPBitextMining.json @@ -0,0 +1,35 @@ +{ + "dataset_revision": "264a18480c529d9e922483839b4b9758e690b762", + "evaluation_time": 2.042119026184082, + "kg_co2_emissions": null, + "mteb_version": "1.12.25", + "scores": { + "train": [ + { + "accuracy": 0.98828125, + "f1": 0.984375, + "hf_subset": "eng_Latn-spa_Latn", + "languages": [ + "eng-Latn", + "spa-Latn" + ], + "main_score": 0.984375, + "precision": 0.982421875, + "recall": 0.98828125 + }, + { + "accuracy": 0.984375, + "f1": 0.9791666666666666, + "hf_subset": "spa_Latn-eng_Latn", + "languages": [ + "spa-Latn", + "eng-Latn" + ], + "main_score": 0.9791666666666666, + "precision": 0.9765625, + "recall": 0.984375 + } + ] + }, + "task_name": "BibleNLPBitextMining" +} \ No newline at end of file diff --git 
a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/CataloniaTweetClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/CataloniaTweetClassification.json new file mode 100644 index 0000000..1ffa3b3 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/CataloniaTweetClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "cf24d44e517efa534f048e5fc5981f399ed25bee", + "evaluation_time": 11.199100494384766, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.5025000000000001, + "f1": 0.5157603653313163, + "f1_weighted": 0.49347854578241285, + "hf_subset": "spanish", + "languages": [ + "spa-Latn" + ], + "main_score": 0.5025000000000001, + "scores_per_experiment": [ + { + "accuracy": 0.532, + "f1": 0.5444835923249581, + "f1_weighted": 0.5310312401585366 + }, + { + "accuracy": 0.4905, + "f1": 0.4994132141708986, + "f1_weighted": 0.4819118278361011 + }, + { + "accuracy": 0.4295, + "f1": 0.4300187061535821, + "f1_weighted": 0.41732008724961184 + }, + { + "accuracy": 0.4995, + "f1": 0.5121574288699979, + "f1_weighted": 0.49384375695947985 + }, + { + "accuracy": 0.491, + "f1": 0.5057092157107741, + "f1_weighted": 0.4733601879988239 + }, + { + "accuracy": 0.5235, + "f1": 0.5366829949002724, + "f1_weighted": 0.5173935689333952 + }, + { + "accuracy": 0.5415, + "f1": 0.561353544586859, + "f1_weighted": 0.5339293796491836 + }, + { + "accuracy": 0.497, + "f1": 0.504611714788325, + "f1_weighted": 0.47668041713250486 + }, + { + "accuracy": 0.5075, + "f1": 0.5245229204629887, + "f1_weighted": 0.5050733275925439 + }, + { + "accuracy": 0.513, + "f1": 0.538650321344508, + "f1_weighted": 0.5042416643139469 + } + ] + } + ], + 
"validation": [ + { + "accuracy": 0.50545, + "f1": 0.5180886299484765, + "f1_weighted": 0.497176091684976, + "hf_subset": "spanish", + "languages": [ + "spa-Latn" + ], + "main_score": 0.50545, + "scores_per_experiment": [ + { + "accuracy": 0.5305, + "f1": 0.5491266175009213, + "f1_weighted": 0.5298216379558344 + }, + { + "accuracy": 0.4955, + "f1": 0.5028655001962522, + "f1_weighted": 0.4878046423222683 + }, + { + "accuracy": 0.4315, + "f1": 0.4321685786948945, + "f1_weighted": 0.4216788679314995 + }, + { + "accuracy": 0.5055, + "f1": 0.5165668915124385, + "f1_weighted": 0.5003677503298092 + }, + { + "accuracy": 0.503, + "f1": 0.5161044059815396, + "f1_weighted": 0.48382988342547584 + }, + { + "accuracy": 0.5245, + "f1": 0.5370462378140411, + "f1_weighted": 0.5195643522332578 + }, + { + "accuracy": 0.53, + "f1": 0.549351986986132, + "f1_weighted": 0.5241671934435186 + }, + { + "accuracy": 0.5035, + "f1": 0.5087443885955485, + "f1_weighted": 0.48644681105119686 + }, + { + "accuracy": 0.4975, + "f1": 0.5177333990100802, + "f1_weighted": 0.4935868005995442 + }, + { + "accuracy": 0.533, + "f1": 0.551178293192917, + "f1_weighted": 0.5244929775573559 + } + ] + } + ] + }, + "task_name": "CataloniaTweetClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MIRACLRetrieval.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MIRACLRetrieval.json new file mode 100644 index 0000000..77e7950 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MIRACLRetrieval.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "d28a029f35c4ff7f616df47b0edf54e6882395e6", + "evaluation_time": 26.019325494766235, + "kg_co2_emissions": null, + "mteb_version": 
"1.12.39", + "scores": { + "test": [ + { + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.82005, + "map_at_1": 0.21656, + "map_at_10": 0.71975, + "map_at_100": 0.73892, + "map_at_1000": 0.73894, + "map_at_20": 0.73705, + "map_at_3": 0.43125, + "map_at_5": 0.55387, + "mrr_at_1": 0.7407407407407407, + "mrr_at_10": 0.83654651675485, + "mrr_at_100": 0.8369139476778366, + "mrr_at_1000": 0.8369139476778366, + "mrr_at_20": 0.8369139476778366, + "mrr_at_3": 0.820730452674897, + "mrr_at_5": 0.8330761316872427, + "nauc_map_at_1000_diff1": 0.12630888607994345, + "nauc_map_at_1000_max": 0.2452669576658864, + "nauc_map_at_1000_std": 0.07986195329624464, + "nauc_map_at_100_diff1": 0.1263918750152354, + "nauc_map_at_100_max": 0.2453542613463867, + "nauc_map_at_100_std": 0.07994826484766877, + "nauc_map_at_10_diff1": 0.12092950262910344, + "nauc_map_at_10_max": 0.255820765791503, + "nauc_map_at_10_std": 0.06292119073022297, + "nauc_map_at_1_diff1": 0.2776684811776689, + "nauc_map_at_1_max": 0.02462544747250985, + "nauc_map_at_1_std": -0.13602913083365026, + "nauc_map_at_20_diff1": 0.12404948268972653, + "nauc_map_at_20_max": 0.24764999198492982, + "nauc_map_at_20_std": 0.07930559368779662, + "nauc_map_at_3_diff1": 0.2151688439519292, + "nauc_map_at_3_max": 0.08745515754553647, + "nauc_map_at_3_std": -0.08630046963849108, + "nauc_map_at_5_diff1": 0.2115974902794902, + "nauc_map_at_5_max": 0.13824266385127668, + "nauc_map_at_5_std": -0.042399807732673114, + "nauc_mrr_at_1000_diff1": 0.23414829125131517, + "nauc_mrr_at_1000_max": 0.2764703877705055, + "nauc_mrr_at_1000_std": 0.12510892155281064, + "nauc_mrr_at_100_diff1": 0.23414829125131517, + "nauc_mrr_at_100_max": 0.2764703877705055, + "nauc_mrr_at_100_std": 0.12510892155281064, + "nauc_mrr_at_10_diff1": 0.23406026909849903, + "nauc_mrr_at_10_max": 0.2766646985158662, + "nauc_mrr_at_10_std": 0.12475925372686632, + "nauc_mrr_at_1_diff1": 0.2314851389882168, + "nauc_mrr_at_1_max": 0.2596458254210836, + 
"nauc_mrr_at_1_std": 0.11088487462121062, + "nauc_mrr_at_20_diff1": 0.23414829125131517, + "nauc_mrr_at_20_max": 0.2764703877705055, + "nauc_mrr_at_20_std": 0.12510892155281064, + "nauc_mrr_at_3_diff1": 0.2198942139524294, + "nauc_mrr_at_3_max": 0.2791540181590473, + "nauc_mrr_at_3_std": 0.13696697659412918, + "nauc_mrr_at_5_diff1": 0.23994451876927358, + "nauc_mrr_at_5_max": 0.2844686571257038, + "nauc_mrr_at_5_std": 0.12830220063287182, + "nauc_ndcg_at_1000_diff1": 0.14769155739514844, + "nauc_ndcg_at_1000_max": 0.2671784102843396, + "nauc_ndcg_at_1000_std": 0.10696168244243037, + "nauc_ndcg_at_100_diff1": 0.14801917483141677, + "nauc_ndcg_at_100_max": 0.2685943376522826, + "nauc_ndcg_at_100_std": 0.10791340900659142, + "nauc_ndcg_at_10_diff1": 0.1345051372587822, + "nauc_ndcg_at_10_max": 0.29005084669769515, + "nauc_ndcg_at_10_std": 0.07896082378630781, + "nauc_ndcg_at_1_diff1": 0.2258659822062612, + "nauc_ndcg_at_1_max": 0.2584942266018811, + "nauc_ndcg_at_1_std": 0.12314633857036736, + "nauc_ndcg_at_20_diff1": 0.1418083456864262, + "nauc_ndcg_at_20_max": 0.27384089732365224, + "nauc_ndcg_at_20_std": 0.1062505422762453, + "nauc_ndcg_at_3_diff1": 0.12715949164982834, + "nauc_ndcg_at_3_max": 0.22375142370924314, + "nauc_ndcg_at_3_std": 0.08082596411674717, + "nauc_ndcg_at_5_diff1": 0.18209856522078008, + "nauc_ndcg_at_5_max": 0.22750575840697287, + "nauc_ndcg_at_5_std": 0.04579318883289008, + "nauc_precision_at_1000_diff1": -0.12724036600345565, + "nauc_precision_at_1000_max": 0.14531482215685915, + "nauc_precision_at_1000_std": 0.20154485697630725, + "nauc_precision_at_100_diff1": -0.12640971832132333, + "nauc_precision_at_100_max": 0.14690463912326707, + "nauc_precision_at_100_std": 0.20262040598961736, + "nauc_precision_at_10_diff1": -0.14123426873895367, + "nauc_precision_at_10_max": 0.1886604907286534, + "nauc_precision_at_10_std": 0.19860785067518957, + "nauc_precision_at_1_diff1": 0.2258659822062612, + "nauc_precision_at_1_max": 0.2584942266018811, + 
"nauc_precision_at_1_std": 0.12314633857036736, + "nauc_precision_at_20_diff1": -0.13264697639378065, + "nauc_precision_at_20_max": 0.15753878545465452, + "nauc_precision_at_20_std": 0.20676187031467108, + "nauc_precision_at_3_diff1": -0.04452222765963082, + "nauc_precision_at_3_max": 0.23137815192532826, + "nauc_precision_at_3_std": 0.16010343768927882, + "nauc_precision_at_5_diff1": -0.0737741692006809, + "nauc_precision_at_5_max": 0.20756908304274746, + "nauc_precision_at_5_std": 0.18471296534154105, + "nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + "nauc_recall_at_100_diff1": 0.1709319490351383, + "nauc_recall_at_100_max": 0.8471351008532184, + "nauc_recall_at_100_std": 0.46714515859322125, + "nauc_recall_at_10_diff1": 0.025262835945452866, + "nauc_recall_at_10_max": 0.34450763325560557, + "nauc_recall_at_10_std": -0.06212323562311667, + "nauc_recall_at_1_diff1": 0.2776684811776689, + "nauc_recall_at_1_max": 0.02462544747250985, + "nauc_recall_at_1_std": -0.13602913083365026, + "nauc_recall_at_20_diff1": -0.08617349532033955, + "nauc_recall_at_20_max": 0.4389744693933162, + "nauc_recall_at_20_std": 0.07762718661771986, + "nauc_recall_at_3_diff1": 0.17826121205791354, + "nauc_recall_at_3_max": 0.03707294936568367, + "nauc_recall_at_3_std": -0.09965646197786593, + "nauc_recall_at_5_diff1": 0.21216940467879947, + "nauc_recall_at_5_max": 0.06867350110815043, + "nauc_recall_at_5_std": -0.09714400236827504, + "ndcg_at_1": 0.74228, + "ndcg_at_10": 0.82005, + "ndcg_at_100": 0.84305, + "ndcg_at_1000": 0.84333, + "ndcg_at_20": 0.83972, + "ndcg_at_3": 0.72298, + "ndcg_at_5": 0.73977, + "precision_at_1": 0.74228, + "precision_at_10": 0.43102, + "precision_at_100": 0.04608, + "precision_at_1000": 0.00461, + "precision_at_20": 0.22747, + "precision_at_3": 0.6322, + "precision_at_5": 0.56111, + "recall_at_1": 0.21656, + "recall_at_10": 0.94477, + "recall_at_100": 0.99829, + "recall_at_1000": 1.0, + "recall_at_20": 
0.98912, + "recall_at_3": 0.48893, + "recall_at_5": 0.67207 + } + ] + }, + "task_name": "MIRACLRetrieval" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MLSUMClusteringS2S.v2.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MLSUMClusteringS2S.v2.json new file mode 100644 index 0000000..3c328b9 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MLSUMClusteringS2S.v2.json @@ -0,0 +1,57 @@ +{ + "dataset_revision": "b5d54f8f3b61ae17845046286940f03c6bc79bc7", + "evaluation_time": 79.78134441375732, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.48075917245775485, + "v_measure": 0.48075917245775485, + "v_measures": { + "Level 0": [ + 0.4907475800260038, + 0.47508246275757887, + 0.49683097179911, + 0.45862901883600393, + 0.47942058517699243, + 0.474710018111693, + 0.47710333779749764, + 0.4816307126028423, + 0.4731262094551825, + 0.5003108280146442 + ] + } + } + ], + "validation": [ + { + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.4575731927584702, + "v_measure": 0.4575731927584702, + "v_measures": { + "Level 0": [ + 0.45799728572413567, + 0.47413710773799006, + 0.44206566666107416, + 0.44919743411311774, + 0.4569745933708344, + 0.44784031107497474, + 0.4707272476759318, + 0.46363497875163445, + 0.48117879316649725, + 0.43197850930851067 + ] + } + } + ] + }, + "task_name": "MLSUMClusteringS2S.v2" +} \ No newline at end of file diff --git 
a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MTOPDomainClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MTOPDomainClassification.json new file mode 100644 index 0000000..c3f6f8a --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MTOPDomainClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "d80d48c1eb48d3562165c59d59d0034df9fff0bf", + "evaluation_time": 6.270148038864136, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.8998999332888593, + "f1": 0.8956014159753185, + "f1_weighted": 0.8993901494831877, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.8998999332888593, + "scores_per_experiment": [ + { + "accuracy": 0.9219479653102068, + "f1": 0.9159690057447801, + "f1_weighted": 0.9214934916576839 + }, + { + "accuracy": 0.8812541694462975, + "f1": 0.8754704542503858, + "f1_weighted": 0.8809667242896289 + }, + { + "accuracy": 0.9049366244162775, + "f1": 0.9050361430672885, + "f1_weighted": 0.9045353182901911 + }, + { + "accuracy": 0.8919279519679787, + "f1": 0.8903385401342722, + "f1_weighted": 0.8913534805759208 + }, + { + "accuracy": 0.895930620413609, + "f1": 0.8908654011970542, + "f1_weighted": 0.8960376590400219 + }, + { + "accuracy": 0.9219479653102068, + "f1": 0.9170138182508049, + "f1_weighted": 0.9217822066533904 + }, + { + "accuracy": 0.8885923949299533, + "f1": 0.8843078783284795, + "f1_weighted": 0.8876923056111465 + }, + { + "accuracy": 0.8932621747831888, + "f1": 0.8885954413426087, + "f1_weighted": 0.8933426395694206 + }, + { + "accuracy": 0.9022681787858572, + "f1": 0.8991976263732283, + "f1_weighted": 0.9016153747120387 + }, + { + "accuracy": 
0.8969312875250167, + "f1": 0.8892198510642833, + "f1_weighted": 0.8950822944324336 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.8943025540275048, + "f1": 0.8943182962481947, + "f1_weighted": 0.8936316631476989, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.8943025540275048, + "scores_per_experiment": [ + { + "accuracy": 0.91683038637852, + "f1": 0.916931623008614, + "f1_weighted": 0.9162553963333272 + }, + { + "accuracy": 0.8650949574328749, + "f1": 0.8643820690704818, + "f1_weighted": 0.8638908236027467 + }, + { + "accuracy": 0.899803536345776, + "f1": 0.9026665712579536, + "f1_weighted": 0.8999274422732103 + }, + { + "accuracy": 0.8749181401440733, + "f1": 0.8764843459470889, + "f1_weighted": 0.87396792864471 + }, + { + "accuracy": 0.885396201702685, + "f1": 0.8851557668026891, + "f1_weighted": 0.8853650486723169 + }, + { + "accuracy": 0.9292730844793713, + "f1": 0.9288826482897767, + "f1_weighted": 0.9288106940635975 + }, + { + "accuracy": 0.8867059593975115, + "f1": 0.8855828394270963, + "f1_weighted": 0.8853048565816104 + }, + { + "accuracy": 0.9017681728880157, + "f1": 0.8998041275080194, + "f1_weighted": 0.9015241708238587 + }, + { + "accuracy": 0.8867059593975115, + "f1": 0.8893700656726184, + "f1_weighted": 0.8856814890153847 + }, + { + "accuracy": 0.8965291421087099, + "f1": 0.8939229054976096, + "f1_weighted": 0.8955887814662269 + } + ] + } + ] + }, + "task_name": "MTOPDomainClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MTOPIntentClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MTOPIntentClassification.json new file mode 100644 index 0000000..9ab4cf6 --- /dev/null +++ 
b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MTOPIntentClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba", + "evaluation_time": 19.99280548095703, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.6673782521681121, + "f1": 0.4282498959016361, + "f1_weighted": 0.704250775214722, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6673782521681121, + "scores_per_experiment": [ + { + "accuracy": 0.6647765176784523, + "f1": 0.4146055186730349, + "f1_weighted": 0.6984131322505579 + }, + { + "accuracy": 0.69079386257505, + "f1": 0.43953091147026513, + "f1_weighted": 0.7327953693084361 + }, + { + "accuracy": 0.7024683122081388, + "f1": 0.4411675709173595, + "f1_weighted": 0.7307365160345759 + }, + { + "accuracy": 0.6501000667111407, + "f1": 0.4305955652036859, + "f1_weighted": 0.685570226438838 + }, + { + "accuracy": 0.6641094062708472, + "f1": 0.42274769477044605, + "f1_weighted": 0.697283615992139 + }, + { + "accuracy": 0.6794529686457639, + "f1": 0.42214957934598013, + "f1_weighted": 0.7174679123907838 + }, + { + "accuracy": 0.6637758505670447, + "f1": 0.4309662718837306, + "f1_weighted": 0.7009665397322848 + }, + { + "accuracy": 0.6477651767845231, + "f1": 0.43871698248534147, + "f1_weighted": 0.6930762724348851 + }, + { + "accuracy": 0.6454302868579053, + "f1": 0.42698810043249863, + "f1_weighted": 0.6836218015362063 + }, + { + "accuracy": 0.6651100733822548, + "f1": 0.41503076383401866, + "f1_weighted": 0.7025763660285136 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.6707924034053699, + "f1": 0.4333134780687608, + "f1_weighted": 0.7010576278211016, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6707924034053699, + "scores_per_experiment": [ + { + "accuracy": 0.6633922724296005, + "f1": 0.4405047476139423, + 
"f1_weighted": 0.6922672298826147 + }, + { + "accuracy": 0.6941715782580222, + "f1": 0.43428537418062174, + "f1_weighted": 0.7328347607517478 + }, + { + "accuracy": 0.704649639816634, + "f1": 0.44515282127247974, + "f1_weighted": 0.729114602085298 + }, + { + "accuracy": 0.639161755075311, + "f1": 0.4177467115034224, + "f1_weighted": 0.6664739123513768 + }, + { + "accuracy": 0.6699410609037328, + "f1": 0.41165644374695565, + "f1_weighted": 0.6928848873535991 + }, + { + "accuracy": 0.6797642436149313, + "f1": 0.4308827072995052, + "f1_weighted": 0.7139975906636254 + }, + { + "accuracy": 0.6719056974459725, + "f1": 0.4344842774085819, + "f1_weighted": 0.6972734196022903 + }, + { + "accuracy": 0.6673215455140799, + "f1": 0.4594445096434361, + "f1_weighted": 0.706932002989614 + }, + { + "accuracy": 0.633267845448592, + "f1": 0.4193713481286648, + "f1_weighted": 0.668246185532327 + }, + { + "accuracy": 0.6843483955468238, + "f1": 0.43960583988999846, + "f1_weighted": 0.7105516869985236 + } + ] + } + ] + }, + "task_name": "MTOPIntentClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MassiveIntentClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MassiveIntentClassification.json new file mode 100644 index 0000000..2708f63 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MassiveIntentClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "4672e20407010da34463acc759c162ca9734bca6", + "evaluation_time": 19.76087713241577, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.6470073974445192, + "f1": 0.6310214609644293, + "f1_weighted": 0.6436837513584948, 
+ "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6470073974445192, + "scores_per_experiment": [ + { + "accuracy": 0.6741761936785474, + "f1": 0.6512228307084073, + "f1_weighted": 0.6684080825723471 + }, + { + "accuracy": 0.6546738399462004, + "f1": 0.6376361286183498, + "f1_weighted": 0.6566169114439243 + }, + { + "accuracy": 0.632481506388702, + "f1": 0.624393539224367, + "f1_weighted": 0.6321266745990587 + }, + { + "accuracy": 0.6526563550773369, + "f1": 0.6345272333124827, + "f1_weighted": 0.6574592419993966 + }, + { + "accuracy": 0.6422326832548756, + "f1": 0.6195091613673828, + "f1_weighted": 0.6343266017638244 + }, + { + "accuracy": 0.6375252185608608, + "f1": 0.6338831279478526, + "f1_weighted": 0.6347787222429591 + }, + { + "accuracy": 0.6489576328177539, + "f1": 0.6345956030306216, + "f1_weighted": 0.6426186783790114 + }, + { + "accuracy": 0.6344989912575656, + "f1": 0.6089359368087541, + "f1_weighted": 0.6331084222537378 + }, + { + "accuracy": 0.6267652992602556, + "f1": 0.6229109823141604, + "f1_weighted": 0.6115157007846054 + }, + { + "accuracy": 0.6661062542030934, + "f1": 0.6426000663119137, + "f1_weighted": 0.6658784775460829 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.6552877520905066, + "f1": 0.6202265822581658, + "f1_weighted": 0.6505936208453924, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6552877520905066, + "scores_per_experiment": [ + { + "accuracy": 0.675848499754058, + "f1": 0.6373415123186894, + "f1_weighted": 0.6683655353684971 + }, + { + "accuracy": 0.6591244466305952, + "f1": 0.6259666827677907, + "f1_weighted": 0.6538849368003509 + }, + { + "accuracy": 0.6389572060993606, + "f1": 0.6031764905555325, + "f1_weighted": 0.6382117581459715 + }, + { + "accuracy": 0.6497786522380719, + "f1": 0.6044528580865534, + "f1_weighted": 0.6502507419541558 + }, + { + "accuracy": 0.6586325627151992, + "f1": 0.6103464952553869, + "f1_weighted": 0.6518200747569041 + }, + { + "accuracy": 
0.6625676340383669, + "f1": 0.630959770801855, + "f1_weighted": 0.6596876064340712 + }, + { + "accuracy": 0.6483030004918839, + "f1": 0.622416652567368, + "f1_weighted": 0.6432411370805438 + }, + { + "accuracy": 0.6522380718150517, + "f1": 0.6172071576800932, + "f1_weighted": 0.6483792375223332 + }, + { + "accuracy": 0.6428922774225283, + "f1": 0.6309803632326331, + "f1_weighted": 0.6275587876194969 + }, + { + "accuracy": 0.6645351696999509, + "f1": 0.6194178393157558, + "f1_weighted": 0.6645363927715999 + } + ] + } + ] + }, + "task_name": "MassiveIntentClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MassiveScenarioClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MassiveScenarioClassification.json new file mode 100644 index 0000000..3bf7596 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MassiveScenarioClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "fad2c6e8459f9e1c45d9315f4953d921437d70f8", + "evaluation_time": 10.508144617080688, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.689340954942838, + "f1": 0.6814415958731015, + "f1_weighted": 0.684599238297634, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.689340954942838, + "scores_per_experiment": [ + { + "accuracy": 0.7135171486213854, + "f1": 0.707396787491918, + "f1_weighted": 0.7112134622503993 + }, + { + "accuracy": 0.7017484868863484, + "f1": 0.6991731577400936, + "f1_weighted": 0.698909749707669 + }, + { + "accuracy": 0.703429724277068, + "f1": 0.6827795148251238, + "f1_weighted": 0.6960022150155442 + }, + { + "accuracy": 0.660390047074647, + 
"f1": 0.6539983013603253, + "f1_weighted": 0.6590063992411666 + }, + { + "accuracy": 0.6859448554135844, + "f1": 0.6750047994378384, + "f1_weighted": 0.674856195824311 + }, + { + "accuracy": 0.6691324815063887, + "f1": 0.6544214909997551, + "f1_weighted": 0.6530695422655092 + }, + { + "accuracy": 0.6882985877605918, + "f1": 0.6789080194451139, + "f1_weighted": 0.6841195755202121 + }, + { + "accuracy": 0.6849361129791527, + "f1": 0.6866799775271314, + "f1_weighted": 0.6829274488516678 + }, + { + "accuracy": 0.7074646940147948, + "f1": 0.7048816176179851, + "f1_weighted": 0.7065085719395983 + }, + { + "accuracy": 0.6785474108944183, + "f1": 0.6711722922857306, + "f1_weighted": 0.6793792223602632 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.6898671913428431, + "f1": 0.6784455770714182, + "f1_weighted": 0.6854763370931158, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6898671913428431, + "scores_per_experiment": [ + { + "accuracy": 0.7127397934087555, + "f1": 0.7033831106211523, + "f1_weighted": 0.7105329985851567 + }, + { + "accuracy": 0.6999508116084604, + "f1": 0.6946142599469556, + "f1_weighted": 0.7003488359657578 + }, + { + "accuracy": 0.6989670437776685, + "f1": 0.6855044507803725, + "f1_weighted": 0.6940612897526253 + }, + { + "accuracy": 0.6414166256763404, + "f1": 0.6287791514369785, + "f1_weighted": 0.6363481818276955 + }, + { + "accuracy": 0.690113133300541, + "f1": 0.6737844237263116, + "f1_weighted": 0.6812603911940652 + }, + { + "accuracy": 0.6768322675848499, + "f1": 0.6618495853575518, + "f1_weighted": 0.6642768371454628 + }, + { + "accuracy": 0.6876537137235612, + "f1": 0.6713162408667717, + "f1_weighted": 0.681612763528513 + }, + { + "accuracy": 0.691096901131333, + "f1": 0.6884716079085146, + "f1_weighted": 0.6887484320545538 + }, + { + "accuracy": 0.7092966060009838, + "f1": 0.705133496830295, + "f1_weighted": 0.7067786987009043 + }, + { + "accuracy": 0.690605017215937, + "f1": 0.6716194432392777, + 
"f1_weighted": 0.6907949421764245 + } + ] + } + ] + }, + "task_name": "MassiveScenarioClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MintakaRetrieval.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MintakaRetrieval.json new file mode 100644 index 0000000..fbdfec1 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MintakaRetrieval.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "efa78cc2f74bbcd21eff2261f9e13aebe40b814e", + "evaluation_time": 7.05264139175415, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.2836, + "map_at_1": 0.15759, + "map_at_10": 0.2395, + "map_at_100": 0.24909, + "map_at_1000": 0.25023, + "map_at_20": 0.24509, + "map_at_3": 0.21528, + "map_at_5": 0.22965, + "mrr_at_1": 0.15759075907590758, + "mrr_at_10": 0.23949722353187702, + "mrr_at_100": 0.24908622523828003, + "mrr_at_1000": 0.2502319149307796, + "mrr_at_20": 0.24509243420996593, + "mrr_at_3": 0.2152777777777775, + "mrr_at_5": 0.22965484048404824, + "nauc_map_at_1000_diff1": 0.2806210376472332, + "nauc_map_at_1000_max": 0.3174306923701855, + "nauc_map_at_1000_std": 0.1706411574885985, + "nauc_map_at_100_diff1": 0.28025784064834797, + "nauc_map_at_100_max": 0.3174188148243739, + "nauc_map_at_100_std": 0.1710298026614719, + "nauc_map_at_10_diff1": 0.27972316231989286, + "nauc_map_at_10_max": 0.3157232326121595, + "nauc_map_at_10_std": 0.1680506426386125, + "nauc_map_at_1_diff1": 0.3574576867174328, + "nauc_map_at_1_max": 0.32803431303885816, + "nauc_map_at_1_std": 0.08966084129264282, + "nauc_map_at_20_diff1": 
0.27929287579638706, + "nauc_map_at_20_max": 0.31683000891048013, + "nauc_map_at_20_std": 0.1700909955097024, + "nauc_map_at_3_diff1": 0.2974469826861232, + "nauc_map_at_3_max": 0.318484096985998, + "nauc_map_at_3_std": 0.14924317136481072, + "nauc_map_at_5_diff1": 0.28554849796682286, + "nauc_map_at_5_max": 0.31653047860796735, + "nauc_map_at_5_std": 0.1609951936275703, + "nauc_mrr_at_1000_diff1": 0.2806210376472332, + "nauc_mrr_at_1000_max": 0.3174306923701855, + "nauc_mrr_at_1000_std": 0.1706411574885985, + "nauc_mrr_at_100_diff1": 0.28025784064834797, + "nauc_mrr_at_100_max": 0.3174188148243739, + "nauc_mrr_at_100_std": 0.1710298026614719, + "nauc_mrr_at_10_diff1": 0.27972316231989286, + "nauc_mrr_at_10_max": 0.3157232326121595, + "nauc_mrr_at_10_std": 0.1680506426386125, + "nauc_mrr_at_1_diff1": 0.3574576867174328, + "nauc_mrr_at_1_max": 0.32803431303885816, + "nauc_mrr_at_1_std": 0.08966084129264282, + "nauc_mrr_at_20_diff1": 0.27929287579638706, + "nauc_mrr_at_20_max": 0.31683000891048013, + "nauc_mrr_at_20_std": 0.1700909955097024, + "nauc_mrr_at_3_diff1": 0.2974469826861232, + "nauc_mrr_at_3_max": 0.318484096985998, + "nauc_mrr_at_3_std": 0.14924317136481072, + "nauc_mrr_at_5_diff1": 0.28554849796682286, + "nauc_mrr_at_5_max": 0.31653047860796735, + "nauc_mrr_at_5_std": 0.1609951936275703, + "nauc_ndcg_at_1000_diff1": 0.2623118931788109, + "nauc_ndcg_at_1000_max": 0.3166414453263883, + "nauc_ndcg_at_1000_std": 0.1999737837287591, + "nauc_ndcg_at_100_diff1": 0.25372573862982645, + "nauc_ndcg_at_100_max": 0.317144129156677, + "nauc_ndcg_at_100_std": 0.21303978352722364, + "nauc_ndcg_at_10_diff1": 0.24927780207686157, + "nauc_ndcg_at_10_max": 0.3092045193499185, + "nauc_ndcg_at_10_std": 0.19987251933520317, + "nauc_ndcg_at_1_diff1": 0.3574576867174328, + "nauc_ndcg_at_1_max": 0.32803431303885816, + "nauc_ndcg_at_1_std": 0.08966084129264282, + "nauc_ndcg_at_20_diff1": 0.24808183603204273, + "nauc_ndcg_at_20_max": 0.3127154141390978, + "nauc_ndcg_at_20_std": 
0.2071682526367811, + "nauc_ndcg_at_3_diff1": 0.2816245744839948, + "nauc_ndcg_at_3_max": 0.31406940069509504, + "nauc_ndcg_at_3_std": 0.1650829721911416, + "nauc_ndcg_at_5_diff1": 0.26196199437271406, + "nauc_ndcg_at_5_max": 0.31081712184948707, + "nauc_ndcg_at_5_std": 0.1847691236860679, + "nauc_precision_at_1000_diff1": 0.18403348053613416, + "nauc_precision_at_1000_max": 0.3572955192310904, + "nauc_precision_at_1000_std": 0.5040458070884974, + "nauc_precision_at_100_diff1": 0.177788191242803, + "nauc_precision_at_100_max": 0.3285446967447927, + "nauc_precision_at_100_std": 0.3643519273418131, + "nauc_precision_at_10_diff1": 0.17226284118296495, + "nauc_precision_at_10_max": 0.29273259109007543, + "nauc_precision_at_10_std": 0.28071988600661435, + "nauc_precision_at_1_diff1": 0.3574576867174328, + "nauc_precision_at_1_max": 0.32803431303885816, + "nauc_precision_at_1_std": 0.08966084129264282, + "nauc_precision_at_20_diff1": 0.16673939110190678, + "nauc_precision_at_20_max": 0.30392178980959955, + "nauc_precision_at_20_std": 0.30841522152222134, + "nauc_precision_at_3_diff1": 0.24299635901158625, + "nauc_precision_at_3_max": 0.30278291736476437, + "nauc_precision_at_3_std": 0.20378860057436296, + "nauc_precision_at_5_diff1": 0.2040566837676211, + "nauc_precision_at_5_max": 0.29643258689736457, + "nauc_precision_at_5_std": 0.24331680809006553, + "nauc_recall_at_1000_diff1": 0.1840334805361303, + "nauc_recall_at_1000_max": 0.35729551923108493, + "nauc_recall_at_1000_std": 0.5040458070884997, + "nauc_recall_at_100_diff1": 0.17778819124280318, + "nauc_recall_at_100_max": 0.3285446967447927, + "nauc_recall_at_100_std": 0.36435192734181315, + "nauc_recall_at_10_diff1": 0.17226284118296473, + "nauc_recall_at_10_max": 0.2927325910900757, + "nauc_recall_at_10_std": 0.28071988600661457, + "nauc_recall_at_1_diff1": 0.3574576867174328, + "nauc_recall_at_1_max": 0.32803431303885816, + "nauc_recall_at_1_std": 0.08966084129264282, + "nauc_recall_at_20_diff1": 
0.16673939110190691, + "nauc_recall_at_20_max": 0.30392178980959944, + "nauc_recall_at_20_std": 0.3084152215222215, + "nauc_recall_at_3_diff1": 0.24299635901158634, + "nauc_recall_at_3_max": 0.3027829173647644, + "nauc_recall_at_3_std": 0.20378860057436296, + "nauc_recall_at_5_diff1": 0.2040566837676213, + "nauc_recall_at_5_max": 0.2964325868973647, + "nauc_recall_at_5_std": 0.2433168080900657, + "ndcg_at_1": 0.15759, + "ndcg_at_10": 0.2836, + "ndcg_at_100": 0.33409, + "ndcg_at_1000": 0.36971, + "ndcg_at_20": 0.30388, + "ndcg_at_3": 0.23385, + "ndcg_at_5": 0.25981, + "precision_at_1": 0.15759, + "precision_at_10": 0.04241, + "precision_at_100": 0.0067, + "precision_at_1000": 0.00096, + "precision_at_20": 0.02521, + "precision_at_3": 0.09585, + "precision_at_5": 0.07013, + "recall_at_1": 0.15759, + "recall_at_10": 0.42409, + "recall_at_100": 0.66955, + "recall_at_1000": 0.96122, + "recall_at_20": 0.50413, + "recall_at_3": 0.28754, + "recall_at_5": 0.35066 + } + ] + }, + "task_name": "MintakaRetrieval" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MultiEURLEXMultilabelClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MultiEURLEXMultilabelClassification.json new file mode 100644 index 0000000..f5cfd58 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MultiEURLEXMultilabelClassification.json @@ -0,0 +1,73 @@ +{ + "dataset_revision": "2aea5a6dc8fdcfeca41d0fb963c0a338930bde5c", + "evaluation_time": 97.49976062774658, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.05226000000000001, + "f1": 0.34776591670396934, + "hf_subset": "es", + "languages": [ + "spa-Latn" + 
], + "lrap": 0.4867923297619023, + "main_score": 0.05226000000000001, + "scores_per_experiment": [ + { + "accuracy": 0.0508, + "f1": 0.3515592980298273, + "lrap": 0.482634095238089 + }, + { + "accuracy": 0.0384, + "f1": 0.3342647418714472, + "lrap": 0.4764395634920609 + }, + { + "accuracy": 0.0304, + "f1": 0.3333561591095359, + "lrap": 0.46384270634920033 + }, + { + "accuracy": 0.031, + "f1": 0.3596151728257786, + "lrap": 0.4852323915343918 + }, + { + "accuracy": 0.054, + "f1": 0.33368864988206265, + "lrap": 0.4787818928571386 + }, + { + "accuracy": 0.0296, + "f1": 0.2899263926598507, + "lrap": 0.4484587619047552 + }, + { + "accuracy": 0.0414, + "f1": 0.39769741410668413, + "lrap": 0.5033074206349228 + }, + { + "accuracy": 0.0596, + "f1": 0.3661165974403023, + "lrap": 0.5017065899470858 + }, + { + "accuracy": 0.1392, + "f1": 0.3769839918111925, + "lrap": 0.5246692063492061 + }, + { + "accuracy": 0.0482, + "f1": 0.334450749303012, + "lrap": 0.502850669312172 + } + ] + } + ] + }, + "task_name": "MultiEURLEXMultilabelClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MultiHateClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MultiHateClassification.json new file mode 100644 index 0000000..c34715d --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/MultiHateClassification.json @@ -0,0 +1,95 @@ +{ + "dataset_revision": "8f95949846bb9e33c6aaf730ccfdb8fe6bcfb7a9", + "evaluation_time": 2.2212448120117188, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.639, + "ap": 0.3592180685323137, + "ap_weighted": 0.3592180685323137, + "f1": 0.5930714642747119, + 
"f1_weighted": 0.6477640594222518, + "hf_subset": "spa", + "languages": [ + "spa-Latn" + ], + "main_score": 0.639, + "scores_per_experiment": [ + { + "accuracy": 0.631, + "ap": 0.3491654403729876, + "ap_weighted": 0.3491654403729876, + "f1": 0.5849787034464764, + "f1_weighted": 0.6413651568814244 + }, + { + "accuracy": 0.666, + "ap": 0.3675561140408701, + "ap_weighted": 0.3675561140408701, + "f1": 0.6110055158020274, + "f1_weighted": 0.67068038163387 + }, + { + "accuracy": 0.64, + "ap": 0.36961338167435726, + "ap_weighted": 0.36961338167435726, + "f1": 0.6059389380492226, + "f1_weighted": 0.6532073505523422 + }, + { + "accuracy": 0.654, + "ap": 0.3789944316175472, + "ap_weighted": 0.3789944316175472, + "f1": 0.6182549174503842, + "f1_weighted": 0.6659150275165386 + }, + { + "accuracy": 0.581, + "ap": 0.3290027958993476, + "ap_weighted": 0.3290027958993476, + "f1": 0.5483156702122269, + "f1_weighted": 0.5978889287750946 + }, + { + "accuracy": 0.678, + "ap": 0.3717159495021337, + "ap_weighted": 0.3717159495021337, + "f1": 0.6166666666666667, + "f1_weighted": 0.6792266666666666 + }, + { + "accuracy": 0.643, + "ap": 0.34827995159338443, + "ap_weighted": 0.34827995159338443, + "f1": 0.5867285454812761, + "f1_weighted": 0.6489473895019789 + }, + { + "accuracy": 0.625, + "ap": 0.3936575208985076, + "ap_weighted": 0.3936575208985076, + "f1": 0.6128716561144083, + "f1_weighted": 0.6408285165964502 + }, + { + "accuracy": 0.604, + "ap": 0.3086126126126126, + "ap_weighted": 0.3086126126126126, + "f1": 0.5267595938736879, + "f1_weighted": 0.6047647564963001 + }, + { + "accuracy": 0.668, + "ap": 0.37558248711138886, + "ap_weighted": 0.37558248711138886, + "f1": 0.6191944356507416, + "f1_weighted": 0.6748164196018517 + } + ] + } + ] + }, + "task_name": "MultiHateClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/PawsX.json 
b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/PawsX.json new file mode 100644 index 0000000..d918e17 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/PawsX.json @@ -0,0 +1,127 @@ +{ + "dataset_revision": "8a04d940a42cd40658986fdd8e3da561533a3646", + "evaluation_time": 7.871694087982178, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "cosine": { + "accuracy": 0.5865, + "accuracy_threshold": 0.9961481690406799, + "ap": 0.5636790445966594, + "f1": 0.6246556473829201, + "f1_threshold": 0.8752727508544922, + "precision": 0.4541812719078618, + "recall": 1.0 + }, + "dot": { + "accuracy": 0.5865, + "accuracy_threshold": 0.9961481094360352, + "ap": 0.563678153658613, + "f1": 0.6246556473829201, + "f1_threshold": 0.8752727508544922, + "precision": 0.4541812719078618, + "recall": 1.0 + }, + "euclidean": { + "accuracy": 0.5865, + "accuracy_threshold": 0.08777029812335968, + "ap": 0.563678153658613, + "f1": 0.6246556473829201, + "f1_threshold": 0.49925684928894043, + "precision": 0.4541812719078618, + "recall": 1.0 + }, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.5639685167829116, + "manhattan": { + "accuracy": 0.5865, + "accuracy_threshold": 2.234175205230713, + "ap": 0.5639685167829116, + "f1": 0.6246556473829201, + "f1_threshold": 12.686552047729492, + "precision": 0.4541812719078618, + "recall": 1.0 + }, + "max": { + "accuracy": 0.5865, + "ap": 0.5639685167829116, + "f1": 0.6246556473829201 + }, + "similarity": { + "accuracy": 0.5865, + "accuracy_threshold": 0.9961482882499695, + "ap": 0.5636835769082358, + "f1": 0.6246556473829201, + "f1_threshold": 0.8752727508544922, + "precision": 0.4541812719078618, + "recall": 1.0 + } + } + ], + "validation": [ + { + "cosine": { + "accuracy": 0.619, + 
"accuracy_threshold": 0.9945603013038635, + "ap": 0.5441059800807568, + "f1": 0.6033559443056051, + "f1_threshold": 0.9101866483688354, + "precision": 0.43244626407369496, + "recall": 0.9976387249114522 + }, + "dot": { + "accuracy": 0.619, + "accuracy_threshold": 0.9945603013038635, + "ap": 0.5437686550681073, + "f1": 0.6033559443056051, + "f1_threshold": 0.9101867079734802, + "precision": 0.43244626407369496, + "recall": 0.9976387249114522 + }, + "euclidean": { + "accuracy": 0.619, + "accuracy_threshold": 0.10430458933115005, + "ap": 0.544105980080757, + "f1": 0.6033559443056051, + "f1_threshold": 0.4238162934780121, + "precision": 0.43244626407369496, + "recall": 0.9976387249114522 + }, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.5445599041471212, + "manhattan": { + "accuracy": 0.62, + "accuracy_threshold": 2.635448932647705, + "ap": 0.5443078672097414, + "f1": 0.6033559443056051, + "f1_threshold": 10.812749862670898, + "precision": 0.43244626407369496, + "recall": 0.9976387249114522 + }, + "max": { + "accuracy": 0.62, + "ap": 0.5445599041471212, + "f1": 0.6033559443056051 + }, + "similarity": { + "accuracy": 0.619, + "accuracy_threshold": 0.9945603609085083, + "ap": 0.5445599041471212, + "f1": 0.6033559443056051, + "f1_threshold": 0.910186767578125, + "precision": 0.43244626407369496, + "recall": 0.9976387249114522 + } + } + ] + }, + "task_name": "PawsX" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/PublicHealthQA.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/PublicHealthQA.json new file mode 100644 index 0000000..67fec5f --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/PublicHealthQA.json 
@@ -0,0 +1,158 @@ +{ + "dataset_revision": "main", + "evaluation_time": 2.769134283065796, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "spanish", + "languages": [ + "spa-Latn" + ], + "main_score": 0.80811, + "map_at_1": 0.64815, + "map_at_10": 0.76132, + "map_at_100": 0.76463, + "map_at_1000": 0.76463, + "map_at_20": 0.76463, + "map_at_3": 0.7428, + "map_at_5": 0.75638, + "mrr_at_1": 0.6481481481481481, + "mrr_at_10": 0.7613242210464433, + "mrr_at_100": 0.76462508910121, + "mrr_at_1000": 0.76462508910121, + "mrr_at_20": 0.76462508910121, + "mrr_at_3": 0.742798353909465, + "mrr_at_5": 0.7563786008230452, + "nauc_map_at_1000_diff1": 0.772840349093797, + "nauc_map_at_1000_max": 0.4787301617344485, + "nauc_map_at_1000_std": -0.14127558303114432, + "nauc_map_at_100_diff1": 0.772840349093797, + "nauc_map_at_100_max": 0.4787301617344485, + "nauc_map_at_100_std": -0.14127558303114432, + "nauc_map_at_10_diff1": 0.7737500191347674, + "nauc_map_at_10_max": 0.48244052308368274, + "nauc_map_at_10_std": -0.1424842086527368, + "nauc_map_at_1_diff1": 0.7860594868890265, + "nauc_map_at_1_max": 0.4578575739024529, + "nauc_map_at_1_std": -0.13305248030016928, + "nauc_map_at_20_diff1": 0.772840349093797, + "nauc_map_at_20_max": 0.4787301617344485, + "nauc_map_at_20_std": -0.14127558303114432, + "nauc_map_at_3_diff1": 0.7613196872597994, + "nauc_map_at_3_max": 0.4763938838866817, + "nauc_map_at_3_std": -0.14976860569300143, + "nauc_map_at_5_diff1": 0.7707584639023404, + "nauc_map_at_5_max": 0.48123118602793447, + "nauc_map_at_5_std": -0.14643527476734586, + "nauc_mrr_at_1000_diff1": 0.772840349093797, + "nauc_mrr_at_1000_max": 0.4787301617344485, + "nauc_mrr_at_1000_std": -0.14127558303114432, + "nauc_mrr_at_100_diff1": 0.772840349093797, + "nauc_mrr_at_100_max": 0.4787301617344485, + "nauc_mrr_at_100_std": -0.14127558303114432, + "nauc_mrr_at_10_diff1": 0.7737500191347674, + "nauc_mrr_at_10_max": 0.48244052308368274, + 
"nauc_mrr_at_10_std": -0.1424842086527368, + "nauc_mrr_at_1_diff1": 0.7860594868890265, + "nauc_mrr_at_1_max": 0.4578575739024529, + "nauc_mrr_at_1_std": -0.13305248030016928, + "nauc_mrr_at_20_diff1": 0.772840349093797, + "nauc_mrr_at_20_max": 0.4787301617344485, + "nauc_mrr_at_20_std": -0.14127558303114432, + "nauc_mrr_at_3_diff1": 0.7613196872597994, + "nauc_mrr_at_3_max": 0.4763938838866817, + "nauc_mrr_at_3_std": -0.14976860569300143, + "nauc_mrr_at_5_diff1": 0.7707584639023404, + "nauc_mrr_at_5_max": 0.48123118602793447, + "nauc_mrr_at_5_std": -0.14643527476734586, + "nauc_ndcg_at_1000_diff1": 0.7720516732537748, + "nauc_ndcg_at_1000_max": 0.4833287716817668, + "nauc_ndcg_at_1000_std": -0.14120035412947254, + "nauc_ndcg_at_100_diff1": 0.7720516732537748, + "nauc_ndcg_at_100_max": 0.4833287716817668, + "nauc_ndcg_at_100_std": -0.14120035412947254, + "nauc_ndcg_at_10_diff1": 0.7748287594226888, + "nauc_ndcg_at_10_max": 0.5002358941418021, + "nauc_ndcg_at_10_std": -0.1454882949880987, + "nauc_ndcg_at_1_diff1": 0.7860594868890265, + "nauc_ndcg_at_1_max": 0.4578575739024529, + "nauc_ndcg_at_1_std": -0.13305248030016928, + "nauc_ndcg_at_20_diff1": 0.7720516732537748, + "nauc_ndcg_at_20_max": 0.4833287716817668, + "nauc_ndcg_at_20_std": -0.14120035412947254, + "nauc_ndcg_at_3_diff1": 0.7505278397370317, + "nauc_ndcg_at_3_max": 0.4877250344501435, + "nauc_ndcg_at_3_std": -0.15632438713243627, + "nauc_ndcg_at_5_diff1": 0.7679631946099407, + "nauc_ndcg_at_5_max": 0.49808298645344784, + "nauc_ndcg_at_5_std": -0.15140400626087966, + "nauc_precision_at_1000_diff1": 1.0, + "nauc_precision_at_1000_max": 1.0, + "nauc_precision_at_1000_std": 1.0, + "nauc_precision_at_100_diff1": NaN, + "nauc_precision_at_100_max": NaN, + "nauc_precision_at_100_std": NaN, + "nauc_precision_at_10_diff1": 0.7994662940961413, + "nauc_precision_at_10_max": 0.7125745394066662, + "nauc_precision_at_10_std": -0.19102122307300537, + "nauc_precision_at_1_diff1": 0.7860594868890265, + 
"nauc_precision_at_1_max": 0.4578575739024529, + "nauc_precision_at_1_std": -0.13305248030016928, + "nauc_precision_at_20_diff1": 1.0, + "nauc_precision_at_20_max": 1.0, + "nauc_precision_at_20_std": 1.0, + "nauc_precision_at_3_diff1": 0.7003201148344009, + "nauc_precision_at_3_max": 0.5420063924123951, + "nauc_precision_at_3_std": -0.1864695022971129, + "nauc_precision_at_5_diff1": 0.7528642957312349, + "nauc_precision_at_5_max": 0.6191556320574426, + "nauc_precision_at_5_std": -0.1855212821374278, + "nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + "nauc_recall_at_100_diff1": NaN, + "nauc_recall_at_100_max": NaN, + "nauc_recall_at_100_std": NaN, + "nauc_recall_at_10_diff1": 0.7994662940961409, + "nauc_recall_at_10_max": 0.7125745394066696, + "nauc_recall_at_10_std": -0.1910212230730084, + "nauc_recall_at_1_diff1": 0.7860594868890265, + "nauc_recall_at_1_max": 0.4578575739024529, + "nauc_recall_at_1_std": -0.13305248030016928, + "nauc_recall_at_20_diff1": NaN, + "nauc_recall_at_20_max": NaN, + "nauc_recall_at_20_std": NaN, + "nauc_recall_at_3_diff1": 0.7003201148344006, + "nauc_recall_at_3_max": 0.5420063924123951, + "nauc_recall_at_3_std": -0.1864695022971131, + "nauc_recall_at_5_diff1": 0.7528642957312367, + "nauc_recall_at_5_max": 0.6191556320574447, + "nauc_recall_at_5_std": -0.18552128213742827, + "ndcg_at_1": 0.64815, + "ndcg_at_10": 0.80811, + "ndcg_at_100": 0.82044, + "ndcg_at_1000": 0.82044, + "ndcg_at_20": 0.82044, + "ndcg_at_3": 0.77248, + "ndcg_at_5": 0.79614, + "precision_at_1": 0.64815, + "precision_at_10": 0.09506, + "precision_at_100": 0.01, + "precision_at_1000": 0.001, + "precision_at_20": 0.05, + "precision_at_3": 0.28601, + "precision_at_5": 0.18272, + "recall_at_1": 0.64815, + "recall_at_10": 0.95062, + "recall_at_100": 1.0, + "recall_at_1000": 1.0, + "recall_at_20": 1.0, + "recall_at_3": 0.85802, + "recall_at_5": 0.91358 + } + ] + }, + "task_name": "PublicHealthQA" +} \ No newline at end 
of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SIB200Classification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SIB200Classification.json new file mode 100644 index 0000000..f15a2a3 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SIB200Classification.json @@ -0,0 +1,201 @@ +{ + "dataset_revision": "a74d7350ea12af010cfb1c21e34f1f81fd2e615b", + "evaluation_time": 10.086787223815918, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.7348039215686275, + "f1": 0.712227205824662, + "f1_weighted": 0.7294732216797213, + "hf_subset": "spa_Latn", + "languages": [ + "spa-Latn" + ], + "main_score": 0.7348039215686275, + "scores_per_experiment": [ + { + "accuracy": 0.7156862745098039, + "f1": 0.6984523538129084, + "f1_weighted": 0.7097267690262026 + }, + { + "accuracy": 0.7941176470588235, + "f1": 0.7770417064573595, + "f1_weighted": 0.7859998802750071 + }, + { + "accuracy": 0.6813725490196079, + "f1": 0.665057322075456, + "f1_weighted": 0.6671197499904612 + }, + { + "accuracy": 0.7205882352941176, + "f1": 0.6913918200301108, + "f1_weighted": 0.716534443978333 + }, + { + "accuracy": 0.7696078431372549, + "f1": 0.7703543024670413, + "f1_weighted": 0.7734678578329035 + }, + { + "accuracy": 0.7254901960784313, + "f1": 0.6796139212567187, + "f1_weighted": 0.7182319549869652 + }, + { + "accuracy": 0.75, + "f1": 0.7269135416227227, + "f1_weighted": 0.7443993569537661 + }, + { + "accuracy": 0.7450980392156863, + "f1": 0.7324883006731148, + "f1_weighted": 0.7462226786265057 + }, + { + "accuracy": 0.6862745098039216, + "f1": 0.6461408922850449, + "f1_weighted": 0.6748010548190021 + }, + { + "accuracy": 
0.7598039215686274, + "f1": 0.7348178975661439, + "f1_weighted": 0.7582284703080665 + } + ] + } + ], + "train": [ + { + "accuracy": 0.7212553495007132, + "f1": 0.7057348770752447, + "f1_weighted": 0.7195241742657021, + "hf_subset": "spa_Latn", + "languages": [ + "spa-Latn" + ], + "main_score": 0.7212553495007132, + "scores_per_experiment": [ + { + "accuracy": 0.7275320970042796, + "f1": 0.7125163149210506, + "f1_weighted": 0.730051882444584 + }, + { + "accuracy": 0.7446504992867332, + "f1": 0.7277512187828472, + "f1_weighted": 0.7410476454487562 + }, + { + "accuracy": 0.6861626248216833, + "f1": 0.6730812198844056, + "f1_weighted": 0.6794287853919108 + }, + { + "accuracy": 0.7360912981455064, + "f1": 0.7077966056487821, + "f1_weighted": 0.7298885063015492 + }, + { + "accuracy": 0.7203994293865906, + "f1": 0.7117832100667647, + "f1_weighted": 0.7201105109862848 + }, + { + "accuracy": 0.7275320970042796, + "f1": 0.7176909552383984, + "f1_weighted": 0.7296453273616821 + }, + { + "accuracy": 0.7332382310984308, + "f1": 0.7119941920066548, + "f1_weighted": 0.7295944801184617 + }, + { + "accuracy": 0.7118402282453637, + "f1": 0.6921227677303475, + "f1_weighted": 0.7124326380011514 + }, + { + "accuracy": 0.6776034236804565, + "f1": 0.6658620507331644, + "f1_weighted": 0.6753344777381557 + }, + { + "accuracy": 0.7475035663338089, + "f1": 0.7367502357400316, + "f1_weighted": 0.747707488864485 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.711111111111111, + "f1": 0.6926112691579703, + "f1_weighted": 0.708145862014237, + "hf_subset": "spa_Latn", + "languages": [ + "spa-Latn" + ], + "main_score": 0.711111111111111, + "scores_per_experiment": [ + { + "accuracy": 0.6868686868686869, + "f1": 0.6784308104512708, + "f1_weighted": 0.6932016207719532 + }, + { + "accuracy": 0.7171717171717171, + "f1": 0.7117985534134602, + "f1_weighted": 0.71962615045619 + }, + { + "accuracy": 0.6262626262626263, + "f1": 0.617257435124826, + "f1_weighted": 0.6114200772248248 + }, + { + 
"accuracy": 0.7676767676767676, + "f1": 0.7350152920690488, + "f1_weighted": 0.7611632065371068 + }, + { + "accuracy": 0.7676767676767676, + "f1": 0.7510137922445865, + "f1_weighted": 0.7672821711723313 + }, + { + "accuracy": 0.7070707070707071, + "f1": 0.6684179452918924, + "f1_weighted": 0.7083551039427356 + }, + { + "accuracy": 0.6666666666666666, + "f1": 0.635664606062386, + "f1_weighted": 0.6567421568963351 + }, + { + "accuracy": 0.7373737373737373, + "f1": 0.71419872972668, + "f1_weighted": 0.7304437945632499 + }, + { + "accuracy": 0.7070707070707071, + "f1": 0.6992464692693525, + "f1_weighted": 0.7082210590289088 + }, + { + "accuracy": 0.7272727272727273, + "f1": 0.7150690579262007, + "f1_weighted": 0.7250032795487341 + } + ] + } + ] + }, + "task_name": "SIB200Classification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SIB200ClusteringS2S.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SIB200ClusteringS2S.json new file mode 100644 index 0000000..bf1c0ad --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SIB200ClusteringS2S.json @@ -0,0 +1,33 @@ +{ + "dataset_revision": "a74d7350ea12af010cfb1c21e34f1f81fd2e615b", + "evaluation_time": 4.339077472686768, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "spa_Latn", + "languages": [ + "spa-Latn" + ], + "main_score": 0.3637865013678009, + "v_measure": 0.3637865013678009, + "v_measures": { + "Level 0": [ + 0.3738218775055448, + 0.38397680426498715, + 0.39307659206999285, + 0.34394442950251336, + 0.3622755111397903, + 0.36394907852316705, + 0.3753006992705985, + 0.3902404974414029, + 0.3613022535675151, + 
0.28997727039249716 + ] + } + } + ] + }, + "task_name": "SIB200ClusteringS2S" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/STS17.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/STS17.json new file mode 100644 index 0000000..0580bba --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/STS17.json @@ -0,0 +1,54 @@ +{ + "dataset_revision": "faeb762787bd10488a50c8b5be4a3b82e411949c", + "evaluation_time": 1.290160894393921, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "cosine_pearson": 0.8061747411995006, + "cosine_spearman": 0.8092850520982419, + "euclidean_pearson": 0.8062308192548338, + "euclidean_spearman": 0.8092850520982419, + "hf_subset": "es-en", + "languages": [ + "spa-Latn", + "eng-Latn" + ], + "main_score": 0.8092850520982419, + "manhattan_pearson": 0.8037774447728261, + "manhattan_spearman": 0.8070392026281773, + "pearson": [ + 0.8061747471505586, + 1.8793685307813264e-58 + ], + "spearman": [ + 0.8092850520982419, + 3.1177319058178563e-59 + ] + }, + { + "cosine_pearson": 0.8794572768846557, + "cosine_spearman": 0.8742189027425435, + "euclidean_pearson": 0.8739720038018152, + "euclidean_spearman": 0.8742347717431318, + "hf_subset": "es-es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.8742189027425435, + "manhattan_pearson": 0.8742653677655586, + "manhattan_spearman": 0.8764484604450371, + "pearson": [ + 0.8794572775772891, + 6.325029633589189e-82 + ], + "spearman": [ + 0.8742162170908536, + 8.814991516822531e-80 + ] + } + ] + }, + "task_name": "STS17" +} \ No newline at end of file diff --git 
a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/STS22.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/STS22.json new file mode 100644 index 0000000..0c196fb --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/STS22.json @@ -0,0 +1,76 @@ +{ + "dataset_revision": "de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3", + "evaluation_time": 18.019099712371826, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "cosine_pearson": 0.7536285647220418, + "cosine_spearman": 0.7865922376187726, + "euclidean_pearson": 0.7671307474757204, + "euclidean_spearman": 0.7865922376187726, + "hf_subset": "es-en", + "languages": [ + "spa-Latn", + "eng-Latn" + ], + "main_score": 0.7865922376187726, + "manhattan_pearson": 0.7671535829479618, + "manhattan_spearman": 0.7847941594355752, + "pearson": [ + 0.7536285034988063, + 8.8421762586021e-70 + ], + "spearman": [ + 0.7865922376187726, + 6.756880288557219e-80 + ] + }, + { + "cosine_pearson": 0.6194193480204303, + "cosine_spearman": 0.6823157523696245, + "euclidean_pearson": 0.6467777361413329, + "euclidean_spearman": 0.6823157523696245, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6823157523696245, + "manhattan_pearson": 0.6482067157339559, + "manhattan_spearman": 0.6832827783355593, + "pearson": [ + 0.619419425291192, + 4.146083182990284e-23 + ], + "spearman": [ + 0.6823157523696245, + 1.965380323970148e-29 + ] + }, + { + "cosine_pearson": 0.6871320319309592, + "cosine_spearman": 0.7482729295225335, + "euclidean_pearson": 0.7045590878447581, + "euclidean_spearman": 0.7482729295225335, + "hf_subset": "es-it", + "languages": [ + "spa-Latn", + "ita-Latn" + ], + "main_score": 
0.7482729295225335, + "manhattan_pearson": 0.7042416024680677, + "manhattan_spearman": 0.7485161011360759, + "pearson": [ + 0.6871319002811572, + 8.435008393649687e-32 + ], + "spearman": [ + 0.7482729295225335, + 2.2952345874336864e-40 + ] + } + ] + }, + "task_name": "STS22" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/STSBenchmarkMultilingualSTS.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/STSBenchmarkMultilingualSTS.json new file mode 100644 index 0000000..7d7fdd4 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/STSBenchmarkMultilingualSTS.json @@ -0,0 +1,55 @@ +{ + "dataset_revision": "29afa2569dcedaaa2fe6a3dcfebab33d28b82e8c", + "evaluation_time": 3.201539993286133, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "dev": [ + { + "cosine_pearson": 0.8661307214842713, + "cosine_spearman": 0.8646354604520479, + "euclidean_pearson": 0.8551953569289559, + "euclidean_spearman": 0.8646354604520479, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.8646354604520479, + "manhattan_pearson": 0.8549865918988055, + "manhattan_spearman": 0.864607266482388, + "pearson": [ + 0.866130724275918, + 0.0 + ], + "spearman": [ + 0.8646354596834828, + 0.0 + ] + } + ], + "test": [ + { + "cosine_pearson": 0.8351880240147809, + "cosine_spearman": 0.8366686002602768, + "euclidean_pearson": 0.8256939660200426, + "euclidean_spearman": 0.8366690488047195, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.8366686002602768, + "manhattan_pearson": 0.8244444685885318, + "manhattan_spearman": 0.8351754633747013, + "pearson": [ + 0.8351880203679104, + 0.0 + ], + "spearman": [ + 
0.8366687514688184, + 0.0 + ] + } + ] + }, + "task_name": "STSBenchmarkMultilingualSTS" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/STSES.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/STSES.json new file mode 100644 index 0000000..0b54961 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/STSES.json @@ -0,0 +1,32 @@ +{ + "dataset_revision": "0912bb6c9393c76d62a7c5ee81c4c817ff47c9f4", + "evaluation_time": 0.46225929260253906, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "cosine_pearson": 0.8334138834465503, + "cosine_spearman": 0.7923804835012699, + "euclidean_pearson": 0.8240821458640358, + "euclidean_spearman": 0.7923804835012699, + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.7923804835012699, + "manhattan_pearson": 0.8256844071102685, + "manhattan_spearman": 0.795620979733612, + "pearson": [ + 0.8334138673430774, + 3.031107312477924e-41 + ], + "spearman": [ + 0.7923804835012699, + 1.1660893672508765e-34 + ] + } + ] + }, + "task_name": "STSES" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishNewsClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishNewsClassification.json new file mode 100644 index 0000000..84fda01 --- /dev/null +++ 
b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishNewsClassification.json @@ -0,0 +1,73 @@ +{ + "dataset_revision": "0086c197b914690a9dace258a19398890a05299a", + "evaluation_time": 40.532639503479004, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "train": [ + { + "accuracy": 0.880517578125, + "f1": 0.8795020852026451, + "f1_weighted": 0.8794390702000751, + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.880517578125, + "scores_per_experiment": [ + { + "accuracy": 0.85888671875, + "f1": 0.8557756982983112, + "f1_weighted": 0.8556918712461705 + }, + { + "accuracy": 0.87548828125, + "f1": 0.8745228799625533, + "f1_weighted": 0.8744590727427706 + }, + { + "accuracy": 0.89111328125, + "f1": 0.8897255097549966, + "f1_weighted": 0.8896925051268902 + }, + { + "accuracy": 0.884765625, + "f1": 0.883185227461091, + "f1_weighted": 0.8831221933606745 + }, + { + "accuracy": 0.88232421875, + "f1": 0.8810608049070522, + "f1_weighted": 0.8810105686660049 + }, + { + "accuracy": 0.88427734375, + "f1": 0.8837703054975764, + "f1_weighted": 0.8837182073213299 + }, + { + "accuracy": 0.87744140625, + "f1": 0.8768290737207671, + "f1_weighted": 0.8767650258041771 + }, + { + "accuracy": 0.87744140625, + "f1": 0.8770431707837255, + "f1_weighted": 0.8769589228230366 + }, + { + "accuracy": 0.880859375, + "f1": 0.8809094729200334, + "f1_weighted": 0.8808542578036764 + }, + { + "accuracy": 0.892578125, + "f1": 0.892198708720343, + "f1_weighted": 0.8921180771060196 + } + ] + } + ] + }, + "task_name": "SpanishNewsClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishNewsClusteringP2P.json 
b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishNewsClusteringP2P.json new file mode 100644 index 0000000..c76162c --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishNewsClusteringP2P.json @@ -0,0 +1,23 @@ +{ + "dataset_revision": "bf8ca8ddc5b7da4f7004720ddf99bbe0483480e6", + "evaluation_time": 14.555667161941528, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.4399933663826367, + "v_measure": 0.4399933663826367, + "v_measure_std": 0.0, + "v_measures": [ + 0.4399933663826367 + ] + } + ] + }, + "task_name": "SpanishNewsClusteringP2P" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishPassageRetrievalS2P.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishPassageRetrievalS2P.json new file mode 100644 index 0000000..481aee5 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishPassageRetrievalS2P.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "9cddf2ce5209ade52c2115ccfa00eb22c6d3a837", + "evaluation_time": 102.75115370750427, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.39456, + "map_at_1": 0.13416, + "map_at_10": 0.26131, + "map_at_100": 0.31617, + "map_at_1000": 0.31956, + "map_at_20": 0.29109, + "map_at_3": 0.19125, + "map_at_5": 0.23122, + "mrr_at_1": 
0.49101796407185627, + "mrr_at_10": 0.6319432563444541, + "mrr_at_100": 0.636691876450451, + "mrr_at_1000": 0.6367160240405215, + "mrr_at_20": 0.6361386500079704, + "mrr_at_3": 0.6007984031936128, + "mrr_at_5": 0.6196606786427147, + "nauc_map_at_1000_diff1": 0.34903297109952164, + "nauc_map_at_1000_max": 0.13315868741173562, + "nauc_map_at_1000_std": -0.07360484239057748, + "nauc_map_at_100_diff1": 0.3499317742109723, + "nauc_map_at_100_max": 0.13720961206173846, + "nauc_map_at_100_std": -0.06518651811207607, + "nauc_map_at_10_diff1": 0.4079341251419084, + "nauc_map_at_10_max": 0.09464673930717196, + "nauc_map_at_10_std": -0.18665708002687612, + "nauc_map_at_1_diff1": 0.5559688403168959, + "nauc_map_at_1_max": 0.17021474013926507, + "nauc_map_at_1_std": -0.19872161200739266, + "nauc_map_at_20_diff1": 0.3798465733611344, + "nauc_map_at_20_max": 0.12264531455197225, + "nauc_map_at_20_std": -0.13057568241381282, + "nauc_map_at_3_diff1": 0.4266251200049149, + "nauc_map_at_3_max": 0.12799775016892515, + "nauc_map_at_3_std": -0.21260599831419072, + "nauc_map_at_5_diff1": 0.4531697107996353, + "nauc_map_at_5_max": 0.10189213721452048, + "nauc_map_at_5_std": -0.23788489986258368, + "nauc_mrr_at_1000_diff1": 0.41283127977749967, + "nauc_mrr_at_1000_max": 0.16359472560528898, + "nauc_mrr_at_1000_std": -0.06410401795425717, + "nauc_mrr_at_100_diff1": 0.41267532172021837, + "nauc_mrr_at_100_max": 0.1635135800287888, + "nauc_mrr_at_100_std": -0.06403390774613649, + "nauc_mrr_at_10_diff1": 0.41345929078201304, + "nauc_mrr_at_10_max": 0.17354177053114747, + "nauc_mrr_at_10_std": -0.06316245261178507, + "nauc_mrr_at_1_diff1": 0.44406063765174647, + "nauc_mrr_at_1_max": 0.047510289065823594, + "nauc_mrr_at_1_std": -0.11288689495861863, + "nauc_mrr_at_20_diff1": 0.41180246586425956, + "nauc_mrr_at_20_max": 0.16377194585952048, + "nauc_mrr_at_20_std": -0.06390499434789955, + "nauc_mrr_at_3_diff1": 0.4201071182282893, + "nauc_mrr_at_3_max": 0.18345382479047195, + "nauc_mrr_at_3_std": 
-0.062038428880900595, + "nauc_mrr_at_5_diff1": 0.41295469043471833, + "nauc_mrr_at_5_max": 0.18879877172052337, + "nauc_mrr_at_5_std": -0.04990782268783993, + "nauc_ndcg_at_1000_diff1": 0.2711906398669872, + "nauc_ndcg_at_1000_max": 0.17896236011707367, + "nauc_ndcg_at_1000_std": 0.03858347982579866, + "nauc_ndcg_at_100_diff1": 0.25244264702389174, + "nauc_ndcg_at_100_max": 0.21024596452771446, + "nauc_ndcg_at_100_std": 0.13079907403424326, + "nauc_ndcg_at_10_diff1": 0.38535892528399274, + "nauc_ndcg_at_10_max": 0.09435313943328828, + "nauc_ndcg_at_10_std": -0.15419801056845883, + "nauc_ndcg_at_1_diff1": 0.491625425616573, + "nauc_ndcg_at_1_max": 0.08180915439524253, + "nauc_ndcg_at_1_std": -0.17043835910928584, + "nauc_ndcg_at_20_diff1": 0.3308931041276888, + "nauc_ndcg_at_20_max": 0.1697715544605736, + "nauc_ndcg_at_20_std": -0.03055239803165603, + "nauc_ndcg_at_3_diff1": 0.3281480787893263, + "nauc_ndcg_at_3_max": 0.11426880708048484, + "nauc_ndcg_at_3_std": -0.12600530073839, + "nauc_ndcg_at_5_diff1": 0.4145551222958778, + "nauc_ndcg_at_5_max": 0.09703498008204502, + "nauc_ndcg_at_5_std": -0.18601248204848694, + "nauc_precision_at_1000_diff1": -0.43503777582289394, + "nauc_precision_at_1000_max": -0.1244612067506142, + "nauc_precision_at_1000_std": 0.1254101034705605, + "nauc_precision_at_100_diff1": -0.464234360006375, + "nauc_precision_at_100_max": 0.047520253903188286, + "nauc_precision_at_100_std": 0.4697986867421012, + "nauc_precision_at_10_diff1": -0.11680068978917933, + "nauc_precision_at_10_max": -0.04120850339101388, + "nauc_precision_at_10_std": 0.11257730276502088, + "nauc_precision_at_1_diff1": 0.491625425616573, + "nauc_precision_at_1_max": 0.08180915439524253, + "nauc_precision_at_1_std": -0.17043835910928584, + "nauc_precision_at_20_diff1": -0.337442151234946, + "nauc_precision_at_20_max": 0.03975399065627172, + "nauc_precision_at_20_std": 0.35344032384955276, + "nauc_precision_at_3_diff1": 0.12318638157064275, + "nauc_precision_at_3_max": 
0.0531883944511154, + "nauc_precision_at_3_std": -0.059610079948081866, + "nauc_precision_at_5_diff1": 0.15791365332970575, + "nauc_precision_at_5_max": -0.014882792223851794, + "nauc_precision_at_5_std": -0.12789878478657007, + "nauc_recall_at_1000_diff1": 0.030931402953761903, + "nauc_recall_at_1000_max": 0.36869057111721737, + "nauc_recall_at_1000_std": 0.6092097911525233, + "nauc_recall_at_100_diff1": -0.09097790496009286, + "nauc_recall_at_100_max": 0.2734310026551087, + "nauc_recall_at_100_std": 0.6316233291179643, + "nauc_recall_at_10_diff1": 0.2925824393155479, + "nauc_recall_at_10_max": 0.06751316978818943, + "nauc_recall_at_10_std": -0.08238954058180982, + "nauc_recall_at_1_diff1": 0.5559688403168959, + "nauc_recall_at_1_max": 0.17021474013926507, + "nauc_recall_at_1_std": -0.19872161200739266, + "nauc_recall_at_20_diff1": 0.1856779145409813, + "nauc_recall_at_20_max": 0.1748885879590604, + "nauc_recall_at_20_std": 0.11664048389727447, + "nauc_recall_at_3_diff1": 0.3614613811354796, + "nauc_recall_at_3_max": 0.14206646746256324, + "nauc_recall_at_3_std": -0.1768243138954541, + "nauc_recall_at_5_diff1": 0.41616213386109674, + "nauc_recall_at_5_max": 0.09659783241904796, + "nauc_recall_at_5_std": -0.20342618402011914, + "ndcg_at_1": 0.46707, + "ndcg_at_10": 0.39456, + "ndcg_at_100": 0.52669, + "ndcg_at_1000": 0.563, + "ndcg_at_20": 0.45271, + "ndcg_at_3": 0.37849, + "ndcg_at_5": 0.37451, + "precision_at_1": 0.46707, + "precision_at_10": 0.18743, + "precision_at_100": 0.04162, + "precision_at_1000": 0.00528, + "precision_at_20": 0.13772, + "precision_at_3": 0.31737, + "precision_at_5": 0.26467, + "recall_at_1": 0.13416, + "recall_at_10": 0.40833, + "recall_at_100": 0.76804, + "recall_at_1000": 0.93381, + "recall_at_20": 0.55105, + "recall_at_3": 0.22573, + "recall_at_5": 0.30726 + } + ] + }, + "task_name": "SpanishPassageRetrievalS2P" +} \ No newline at end of file diff --git 
a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishPassageRetrievalS2S.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishPassageRetrievalS2S.json new file mode 100644 index 0000000..9c9727a --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishPassageRetrievalS2S.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "9cddf2ce5209ade52c2115ccfa00eb22c6d3a837", + "evaluation_time": 1.995056390762329, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.68418, + "map_at_1": 0.14601, + "map_at_10": 0.49454, + "map_at_100": 0.61273, + "map_at_1000": 0.61273, + "map_at_20": 0.56352, + "map_at_3": 0.29828, + "map_at_5": 0.38108, + "mrr_at_1": 0.7485029940119761, + "mrr_at_10": 0.8381237524950098, + "mrr_at_100": 0.8384980039920158, + "mrr_at_1000": 0.8384980039920158, + "mrr_at_20": 0.8384980039920158, + "mrr_at_3": 0.8253493013972055, + "mrr_at_5": 0.8355289421157683, + "nauc_map_at_1000_diff1": 0.4400624610257018, + "nauc_map_at_1000_max": 0.29974001899467595, + "nauc_map_at_1000_std": -0.15866575040901595, + "nauc_map_at_100_diff1": 0.4400624610257018, + "nauc_map_at_100_max": 0.29974001899467595, + "nauc_map_at_100_std": -0.15866575040901595, + "nauc_map_at_10_diff1": 0.5349453216538773, + "nauc_map_at_10_max": 0.23695157364594513, + "nauc_map_at_10_std": -0.2597716341813252, + "nauc_map_at_1_diff1": 0.7320603322099718, + "nauc_map_at_1_max": 0.20105711962887335, + "nauc_map_at_1_std": -0.35455239072910155, + "nauc_map_at_20_diff1": 0.47593630929461506, + "nauc_map_at_20_max": 0.2812321437708394, + "nauc_map_at_20_std": -0.20641113458282948, + 
"nauc_map_at_3_diff1": 0.6399454090734432, + "nauc_map_at_3_max": 0.2402795512552351, + "nauc_map_at_3_std": -0.31479128888168617, + "nauc_map_at_5_diff1": 0.6034537239086313, + "nauc_map_at_5_max": 0.20812653106459295, + "nauc_map_at_5_std": -0.3150482040550029, + "nauc_mrr_at_1000_diff1": 0.5492258329057919, + "nauc_mrr_at_1000_max": 0.4727178418843534, + "nauc_mrr_at_1000_std": -0.10596751703232096, + "nauc_mrr_at_100_diff1": 0.5492258329057919, + "nauc_mrr_at_100_max": 0.4727178418843534, + "nauc_mrr_at_100_std": -0.10596751703232096, + "nauc_mrr_at_10_diff1": 0.5492392168373608, + "nauc_mrr_at_10_max": 0.47406196254486793, + "nauc_mrr_at_10_std": -0.10314824130561216, + "nauc_mrr_at_1_diff1": 0.5609708005013923, + "nauc_mrr_at_1_max": 0.4369835591229655, + "nauc_mrr_at_1_std": -0.11919658175616465, + "nauc_mrr_at_20_diff1": 0.5492258329057919, + "nauc_mrr_at_20_max": 0.4727178418843534, + "nauc_mrr_at_20_std": -0.10596751703232096, + "nauc_mrr_at_3_diff1": 0.5385938402938206, + "nauc_mrr_at_3_max": 0.5002299004604179, + "nauc_mrr_at_3_std": -0.07492168974330009, + "nauc_mrr_at_5_diff1": 0.5486251908564401, + "nauc_mrr_at_5_max": 0.4817961571377137, + "nauc_mrr_at_5_std": -0.10211551434696368, + "nauc_ndcg_at_1000_diff1": 0.42940376554270665, + "nauc_ndcg_at_1000_max": 0.34890398959712965, + "nauc_ndcg_at_1000_std": -0.08871019071630187, + "nauc_ndcg_at_100_diff1": 0.42940376554270665, + "nauc_ndcg_at_100_max": 0.34890398959712965, + "nauc_ndcg_at_100_std": -0.08871019071630187, + "nauc_ndcg_at_10_diff1": 0.46358297248387537, + "nauc_ndcg_at_10_max": 0.28042383756239936, + "nauc_ndcg_at_10_std": -0.2182550358912685, + "nauc_ndcg_at_1_diff1": 0.6302769573017211, + "nauc_ndcg_at_1_max": 0.27201247622729924, + "nauc_ndcg_at_1_std": -0.2637935910219606, + "nauc_ndcg_at_20_diff1": 0.4498055094936794, + "nauc_ndcg_at_20_max": 0.29331166704609163, + "nauc_ndcg_at_20_std": -0.1972240981156351, + "nauc_ndcg_at_3_diff1": 0.35176438856055875, + "nauc_ndcg_at_3_max": 
0.3475037585080442, + "nauc_ndcg_at_3_std": -0.02835725251293389, + "nauc_ndcg_at_5_diff1": 0.44565602890165334, + "nauc_ndcg_at_5_max": 0.3334550344928254, + "nauc_ndcg_at_5_std": -0.12082966720888173, + "nauc_precision_at_1000_diff1": -0.31855636784105423, + "nauc_precision_at_1000_max": 0.05370718996903022, + "nauc_precision_at_1000_std": 0.2629058814394909, + "nauc_precision_at_100_diff1": -0.318556367841054, + "nauc_precision_at_100_max": 0.05370718996903039, + "nauc_precision_at_100_std": 0.2629058814394914, + "nauc_precision_at_10_diff1": -0.30325201556039755, + "nauc_precision_at_10_max": 0.12265661593168634, + "nauc_precision_at_10_std": 0.2920556772398908, + "nauc_precision_at_1_diff1": 0.6302769573017211, + "nauc_precision_at_1_max": 0.27201247622729924, + "nauc_precision_at_1_std": -0.2637935910219606, + "nauc_precision_at_20_diff1": -0.3342944741102585, + "nauc_precision_at_20_max": 0.0844479446062558, + "nauc_precision_at_20_std": 0.2667698043518604, + "nauc_precision_at_3_diff1": 0.0579755848505851, + "nauc_precision_at_3_max": 0.2850103024703947, + "nauc_precision_at_3_std": 0.13638018083070327, + "nauc_precision_at_5_diff1": -0.10598934925393444, + "nauc_precision_at_5_max": 0.19345527130025864, + "nauc_precision_at_5_std": 0.20581411026633076, + "nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + "nauc_recall_at_100_diff1": NaN, + "nauc_recall_at_100_max": NaN, + "nauc_recall_at_100_std": NaN, + "nauc_recall_at_10_diff1": 0.421262269298499, + "nauc_recall_at_10_max": 0.1333267431938536, + "nauc_recall_at_10_std": -0.34150453445773543, + "nauc_recall_at_1_diff1": 0.7320603322099718, + "nauc_recall_at_1_max": 0.20105711962887335, + "nauc_recall_at_1_std": -0.35455239072910155, + "nauc_recall_at_20_diff1": 0.3089410029111882, + "nauc_recall_at_20_max": 0.14234272434172338, + "nauc_recall_at_20_std": -0.32074504802000947, + "nauc_recall_at_3_diff1": 0.5495557057549783, + "nauc_recall_at_3_max": 
0.2336919223835218, + "nauc_recall_at_3_std": -0.3065975041589497, + "nauc_recall_at_5_diff1": 0.527498860253811, + "nauc_recall_at_5_max": 0.1607554360326736, + "nauc_recall_at_5_std": -0.35373762809210046, + "ndcg_at_1": 0.71856, + "ndcg_at_10": 0.68418, + "ndcg_at_100": 0.80056, + "ndcg_at_1000": 0.80056, + "ndcg_at_20": 0.72733, + "ndcg_at_3": 0.6502, + "ndcg_at_5": 0.65283, + "precision_at_1": 0.71856, + "precision_at_10": 0.43832, + "precision_at_100": 0.07719, + "precision_at_1000": 0.00772, + "precision_at_20": 0.29491, + "precision_at_3": 0.59681, + "precision_at_5": 0.53533, + "recall_at_1": 0.14601, + "recall_at_10": 0.67581, + "recall_at_100": 1.0, + "recall_at_1000": 1.0, + "recall_at_20": 0.82177, + "recall_at_3": 0.34575, + "recall_at_5": 0.47316 + } + ] + }, + "task_name": "SpanishPassageRetrievalS2S" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishSentimentClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishSentimentClassification.json new file mode 100644 index 0000000..8af15ab --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/SpanishSentimentClassification.json @@ -0,0 +1,181 @@ +{ + "dataset_revision": "2a6e340e4b59b7c0a78c03a0b79ac27e1b4a2662", + "evaluation_time": 3.0590689182281494, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.9141891891891893, + "ap": 0.9624810225060665, + "ap_weighted": 0.9624810225060665, + "f1": 0.8717654128294147, + "f1_weighted": 0.9191097981421988, + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.9141891891891893, + "scores_per_experiment": [ + { + "accuracy": 
0.9459459459459459, + "ap": 0.9656614682695159, + "ap_weighted": 0.9656614682695159, + "f1": 0.9080673965369983, + "f1_weighted": 0.9463446675186717 + }, + { + "accuracy": 0.9121621621621622, + "ap": 0.9553743529153365, + "ap_weighted": 0.9553743529153365, + "f1": 0.8587682595610364, + "f1_weighted": 0.9150958930743119 + }, + { + "accuracy": 0.956081081081081, + "ap": 0.9646751267760759, + "ap_weighted": 0.9646751267760759, + "f1": 0.922405275150733, + "f1_weighted": 0.9555629917590757 + }, + { + "accuracy": 0.9391891891891891, + "ap": 0.9549225354903084, + "ap_weighted": 0.9549225354903084, + "f1": 0.8933973589435774, + "f1_weighted": 0.9387171084650077 + }, + { + "accuracy": 0.9290540540540541, + "ap": 0.9749063079831208, + "ap_weighted": 0.9749063079831208, + "f1": 0.8895659743812958, + "f1_weighted": 0.9324005014839489 + }, + { + "accuracy": 0.9324324324324325, + "ap": 0.9723694865017646, + "ap_weighted": 0.9723694865017646, + "f1": 0.8927847000869313, + "f1_weighted": 0.9350756145887992 + }, + { + "accuracy": 0.9594594594594594, + "ap": 0.9813800575613498, + "ap_weighted": 0.9813800575613498, + "f1": 0.9330064126744624, + "f1_weighted": 0.960312783549298 + }, + { + "accuracy": 0.7466216216216216, + "ap": 0.9265462024836175, + "ap_weighted": 0.9265462024836175, + "f1": 0.6872138076787602, + "f1_weighted": 0.775634740058833 + }, + { + "accuracy": 0.8682432432432432, + "ap": 0.958715035746899, + "ap_weighted": 0.958715035746899, + "f1": 0.812885971310479, + "f1_weighted": 0.878902096845204 + }, + { + "accuracy": 0.9527027027027027, + "ap": 0.9702596513326767, + "ap_weighted": 0.9702596513326767, + "f1": 0.9195589719698735, + "f1_weighted": 0.9530515840788377 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.9224489795918366, + "ap": 0.9766353018843669, + "ap_weighted": 0.9766353018843669, + "f1": 0.8889448662398802, + "f1_weighted": 0.9279279023268703, + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.9224489795918366, + 
"scores_per_experiment": [ + { + "accuracy": 0.9659863945578231, + "ap": 0.9859586081245778, + "ap_weighted": 0.9859586081245778, + "f1": 0.9440852034994294, + "f1_weighted": 0.9667005638314663 + }, + { + "accuracy": 0.9727891156462585, + "ap": 0.9941530331140721, + "ap_weighted": 0.9941530331140721, + "f1": 0.9558823529411764, + "f1_weighted": 0.9735322700508776 + }, + { + "accuracy": 0.9523809523809523, + "ap": 0.9764425170544475, + "ap_weighted": 0.9764425170544475, + "f1": 0.9217192848992012, + "f1_weighted": 0.9533807893640529 + }, + { + "accuracy": 0.9659863945578231, + "ap": 0.9793613313093832, + "ap_weighted": 0.9793613313093832, + "f1": 0.9424567446958427, + "f1_weighted": 0.9662367099818866 + }, + { + "accuracy": 0.9387755102040817, + "ap": 0.980114098706033, + "ap_weighted": 0.980114098706033, + "f1": 0.9045798773891092, + "f1_weighted": 0.941495617359818 + }, + { + "accuracy": 0.891156462585034, + "ap": 0.969886649338039, + "ap_weighted": 0.969886649338039, + "f1": 0.8437001594896332, + "f1_weighted": 0.8993587865768318 + }, + { + "accuracy": 0.9387755102040817, + "ap": 0.9868443245066622, + "ap_weighted": 0.9868443245066622, + "f1": 0.9069162034756912, + "f1_weighted": 0.9421096236989133 + }, + { + "accuracy": 0.7551020408163265, + "ap": 0.9406695707636147, + "ap_weighted": 0.9406695707636147, + "f1": 0.704159212880143, + "f1_weighted": 0.7834964039282977 + }, + { + "accuracy": 0.8979591836734694, + "ap": 0.9713476758931304, + "ap_weighted": 0.9713476758931304, + "f1": 0.8518642929123279, + "f1_weighted": 0.9052669102575527 + }, + { + "accuracy": 0.9455782312925171, + "ap": 0.9815752100337082, + "ap_weighted": 0.9815752100337082, + "f1": 0.9140853302162477, + "f1_weighted": 0.947701348219007 + } + ] + } + ] + }, + "task_name": "SpanishSentimentClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/Tatoeba.json 
b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/Tatoeba.json new file mode 100644 index 0000000..9b0bc81 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/Tatoeba.json @@ -0,0 +1,23 @@ +{ + "dataset_revision": "69e8f12da6e31d59addadda9a9c8a2e601a0e282", + "evaluation_time": 1.8738479614257812, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.98, + "f1": 0.9736666666666667, + "hf_subset": "spa-eng", + "languages": [ + "spa-Latn", + "eng-Latn" + ], + "main_score": 0.9736666666666667, + "precision": 0.9706666666666668, + "recall": 0.98 + } + ] + }, + "task_name": "Tatoeba" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/TweetSentimentClassification.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/TweetSentimentClassification.json new file mode 100644 index 0000000..91c7205 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/TweetSentimentClassification.json @@ -0,0 +1,73 @@ +{ + "dataset_revision": "d522bb117c32f5e0207344f69f7075fc9941168b", + "evaluation_time": 0.9082455635070801, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.508984375, + "f1": 0.4988180299663833, + "f1_weighted": 0.498299423764233, + "hf_subset": "spanish", + "languages": [ + "spa-Latn" + ], + "main_score": 0.508984375, + "scores_per_experiment": [ + { + "accuracy": 0.4375, + "f1": 0.43782183645107037, + "f1_weighted": 0.43751503801620584 + }, + { + 
"accuracy": 0.53125, + "f1": 0.5260328335053345, + "f1_weighted": 0.5256131921680589 + }, + { + "accuracy": 0.53125, + "f1": 0.5171218845449248, + "f1_weighted": 0.5164704903221073 + }, + { + "accuracy": 0.48046875, + "f1": 0.46814453760489333, + "f1_weighted": 0.467626331875842 + }, + { + "accuracy": 0.57421875, + "f1": 0.5588680598442138, + "f1_weighted": 0.5581704568375601 + }, + { + "accuracy": 0.45703125, + "f1": 0.4369076622897221, + "f1_weighted": 0.4362947417339028 + }, + { + "accuracy": 0.51171875, + "f1": 0.47792581849453164, + "f1_weighted": 0.4770039812499084 + }, + { + "accuracy": 0.5234375, + "f1": 0.5215914285146468, + "f1_weighted": 0.5213236443174536 + }, + { + "accuracy": 0.4765625, + "f1": 0.47642877711418663, + "f1_weighted": 0.4759224959897115 + }, + { + "accuracy": 0.56640625, + "f1": 0.5673374613003096, + "f1_weighted": 0.567053865131579 + } + ] + } + ] + }, + "task_name": "TweetSentimentClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/XMarket.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/XMarket.json new file mode 100644 index 0000000..2a7a9c8 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/XMarket.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "dfe57acff5b62c23732a7b7d3e3fb84ff501708b", + "evaluation_time": 65.39275741577148, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.14136, + "map_at_1": 0.03765, + "map_at_10": 0.07658, + "map_at_100": 0.09425, + "map_at_1000": 0.09944, + "map_at_20": 0.08427, + "map_at_3": 0.05879, + "map_at_5": 0.06712, + "mrr_at_1": 
0.13258741258741258, + "mrr_at_10": 0.20078177378177403, + "mrr_at_100": 0.20975406976935212, + "mrr_at_1000": 0.21051168269316875, + "mrr_at_20": 0.20574719505670638, + "mrr_at_3": 0.1788344988344988, + "mrr_at_5": 0.19089044289044313, + "nauc_map_at_1000_diff1": 0.09031783846933336, + "nauc_map_at_1000_max": 0.28508840781452066, + "nauc_map_at_1000_std": 0.10990009501713457, + "nauc_map_at_100_diff1": 0.09219800646217237, + "nauc_map_at_100_max": 0.2778983692932695, + "nauc_map_at_100_std": 0.08660625822280786, + "nauc_map_at_10_diff1": 0.10964393580221556, + "nauc_map_at_10_max": 0.2629724904937544, + "nauc_map_at_10_std": 0.022552925601707286, + "nauc_map_at_1_diff1": 0.177790153955236, + "nauc_map_at_1_max": 0.2503114684267386, + "nauc_map_at_1_std": -0.06984555019252232, + "nauc_map_at_20_diff1": 0.1017213980274546, + "nauc_map_at_20_max": 0.26964929096513957, + "nauc_map_at_20_std": 0.04582608640946554, + "nauc_map_at_3_diff1": 0.1382531962559102, + "nauc_map_at_3_max": 0.25692428267866885, + "nauc_map_at_3_std": -0.02283791058492772, + "nauc_map_at_5_diff1": 0.1215670523672912, + "nauc_map_at_5_max": 0.2621222044678247, + "nauc_map_at_5_std": -0.0032406754800115086, + "nauc_mrr_at_1000_diff1": 0.057716375642956945, + "nauc_mrr_at_1000_max": 0.2525017167348518, + "nauc_mrr_at_1000_std": 0.15930844646343217, + "nauc_mrr_at_100_diff1": 0.05765900213841955, + "nauc_mrr_at_100_max": 0.25234304531883345, + "nauc_mrr_at_100_std": 0.15915497317368316, + "nauc_mrr_at_10_diff1": 0.05809527619125611, + "nauc_mrr_at_10_max": 0.25343730556083527, + "nauc_mrr_at_10_std": 0.1585579140620306, + "nauc_mrr_at_1_diff1": 0.06269768887097335, + "nauc_mrr_at_1_max": 0.2556142016991723, + "nauc_mrr_at_1_std": 0.1329139859864349, + "nauc_mrr_at_20_diff1": 0.057272142322204726, + "nauc_mrr_at_20_max": 0.2525776532870449, + "nauc_mrr_at_20_std": 0.1579979601866961, + "nauc_mrr_at_3_diff1": 0.05865427869772976, + "nauc_mrr_at_3_max": 0.2520602392159702, + "nauc_mrr_at_3_std": 
0.15350714191932693, + "nauc_mrr_at_5_diff1": 0.05756610337496164, + "nauc_mrr_at_5_max": 0.25532474159510576, + "nauc_mrr_at_5_std": 0.1577074042500999, + "nauc_ndcg_at_1000_diff1": 0.06386098169684533, + "nauc_ndcg_at_1000_max": 0.28689820650002057, + "nauc_ndcg_at_1000_std": 0.1946528509624808, + "nauc_ndcg_at_100_diff1": 0.06532233341561283, + "nauc_ndcg_at_100_max": 0.2739160594199581, + "nauc_ndcg_at_100_std": 0.1542187295782816, + "nauc_ndcg_at_10_diff1": 0.07187608325837007, + "nauc_ndcg_at_10_max": 0.2743230520153759, + "nauc_ndcg_at_10_std": 0.1512706266962838, + "nauc_ndcg_at_1_diff1": 0.06269768887097335, + "nauc_ndcg_at_1_max": 0.2556142016991723, + "nauc_ndcg_at_1_std": 0.1329139859864349, + "nauc_ndcg_at_20_diff1": 0.07271365877545112, + "nauc_ndcg_at_20_max": 0.2763286627165861, + "nauc_ndcg_at_20_std": 0.14592344038779834, + "nauc_ndcg_at_3_diff1": 0.06994519135129283, + "nauc_ndcg_at_3_max": 0.26377519515711595, + "nauc_ndcg_at_3_std": 0.1485589780819923, + "nauc_ndcg_at_5_diff1": 0.06529133079187244, + "nauc_ndcg_at_5_max": 0.27183851644781126, + "nauc_ndcg_at_5_std": 0.1490611760732317, + "nauc_precision_at_1000_diff1": 0.029736829288251884, + "nauc_precision_at_1000_max": 0.14342450718277708, + "nauc_precision_at_1000_std": 0.34848037422931105, + "nauc_precision_at_100_diff1": 0.011810529193830329, + "nauc_precision_at_100_max": 0.23320172044915705, + "nauc_precision_at_100_std": 0.42315962914314703, + "nauc_precision_at_10_diff1": 0.00710678308908373, + "nauc_precision_at_10_max": 0.26702120128385476, + "nauc_precision_at_10_std": 0.30759060662374293, + "nauc_precision_at_1_diff1": 0.06269768887097335, + "nauc_precision_at_1_max": 0.2556142016991723, + "nauc_precision_at_1_std": 0.1329139859864349, + "nauc_precision_at_20_diff1": 0.0037464616977568206, + "nauc_precision_at_20_max": 0.26828566200285353, + "nauc_precision_at_20_std": 0.35727580434307143, + "nauc_precision_at_3_diff1": 0.03186745850434716, + "nauc_precision_at_3_max": 
0.26613294998375153, + "nauc_precision_at_3_std": 0.21342388451748637, + "nauc_precision_at_5_diff1": 0.009275683459710167, + "nauc_precision_at_5_max": 0.2722131469890131, + "nauc_precision_at_5_std": 0.2484710842080359, + "nauc_recall_at_1000_diff1": 0.0329787627970685, + "nauc_recall_at_1000_max": 0.2341211768767686, + "nauc_recall_at_1000_std": 0.15903746076701955, + "nauc_recall_at_100_diff1": 0.04359051134210171, + "nauc_recall_at_100_max": 0.19765882997425935, + "nauc_recall_at_100_std": 0.09488583918222387, + "nauc_recall_at_10_diff1": 0.07577751250170042, + "nauc_recall_at_10_max": 0.2131902225770891, + "nauc_recall_at_10_std": 0.03918081323080116, + "nauc_recall_at_1_diff1": 0.177790153955236, + "nauc_recall_at_1_max": 0.2503114684267386, + "nauc_recall_at_1_std": -0.06984555019252232, + "nauc_recall_at_20_diff1": 0.062311488808302146, + "nauc_recall_at_20_max": 0.20676724199847538, + "nauc_recall_at_20_std": 0.047942357751242774, + "nauc_recall_at_3_diff1": 0.11893681816217043, + "nauc_recall_at_3_max": 0.23109074937264548, + "nauc_recall_at_3_std": -0.011357220270840662, + "nauc_recall_at_5_diff1": 0.08978175469293709, + "nauc_recall_at_5_max": 0.22954965980951872, + "nauc_recall_at_5_std": 0.012599974525164245, + "ndcg_at_1": 0.13259, + "ndcg_at_10": 0.14136, + "ndcg_at_100": 0.18212, + "ndcg_at_1000": 0.22976, + "ndcg_at_20": 0.15193, + "ndcg_at_3": 0.13214, + "ndcg_at_5": 0.13503, + "precision_at_1": 0.13259, + "precision_at_10": 0.07695, + "precision_at_100": 0.02836, + "precision_at_1000": 0.00829, + "precision_at_20": 0.05979, + "precision_at_3": 0.10816, + "precision_at_5": 0.09611, + "recall_at_1": 0.03765, + "recall_at_10": 0.14002, + "recall_at_100": 0.31553, + "recall_at_1000": 0.53681, + "recall_at_20": 0.18805, + "recall_at_3": 0.07767, + "recall_at_5": 0.10098 + } + ] + }, + "task_name": "XMarket" +} \ No newline at end of file diff --git 
a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/XNLI.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/XNLI.json new file mode 100644 index 0000000..827cc7e --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/XNLI.json @@ -0,0 +1,127 @@ +{ + "dataset_revision": "09698e0180d87dc247ca447d3a1248b931ac0cdb", + "evaluation_time": 3.9763050079345703, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "cosine": { + "accuracy": 0.7186813186813187, + "accuracy_threshold": 0.8673545122146606, + "ap": 0.7603625574106656, + "f1": 0.7450980392156864, + "f1_threshold": 0.8544580936431885, + "precision": 0.6551724137931034, + "recall": 0.8636363636363636 + }, + "dot": { + "accuracy": 0.7186813186813187, + "accuracy_threshold": 0.8673545122146606, + "ap": 0.7603625574106655, + "f1": 0.7450980392156864, + "f1_threshold": 0.8544580936431885, + "precision": 0.6551724137931034, + "recall": 0.8636363636363636 + }, + "euclidean": { + "accuracy": 0.7186813186813187, + "accuracy_threshold": 0.5150640606880188, + "ap": 0.7603625574106656, + "f1": 0.7450980392156864, + "f1_threshold": 0.5395218133926392, + "precision": 0.6551724137931034, + "recall": 0.8636363636363636 + }, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.7603625574106656, + "manhattan": { + "accuracy": 0.7223443223443223, + "accuracy_threshold": 13.11111068725586, + "ap": 0.759756685283129, + "f1": 0.7434292866082604, + "f1_threshold": 13.756207466125488, + "precision": 0.648471615720524, + "recall": 0.8709677419354839 + }, + "max": { + "accuracy": 0.7223443223443223, + "ap": 0.7603625574106656, + "f1": 0.7450980392156864 + }, + "similarity": { + "accuracy": 
0.7186813186813187, + "accuracy_threshold": 0.8673545718193054, + "ap": 0.7603625574106655, + "f1": 0.7450980392156864, + "f1_threshold": 0.8544581532478333, + "precision": 0.6551724137931034, + "recall": 0.8636363636363636 + } + } + ], + "validation": [ + { + "cosine": { + "accuracy": 0.7304029304029304, + "accuracy_threshold": 0.8638375997543335, + "ap": 0.7850981749776771, + "f1": 0.7440918298446996, + "f1_threshold": 0.8607165217399597, + "precision": 0.6896120150187734, + "recall": 0.8079178885630498 + }, + "dot": { + "accuracy": 0.7304029304029304, + "accuracy_threshold": 0.863837718963623, + "ap": 0.7850981749776771, + "f1": 0.7440918298446996, + "f1_threshold": 0.8607164621353149, + "precision": 0.6896120150187734, + "recall": 0.8079178885630498 + }, + "euclidean": { + "accuracy": 0.7304029304029304, + "accuracy_threshold": 0.5218473672866821, + "ap": 0.7850981749776771, + "f1": 0.7440918298446996, + "f1_threshold": 0.5277944207191467, + "precision": 0.6896120150187734, + "recall": 0.8079178885630498 + }, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.7850981749776771, + "manhattan": { + "accuracy": 0.7296703296703296, + "accuracy_threshold": 13.037092208862305, + "ap": 0.7830208746989458, + "f1": 0.7421555252387448, + "f1_threshold": 13.392582893371582, + "precision": 0.6938775510204082, + "recall": 0.7976539589442815 + }, + "max": { + "accuracy": 0.7304029304029304, + "ap": 0.7850981749776771, + "f1": 0.7440918298446996 + }, + "similarity": { + "accuracy": 0.7304029304029304, + "accuracy_threshold": 0.8638375997543335, + "ap": 0.7850970469539559, + "f1": 0.7440918298446996, + "f1_threshold": 0.8607165813446045, + "precision": 0.6896120150187734, + "recall": 0.8079178885630498 + } + } + ] + }, + "task_name": "XNLI" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/XPQARetrieval.json 
b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/XPQARetrieval.json new file mode 100644 index 0000000..d7a6300 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/XPQARetrieval.json @@ -0,0 +1,455 @@ +{ + "dataset_revision": "c99d599f0a6ab9b85b065da6f9d94f9cf731679f", + "evaluation_time": 11.41649866104126, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "spa-spa", + "languages": [ + "spa-Latn", + "spa-Latn" + ], + "main_score": 0.61619, + "map_at_1": 0.30187, + "map_at_10": 0.54942, + "map_at_100": 0.56697, + "map_at_1000": 0.56772, + "map_at_20": 0.56078, + "map_at_3": 0.47072, + "map_at_5": 0.52572, + "mrr_at_1": 0.5775535939470365, + "mrr_at_10": 0.652219820252607, + "mrr_at_100": 0.6583683294988412, + "mrr_at_1000": 0.6585880891171735, + "mrr_at_20": 0.6565059346629117, + "mrr_at_3": 0.6324085750315255, + "mrr_at_5": 0.6443253467843627, + "nauc_map_at_1000_diff1": 0.4748972206316934, + "nauc_map_at_1000_max": 0.5312951631973426, + "nauc_map_at_1000_std": 0.0524886101807658, + "nauc_map_at_100_diff1": 0.47484431657099224, + "nauc_map_at_100_max": 0.5314784807957244, + "nauc_map_at_100_std": 0.05234266927934554, + "nauc_map_at_10_diff1": 0.4740865876092526, + "nauc_map_at_10_max": 0.529497026434042, + "nauc_map_at_10_std": 0.04885746822471935, + "nauc_map_at_1_diff1": 0.5409012291414941, + "nauc_map_at_1_max": 0.27584377293149737, + "nauc_map_at_1_std": -0.05652877146902706, + "nauc_map_at_20_diff1": 0.47373116482595956, + "nauc_map_at_20_max": 0.530544741702453, + "nauc_map_at_20_std": 0.051295993158354265, + "nauc_map_at_3_diff1": 0.48870894200957815, + "nauc_map_at_3_max": 0.44174822081894055, + "nauc_map_at_3_std": 0.006493948736834983, + "nauc_map_at_5_diff1": 0.4764999023879126, + "nauc_map_at_5_max": 
0.5081310288676553, + "nauc_map_at_5_std": 0.04089533525554217, + "nauc_mrr_at_1000_diff1": 0.5212348450497635, + "nauc_mrr_at_1000_max": 0.5897149950022041, + "nauc_mrr_at_1000_std": 0.05300891452122039, + "nauc_mrr_at_100_diff1": 0.5210405466151165, + "nauc_mrr_at_100_max": 0.5897846786300716, + "nauc_mrr_at_100_std": 0.05305661008242558, + "nauc_mrr_at_10_diff1": 0.5207676151686406, + "nauc_mrr_at_10_max": 0.5914123524211155, + "nauc_mrr_at_10_std": 0.05337980227864153, + "nauc_mrr_at_1_diff1": 0.5538487579926195, + "nauc_mrr_at_1_max": 0.5868007785305142, + "nauc_mrr_at_1_std": 0.04568059505403089, + "nauc_mrr_at_20_diff1": 0.5207330287745288, + "nauc_mrr_at_20_max": 0.5899650887073677, + "nauc_mrr_at_20_std": 0.05292476883517292, + "nauc_mrr_at_3_diff1": 0.5247245612136222, + "nauc_mrr_at_3_max": 0.5842131350218454, + "nauc_mrr_at_3_std": 0.0516518629125782, + "nauc_mrr_at_5_diff1": 0.5204768798275652, + "nauc_mrr_at_5_max": 0.588507287642092, + "nauc_mrr_at_5_std": 0.05275232790649865, + "nauc_ndcg_at_1000_diff1": 0.47870231297635935, + "nauc_ndcg_at_1000_max": 0.558694947653507, + "nauc_ndcg_at_1000_std": 0.0695849505615743, + "nauc_ndcg_at_100_diff1": 0.47526116869835533, + "nauc_ndcg_at_100_max": 0.5602670289458344, + "nauc_ndcg_at_100_std": 0.07051709009097783, + "nauc_ndcg_at_10_diff1": 0.4714789192552923, + "nauc_ndcg_at_10_max": 0.5574162088578613, + "nauc_ndcg_at_10_std": 0.060363718063062115, + "nauc_ndcg_at_1_diff1": 0.5538487579926195, + "nauc_ndcg_at_1_max": 0.5868007785305142, + "nauc_ndcg_at_1_std": 0.04568059505403089, + "nauc_ndcg_at_20_diff1": 0.4709640867253716, + "nauc_ndcg_at_20_max": 0.5571710534062148, + "nauc_ndcg_at_20_std": 0.06458781555651759, + "nauc_ndcg_at_3_diff1": 0.48234188278243656, + "nauc_ndcg_at_3_max": 0.5314332821792904, + "nauc_ndcg_at_3_std": 0.052850302921030594, + "nauc_ndcg_at_5_diff1": 0.4748298301627245, + "nauc_ndcg_at_5_max": 0.5280860992139537, + "nauc_ndcg_at_5_std": 0.049896188177799616, + 
"nauc_precision_at_1000_diff1": -0.1418225517442295, + "nauc_precision_at_1000_max": 0.21283474441357914, + "nauc_precision_at_1000_std": 0.16403980818297306, + "nauc_precision_at_100_diff1": -0.07193185816671922, + "nauc_precision_at_100_max": 0.28999381070670466, + "nauc_precision_at_100_std": 0.16880123574613717, + "nauc_precision_at_10_diff1": 0.047404433231320216, + "nauc_precision_at_10_max": 0.4272166359316348, + "nauc_precision_at_10_std": 0.1549425025764785, + "nauc_precision_at_1_diff1": 0.5538487579926195, + "nauc_precision_at_1_max": 0.5868007785305142, + "nauc_precision_at_1_std": 0.04568059505403089, + "nauc_precision_at_20_diff1": -0.004608081087104946, + "nauc_precision_at_20_max": 0.366546568018257, + "nauc_precision_at_20_std": 0.15997622662860153, + "nauc_precision_at_3_diff1": 0.1775063971065687, + "nauc_precision_at_3_max": 0.477183503831632, + "nauc_precision_at_3_std": 0.11306678329697378, + "nauc_precision_at_5_diff1": 0.09959251056723122, + "nauc_precision_at_5_max": 0.4453588080616318, + "nauc_precision_at_5_std": 0.13528278667053495, + "nauc_recall_at_1000_diff1": 0.402340812320752, + "nauc_recall_at_1000_max": 0.7615494724452475, + "nauc_recall_at_1000_std": 0.7286415093440027, + "nauc_recall_at_100_diff1": 0.33945920100932736, + "nauc_recall_at_100_max": 0.5330522755636333, + "nauc_recall_at_100_std": 0.18492573560880501, + "nauc_recall_at_10_diff1": 0.3944817775537866, + "nauc_recall_at_10_max": 0.5362152405713227, + "nauc_recall_at_10_std": 0.07463465074314947, + "nauc_recall_at_1_diff1": 0.5409012291414941, + "nauc_recall_at_1_max": 0.27584377293149737, + "nauc_recall_at_1_std": -0.05652877146902706, + "nauc_recall_at_20_diff1": 0.3665904954144193, + "nauc_recall_at_20_max": 0.518283209165561, + "nauc_recall_at_20_std": 0.09251796823380928, + "nauc_recall_at_3_diff1": 0.43964808287755086, + "nauc_recall_at_3_max": 0.3967521653025011, + "nauc_recall_at_3_std": 0.014752646414836382, + "nauc_recall_at_5_diff1": 0.4149717688281849, + 
"nauc_recall_at_5_max": 0.4777449735678539, + "nauc_recall_at_5_std": 0.06252072597225404, + "ndcg_at_1": 0.57755, + "ndcg_at_10": 0.61619, + "ndcg_at_100": 0.67209, + "ndcg_at_1000": 0.68578, + "ndcg_at_20": 0.64432, + "ndcg_at_3": 0.56539, + "ndcg_at_5": 0.57976, + "precision_at_1": 0.57755, + "precision_at_10": 0.17327, + "precision_at_100": 0.02224, + "precision_at_1000": 0.00242, + "precision_at_20": 0.09691, + "precision_at_3": 0.39975, + "precision_at_5": 0.30013, + "recall_at_1": 0.30187, + "recall_at_10": 0.69676, + "recall_at_100": 0.90208, + "recall_at_1000": 0.98928, + "recall_at_20": 0.78594, + "recall_at_3": 0.51654, + "recall_at_5": 0.61051 + }, + { + "hf_subset": "eng-spa", + "languages": [ + "eng-Latn", + "spa-Latn" + ], + "main_score": 0.36674, + "map_at_1": 0.15601, + "map_at_10": 0.29453, + "map_at_100": 0.31599, + "map_at_1000": 0.31795, + "map_at_20": 0.30604, + "map_at_3": 0.24152, + "map_at_5": 0.27104, + "mrr_at_1": 0.3316519546027743, + "mrr_at_10": 0.4172131147540984, + "mrr_at_100": 0.4281264653387791, + "mrr_at_1000": 0.42864917952635234, + "mrr_at_20": 0.42387114470172904, + "mrr_at_3": 0.39386296763345957, + "mrr_at_5": 0.40590584279108854, + "nauc_map_at_1000_diff1": 0.27392365893276577, + "nauc_map_at_1000_max": 0.3029181577035992, + "nauc_map_at_1000_std": -0.03598945494007468, + "nauc_map_at_100_diff1": 0.27361367947636606, + "nauc_map_at_100_max": 0.302726871369882, + "nauc_map_at_100_std": -0.0356581977229275, + "nauc_map_at_10_diff1": 0.2739773935561657, + "nauc_map_at_10_max": 0.3016958824105278, + "nauc_map_at_10_std": -0.04265365170866708, + "nauc_map_at_1_diff1": 0.33497938935829996, + "nauc_map_at_1_max": 0.203764636020361, + "nauc_map_at_1_std": -0.06529168679290469, + "nauc_map_at_20_diff1": 0.27356622039810036, + "nauc_map_at_20_max": 0.3027189524497705, + "nauc_map_at_20_std": -0.04029612981084758, + "nauc_map_at_3_diff1": 0.28231502359380795, + "nauc_map_at_3_max": 0.27450066561456865, + "nauc_map_at_3_std": 
-0.058938411314953615, + "nauc_map_at_5_diff1": 0.2745101886519744, + "nauc_map_at_5_max": 0.29788811963133927, + "nauc_map_at_5_std": -0.04410425644912979, + "nauc_mrr_at_1000_diff1": 0.33263509026819926, + "nauc_mrr_at_1000_max": 0.3300927343396619, + "nauc_mrr_at_1000_std": -0.04136444503907277, + "nauc_mrr_at_100_diff1": 0.3324288764482916, + "nauc_mrr_at_100_max": 0.3300557198149589, + "nauc_mrr_at_100_std": -0.04104425669252002, + "nauc_mrr_at_10_diff1": 0.3325791121843401, + "nauc_mrr_at_10_max": 0.3303976096283068, + "nauc_mrr_at_10_std": -0.04336475208538617, + "nauc_mrr_at_1_diff1": 0.36658998272809284, + "nauc_mrr_at_1_max": 0.3326133806084119, + "nauc_mrr_at_1_std": -0.04419406752440945, + "nauc_mrr_at_20_diff1": 0.33130628528921807, + "nauc_mrr_at_20_max": 0.3287746391909199, + "nauc_mrr_at_20_std": -0.042857848859060826, + "nauc_mrr_at_3_diff1": 0.3408330771640461, + "nauc_mrr_at_3_max": 0.3351877632218261, + "nauc_mrr_at_3_std": -0.04848601832240106, + "nauc_mrr_at_5_diff1": 0.33551233208325426, + "nauc_mrr_at_5_max": 0.3356600343326422, + "nauc_mrr_at_5_std": -0.04794267913135989, + "nauc_ndcg_at_1000_diff1": 0.2864581142331351, + "nauc_ndcg_at_1000_max": 0.311319701373552, + "nauc_ndcg_at_1000_std": -0.027820795143928323, + "nauc_ndcg_at_100_diff1": 0.2779007106144894, + "nauc_ndcg_at_100_max": 0.3074983062248152, + "nauc_ndcg_at_100_std": -0.017871597815198533, + "nauc_ndcg_at_10_diff1": 0.2770583774167871, + "nauc_ndcg_at_10_max": 0.30429734587859314, + "nauc_ndcg_at_10_std": -0.04200412553712649, + "nauc_ndcg_at_1_diff1": 0.36658998272809284, + "nauc_ndcg_at_1_max": 0.3326133806084119, + "nauc_ndcg_at_1_std": -0.04419406752440945, + "nauc_ndcg_at_20_diff1": 0.2736743789009511, + "nauc_ndcg_at_20_max": 0.30294059290060377, + "nauc_ndcg_at_20_std": -0.03810474995969198, + "nauc_ndcg_at_3_diff1": 0.28121862776680623, + "nauc_ndcg_at_3_max": 0.31611986583164947, + "nauc_ndcg_at_3_std": -0.04634698438236966, + "nauc_ndcg_at_5_diff1": 
0.2806506022122615, + "nauc_ndcg_at_5_max": 0.30831582285262943, + "nauc_ndcg_at_5_std": -0.0463791664697724, + "nauc_precision_at_1000_diff1": 0.06989869530412869, + "nauc_precision_at_1000_max": 0.18488553669174262, + "nauc_precision_at_1000_std": 0.04794409831645095, + "nauc_precision_at_100_diff1": 0.10347159272423, + "nauc_precision_at_100_max": 0.2507233155407764, + "nauc_precision_at_100_std": 0.07392427666440425, + "nauc_precision_at_10_diff1": 0.15847796882381948, + "nauc_precision_at_10_max": 0.3329565195959006, + "nauc_precision_at_10_std": 0.014731894295389979, + "nauc_precision_at_1_diff1": 0.36658998272809284, + "nauc_precision_at_1_max": 0.3326133806084119, + "nauc_precision_at_1_std": -0.04419406752440945, + "nauc_precision_at_20_diff1": 0.12930750921348785, + "nauc_precision_at_20_max": 0.2986541811325777, + "nauc_precision_at_20_std": 0.017186237178083225, + "nauc_precision_at_3_diff1": 0.21431374308154147, + "nauc_precision_at_3_max": 0.3648506340195946, + "nauc_precision_at_3_std": -0.013719987472869817, + "nauc_precision_at_5_diff1": 0.17935765292606096, + "nauc_precision_at_5_max": 0.36215309862413886, + "nauc_precision_at_5_std": 0.009643135070867623, + "nauc_recall_at_1000_diff1": 0.2523857121391749, + "nauc_recall_at_1000_max": 0.1168553681427761, + "nauc_recall_at_1000_std": 0.134594713982591, + "nauc_recall_at_100_diff1": 0.18339594402805762, + "nauc_recall_at_100_max": 0.2198598120533351, + "nauc_recall_at_100_std": 0.09518398139400887, + "nauc_recall_at_10_diff1": 0.21124037087749856, + "nauc_recall_at_10_max": 0.23701805976664797, + "nauc_recall_at_10_std": -0.0423372213090928, + "nauc_recall_at_1_diff1": 0.33497938935829996, + "nauc_recall_at_1_max": 0.203764636020361, + "nauc_recall_at_1_std": -0.06529168679290469, + "nauc_recall_at_20_diff1": 0.1966934360804329, + "nauc_recall_at_20_max": 0.22463062833903089, + "nauc_recall_at_20_std": -0.0248258891328285, + "nauc_recall_at_3_diff1": 0.23939633855932405, + "nauc_recall_at_3_max": 
0.24857189890471226, + "nauc_recall_at_3_std": -0.06861976273632474, + "nauc_recall_at_5_diff1": 0.22706873001684946, + "nauc_recall_at_5_max": 0.26284552641066217, + "nauc_recall_at_5_std": -0.05343735232165742, + "ndcg_at_1": 0.33165, + "ndcg_at_10": 0.36674, + "ndcg_at_100": 0.44809, + "ndcg_at_1000": 0.48298, + "ndcg_at_20": 0.39771, + "ndcg_at_3": 0.31668, + "ndcg_at_5": 0.32662, + "precision_at_1": 0.33165, + "precision_at_10": 0.11274, + "precision_at_100": 0.01883, + "precision_at_1000": 0.00241, + "precision_at_20": 0.06822, + "precision_at_3": 0.22951, + "precision_at_5": 0.17554, + "recall_at_1": 0.15601, + "recall_at_10": 0.44515, + "recall_at_100": 0.75536, + "recall_at_1000": 0.97486, + "recall_at_20": 0.53989, + "recall_at_3": 0.28554, + "recall_at_5": 0.35107 + }, + { + "hf_subset": "spa-eng", + "languages": [ + "spa-Latn", + "eng-Latn" + ], + "main_score": 0.55726, + "map_at_1": 0.25847, + "map_at_10": 0.4832, + "map_at_100": 0.50232, + "map_at_1000": 0.50341, + "map_at_20": 0.4945, + "map_at_3": 0.40527, + "map_at_5": 0.45457, + "mrr_at_1": 0.5119798234552333, + "mrr_at_10": 0.5941892351728416, + "mrr_at_100": 0.6007796366702531, + "mrr_at_1000": 0.601038475576041, + "mrr_at_20": 0.5980615470866368, + "mrr_at_3": 0.5710382513661199, + "mrr_at_5": 0.5833963850357289, + "nauc_map_at_1000_diff1": 0.4115692188850089, + "nauc_map_at_1000_max": 0.4655552410549127, + "nauc_map_at_1000_std": 0.04411494975534272, + "nauc_map_at_100_diff1": 0.41130294072859275, + "nauc_map_at_100_max": 0.4655454179721071, + "nauc_map_at_100_std": 0.04407096653185655, + "nauc_map_at_10_diff1": 0.4118525894874998, + "nauc_map_at_10_max": 0.4617557892616359, + "nauc_map_at_10_std": 0.04064368326229362, + "nauc_map_at_1_diff1": 0.4677341461342895, + "nauc_map_at_1_max": 0.26757223055332285, + "nauc_map_at_1_std": -0.06994512905310285, + "nauc_map_at_20_diff1": 0.4113645650213926, + "nauc_map_at_20_max": 0.4643774121115777, + "nauc_map_at_20_std": 0.03931871160941215, + 
"nauc_map_at_3_diff1": 0.41498585578849917, + "nauc_map_at_3_max": 0.39885193597509705, + "nauc_map_at_3_std": 0.0009233885990435192, + "nauc_map_at_5_diff1": 0.40931439197902214, + "nauc_map_at_5_max": 0.4409818149483978, + "nauc_map_at_5_std": 0.03035508986505188, + "nauc_mrr_at_1000_diff1": 0.47347661553411563, + "nauc_mrr_at_1000_max": 0.5307288649223942, + "nauc_mrr_at_1000_std": 0.07081405698053583, + "nauc_mrr_at_100_diff1": 0.4732686484248579, + "nauc_mrr_at_100_max": 0.5307904031701284, + "nauc_mrr_at_100_std": 0.07082862305811939, + "nauc_mrr_at_10_diff1": 0.47243188439908484, + "nauc_mrr_at_10_max": 0.5293705390166491, + "nauc_mrr_at_10_std": 0.07090136577116442, + "nauc_mrr_at_1_diff1": 0.5143174467288483, + "nauc_mrr_at_1_max": 0.5375734225509118, + "nauc_mrr_at_1_std": 0.07539946225711334, + "nauc_mrr_at_20_diff1": 0.4733545322028797, + "nauc_mrr_at_20_max": 0.5308989051713754, + "nauc_mrr_at_20_std": 0.07014504349881644, + "nauc_mrr_at_3_diff1": 0.4766627543629703, + "nauc_mrr_at_3_max": 0.5267540225042011, + "nauc_mrr_at_3_std": 0.06649245692929424, + "nauc_mrr_at_5_diff1": 0.4716089313601245, + "nauc_mrr_at_5_max": 0.5251764071963655, + "nauc_mrr_at_5_std": 0.06623822502396166, + "nauc_ndcg_at_1000_diff1": 0.4201192054532371, + "nauc_ndcg_at_1000_max": 0.49388223932359954, + "nauc_ndcg_at_1000_std": 0.06520620176668382, + "nauc_ndcg_at_100_diff1": 0.41324132704051947, + "nauc_ndcg_at_100_max": 0.4944647048729509, + "nauc_ndcg_at_100_std": 0.06970542959436896, + "nauc_ndcg_at_10_diff1": 0.4129905252642672, + "nauc_ndcg_at_10_max": 0.48629538604992756, + "nauc_ndcg_at_10_std": 0.054381569059580366, + "nauc_ndcg_at_1_diff1": 0.5143174467288483, + "nauc_ndcg_at_1_max": 0.5375734225509118, + "nauc_ndcg_at_1_std": 0.07539946225711334, + "nauc_ndcg_at_20_diff1": 0.411156623079673, + "nauc_ndcg_at_20_max": 0.491116781719036, + "nauc_ndcg_at_20_std": 0.04943006021485661, + "nauc_ndcg_at_3_diff1": 0.4126385694703844, + "nauc_ndcg_at_3_max": 
0.46655071963439265, + "nauc_ndcg_at_3_std": 0.04648197847924885, + "nauc_ndcg_at_5_diff1": 0.4125033051724908, + "nauc_ndcg_at_5_max": 0.4600353239260384, + "nauc_ndcg_at_5_std": 0.03931026879679215, + "nauc_precision_at_1000_diff1": -0.0648530003453625, + "nauc_precision_at_1000_max": 0.2013633733620228, + "nauc_precision_at_1000_std": 0.18568427865254633, + "nauc_precision_at_100_diff1": -0.0035012919625095973, + "nauc_precision_at_100_max": 0.2831333162990303, + "nauc_precision_at_100_std": 0.20376582557177383, + "nauc_precision_at_10_diff1": 0.11109116850576423, + "nauc_precision_at_10_max": 0.4106139732548851, + "nauc_precision_at_10_std": 0.16772095721576355, + "nauc_precision_at_1_diff1": 0.5143174467288483, + "nauc_precision_at_1_max": 0.5375734225509118, + "nauc_precision_at_1_std": 0.07539946225711334, + "nauc_precision_at_20_diff1": 0.06585456035051646, + "nauc_precision_at_20_max": 0.3610569987085884, + "nauc_precision_at_20_std": 0.15834692138361703, + "nauc_precision_at_3_diff1": 0.21161416027607186, + "nauc_precision_at_3_max": 0.46080942408218445, + "nauc_precision_at_3_std": 0.1333500427950949, + "nauc_precision_at_5_diff1": 0.1557868037507786, + "nauc_precision_at_5_max": 0.42923789399322637, + "nauc_precision_at_5_std": 0.15421875038235924, + "nauc_recall_at_1000_diff1": 0.4741966824195512, + "nauc_recall_at_1000_max": 0.5553476552589002, + "nauc_recall_at_1000_std": 0.28207062354473184, + "nauc_recall_at_100_diff1": 0.2668251355443733, + "nauc_recall_at_100_max": 0.4612206849794517, + "nauc_recall_at_100_std": 0.17422137740473376, + "nauc_recall_at_10_diff1": 0.3294055564890863, + "nauc_recall_at_10_max": 0.4433655148662183, + "nauc_recall_at_10_std": 0.051665809681908044, + "nauc_recall_at_1_diff1": 0.4677341461342895, + "nauc_recall_at_1_max": 0.26757223055332285, + "nauc_recall_at_1_std": -0.06994512905310285, + "nauc_recall_at_20_diff1": 0.30846630565368016, + "nauc_recall_at_20_max": 0.4533229372855015, + "nauc_recall_at_20_std": 
0.02682982457460292, + "nauc_recall_at_3_diff1": 0.3553743893077027, + "nauc_recall_at_3_max": 0.35176960150677256, + "nauc_recall_at_3_std": -0.014309744457852111, + "nauc_recall_at_5_diff1": 0.34487000461399697, + "nauc_recall_at_5_max": 0.3985662144475021, + "nauc_recall_at_5_std": 0.020966244099429515, + "ndcg_at_1": 0.51198, + "ndcg_at_10": 0.55726, + "ndcg_at_100": 0.61926, + "ndcg_at_1000": 0.637, + "ndcg_at_20": 0.58432, + "ndcg_at_3": 0.49529, + "ndcg_at_5": 0.51268, + "precision_at_1": 0.51198, + "precision_at_10": 0.16091, + "precision_at_100": 0.02164, + "precision_at_1000": 0.00243, + "precision_at_20": 0.09117, + "precision_at_3": 0.35267, + "precision_at_5": 0.26734, + "recall_at_1": 0.25847, + "recall_at_10": 0.65177, + "recall_at_100": 0.88052, + "recall_at_1000": 0.99065, + "recall_at_20": 0.73319, + "recall_at_3": 0.45429, + "recall_at_5": 0.54733 + } + ] + }, + "task_name": "XPQARetrieval" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/XQuADRetrieval.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/XQuADRetrieval.json new file mode 100644 index 0000000..afec40b --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/XQuADRetrieval.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "51adfef1c1287aab1d2d91b5bead9bcfb9c68583", + "evaluation_time": 3.212631940841675, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "validation": [ + { + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.97644, + "map_at_1": 0.94595, + "map_at_10": 0.96874, + "map_at_100": 0.96881, + "map_at_1000": 0.96881, + "map_at_20": 0.96881, + "map_at_3": 0.96678, + "map_at_5": 0.96851, + "mrr_at_1": 
0.9459459459459459, + "mrr_at_10": 0.9687369288931791, + "mrr_at_100": 0.9688137102199603, + "mrr_at_1000": 0.9688137102199603, + "mrr_at_20": 0.9688137102199603, + "mrr_at_3": 0.9667792792792793, + "mrr_at_5": 0.9685106981981982, + "nauc_map_at_1000_diff1": 0.9116511693827875, + "nauc_map_at_1000_max": 0.6329735439157124, + "nauc_map_at_1000_std": 0.25616693895030773, + "nauc_map_at_100_diff1": 0.9116511693827875, + "nauc_map_at_100_max": 0.6329735439157124, + "nauc_map_at_100_std": 0.25616693895030773, + "nauc_map_at_10_diff1": 0.9115472298768826, + "nauc_map_at_10_max": 0.633874951783346, + "nauc_map_at_10_std": 0.2579937744885127, + "nauc_map_at_1_diff1": 0.9105937786062325, + "nauc_map_at_1_max": 0.6114006173797208, + "nauc_map_at_1_std": 0.2290961271365201, + "nauc_map_at_20_diff1": 0.9116511693827875, + "nauc_map_at_20_max": 0.6329735439157124, + "nauc_map_at_20_std": 0.25616693895030773, + "nauc_map_at_3_diff1": 0.9096893687907531, + "nauc_map_at_3_max": 0.6314998748125582, + "nauc_map_at_3_std": 0.25184562505840513, + "nauc_map_at_5_diff1": 0.9121827070234979, + "nauc_map_at_5_max": 0.6365053284309163, + "nauc_map_at_5_std": 0.26067158274926133, + "nauc_mrr_at_1000_diff1": 0.9116511693827875, + "nauc_mrr_at_1000_max": 0.6329735439157124, + "nauc_mrr_at_1000_std": 0.25616693895030773, + "nauc_mrr_at_100_diff1": 0.9116511693827875, + "nauc_mrr_at_100_max": 0.6329735439157124, + "nauc_mrr_at_100_std": 0.25616693895030773, + "nauc_mrr_at_10_diff1": 0.9115472298768826, + "nauc_mrr_at_10_max": 0.633874951783346, + "nauc_mrr_at_10_std": 0.2579937744885127, + "nauc_mrr_at_1_diff1": 0.9105937786062325, + "nauc_mrr_at_1_max": 0.6114006173797208, + "nauc_mrr_at_1_std": 0.2290961271365201, + "nauc_mrr_at_20_diff1": 0.9116511693827875, + "nauc_mrr_at_20_max": 0.6329735439157124, + "nauc_mrr_at_20_std": 0.25616693895030773, + "nauc_mrr_at_3_diff1": 0.9096893687907531, + "nauc_mrr_at_3_max": 0.6314998748125582, + "nauc_mrr_at_3_std": 0.25184562505840513, + 
"nauc_mrr_at_5_diff1": 0.9121827070234979, + "nauc_mrr_at_5_max": 0.6365053284309163, + "nauc_mrr_at_5_std": 0.26067158274926133, + "nauc_ndcg_at_1000_diff1": 0.9117471841340751, + "nauc_ndcg_at_1000_max": 0.6352471222165315, + "nauc_ndcg_at_1000_std": 0.2592658103966081, + "nauc_ndcg_at_100_diff1": 0.9117471841340751, + "nauc_ndcg_at_100_max": 0.6352471222165315, + "nauc_ndcg_at_100_std": 0.2592658103966081, + "nauc_ndcg_at_10_diff1": 0.9113229403113509, + "nauc_ndcg_at_10_max": 0.6388952782330287, + "nauc_ndcg_at_10_std": 0.2666744260786783, + "nauc_ndcg_at_1_diff1": 0.9105937786062325, + "nauc_ndcg_at_1_max": 0.6114006173797208, + "nauc_ndcg_at_1_std": 0.2290961271365201, + "nauc_ndcg_at_20_diff1": 0.9117471841340751, + "nauc_ndcg_at_20_max": 0.6352471222165315, + "nauc_ndcg_at_20_std": 0.2592658103966081, + "nauc_ndcg_at_3_diff1": 0.9080837777074015, + "nauc_ndcg_at_3_max": 0.6372758487083894, + "nauc_ndcg_at_3_std": 0.25807675461428875, + "nauc_ndcg_at_5_diff1": 0.9133389512162878, + "nauc_ndcg_at_5_max": 0.6471047414184676, + "nauc_ndcg_at_5_std": 0.2747233649363483, + "nauc_precision_at_1000_diff1": 1.0, + "nauc_precision_at_1000_max": 1.0, + "nauc_precision_at_1000_std": 1.0, + "nauc_precision_at_100_diff1": 1.0, + "nauc_precision_at_100_max": 1.0, + "nauc_precision_at_100_std": 1.0, + "nauc_precision_at_10_diff1": 0.8693300960160147, + "nauc_precision_at_10_max": 1.0, + "nauc_precision_at_10_std": 1.0, + "nauc_precision_at_1_diff1": 0.9105937786062325, + "nauc_precision_at_1_max": 0.6114006173797208, + "nauc_precision_at_1_std": 0.2290961271365201, + "nauc_precision_at_20_diff1": 1.0, + "nauc_precision_at_20_max": 1.0, + "nauc_precision_at_20_std": 1.0, + "nauc_precision_at_3_diff1": 0.8920562597274606, + "nauc_precision_at_3_max": 0.6859849158759296, + "nauc_precision_at_3_std": 0.31005770089823753, + "nauc_precision_at_5_diff1": 0.956443365338596, + "nauc_precision_at_5_max": 1.0, + "nauc_precision_at_5_std": 0.7424515775553914, + 
"nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + "nauc_recall_at_100_diff1": NaN, + "nauc_recall_at_100_max": NaN, + "nauc_recall_at_100_std": NaN, + "nauc_recall_at_10_diff1": 0.869330096015902, + "nauc_recall_at_10_max": 1.0, + "nauc_recall_at_10_std": 1.0, + "nauc_recall_at_1_diff1": 0.9105937786062325, + "nauc_recall_at_1_max": 0.6114006173797208, + "nauc_recall_at_1_std": 0.2290961271365201, + "nauc_recall_at_20_diff1": NaN, + "nauc_recall_at_20_max": NaN, + "nauc_recall_at_20_std": NaN, + "nauc_recall_at_3_diff1": 0.8920562597274793, + "nauc_recall_at_3_max": 0.685984915875934, + "nauc_recall_at_3_std": 0.31005770089825885, + "nauc_recall_at_5_diff1": 0.9564433653385848, + "nauc_recall_at_5_max": 1.0, + "nauc_recall_at_5_std": 0.7424515775553884, + "ndcg_at_1": 0.94595, + "ndcg_at_10": 0.97644, + "ndcg_at_100": 0.97668, + "ndcg_at_1000": 0.97668, + "ndcg_at_20": 0.97668, + "ndcg_at_3": 0.97277, + "ndcg_at_5": 0.9759, + "precision_at_1": 0.94595, + "precision_at_10": 0.09992, + "precision_at_100": 0.01, + "precision_at_1000": 0.001, + "precision_at_20": 0.05, + "precision_at_3": 0.32995, + "precision_at_5": 0.19949, + "recall_at_1": 0.94595, + "recall_at_10": 0.99916, + "recall_at_100": 1.0, + "recall_at_1000": 1.0, + "recall_at_20": 1.0, + "recall_at_3": 0.98986, + "recall_at_5": 0.99747 + } + ] + }, + "task_name": "XQuADRetrieval" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/model_meta.json b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/model_meta.json new file mode 100644 index 0000000..7a28942 --- /dev/null +++ b/evaluation/embeddings_model/results/intfloat/multilingual-e5-large/intfloat__multilingual-e5-large/ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb/model_meta.json @@ 
-0,0 +1 @@ +{"name": "intfloat/multilingual-e5-large", "revision": "ab10c1a7f42e74530fe7ae5be82e6d4f11a719eb", "release_date": null, "languages": [], "n_parameters": null, "memory_usage": null, "max_tokens": null, "embed_dim": null, "license": null, "open_source": null, "similarity_fn_name": null, "framework": ["Sentence Transformers"], "loader": null} \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 55e35a1..b940147 100644 --- a/requirements.txt +++ b/requirements.txt @@ -19,7 +19,7 @@ langsmith==0.0.41 qdrant-client==1.5.4 supabase==1.0.2 pinecone-client==2.2.2 -sentence_transformers==2.2.2 +sentence_transformers==3.0.1 openai==1.3.8 tavily-python==0.2.9 @@ -30,5 +30,5 @@ black==23.9.1 isort==5.12.0 # Evaluation -mteb==1.12.25 +mteb==1.12.39 # ragas==0.1.0rc1 From d328defd039b4ed62240b6ddaba86076396aa0a2 Mon Sep 17 00:00:00 2001 From: Bukosabino Date: Wed, 19 Jun 2024 13:19:09 +0200 Subject: [PATCH 5/9] Update requirements --- requirements.txt | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index e0274a7..db72b0f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,15 +23,10 @@ langchain-community==0.2.1 qdrant-client==1.8.0 supabase==1.0.2 pinecone-client==2.2.2 -<<<<<<< develop-embeddings-evaluation + sentence_transformers==3.0.1 -openai==1.3.8 -tavily-python==0.2.9 -======= -sentence_transformers==2.2.2 openai==1.30.5 tavily-python==0.3.3 ->>>>>>> main sendgrid==6.10.0 From 5cf62c651d0188fc12aae04cfb0a1765eb6460de Mon Sep 17 00:00:00 2001 From: Bukosabino Date: Thu, 20 Jun 2024 08:49:08 +0200 Subject: [PATCH 6/9] Update README.md with the results --- evaluation/embeddings_model/README.md | 33 +++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/evaluation/embeddings_model/README.md b/evaluation/embeddings_model/README.md index 4d95b7f..642e1c3 100644 --- a/evaluation/embeddings_model/README.md +++ b/evaluation/embeddings_model/README.md @@ 
-1,3 +1,36 @@ ```` python mteb_benchmark.py ```` + + + +| Benchmark | dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn | intfloat__multilingual-e5-large | +|-----------------------------|---------|----------| +| AmazonReviewsClassification | 0.28194 | 0.42702 +| CataloniaTweetClassification | 0.48369999999999996 | 0.5025000000000001 +| MassiveIntentClassification | 0.5473100201748486 | 0.6470073974445192 +| MassiveScenarioClassification | 0.6322797579018158 | 0.689340954942838 +| MintakaRetrieval | 0.16548 | 0.2836 +| MIRACLRetrieval | 0.70137 | 0.82005 +| MLSUMClusteringS2S.v2 | 0.42841628413893035 | 0.48075917245775485 +| MTOPDomainClassification | 0.7924616410940628 | 0.8998999332888593 +| MTOPIntentClassification | 0.5307538358905937 | 0.6673782521681121 +| MultiEURLEXMultilabelClassification | 0.05144 | 0.05226000000000001 +| MultiHateClassification | 0.5578 | 0.639 +| PawsX | 0.6015684593563027 | 0.5639685167829116 +| PublicHealthQA | 0.62516 | 0.80811 +| SIB200Classification | 0.6549019607843137 | 0.7348039215686275 +| SIB200ClusteringS2S | 0.3347573603718645 | 0.3637865013678009 +| SpanishNewsClassification | 0.81318359375 | 0.880517578125 +| SpanishNewsClusteringP2P | 0.379918321557151 | 0.4399933663826367 +| SpanishSentimentClassification | 0.6378378378378378 | 0.9141891891891893 +| STS17 | 0.23167578806693545 | 0.8092850520982419 +| STS22 | 0.49970798735740846 | 0.7865922376187726 +| STSBenchmarkMultilingualSTS | 0.7724973718736371 | 0.8646354604520479 +| STSES | 0.6040795444089487 | 0.7923804835012699 +| Tatoeba | 0.15162606837606837 | 0.9736666666666667 +| TweetSentimentClassification | 0.408203125 | 0.508984375 +| XMarket | 0.11391 | 0.14136 +| XNLI | 0.5793703625227221 | 0.7603625574106656 +| XPQARetrieval | 0.47322 | 0.61619 +| XQuADRetrieval | 0.81996 | 0.97644 From f6db8ba84decc1f63bca41d933c2efb08abbc093 Mon Sep 17 00:00:00 2001 From: Bukosabino Date: Wed, 26 Jun 2024 11:47:58 +0000 Subject: [PATCH 7/9] adding bge-m3 results --- 
.../AmazonReviewsClassification.json | 137 ++++++ .../BibleNLPBitextMining.json | 35 ++ .../CataloniaTweetClassification.json | 137 ++++++ .../MTOPDomainClassification.json | 137 ++++++ .../MTOPIntentClassification.json | 137 ++++++ .../MassiveIntentClassification.json | 137 ++++++ .../MassiveScenarioClassification.json | 137 ++++++ .../MintakaRetrieval.json | 158 ++++++ .../MultiHateClassification.json | 95 ++++ .../PawsX.json | 127 +++++ .../PublicHealthQA.json | 158 ++++++ .../SIB200Classification.json | 201 ++++++++ .../SIB200ClusteringS2S.json | 33 ++ .../STS17.json | 54 +++ .../STSBenchmarkMultilingualSTS.json | 55 +++ .../STSES.json | 32 ++ .../SpanishPassageRetrievalS2S.json | 158 ++++++ .../SpanishSentimentClassification.json | 181 +++++++ .../Tatoeba.json | 23 + .../TweetSentimentClassification.json | 73 +++ .../XNLI.json | 127 +++++ .../XPQARetrieval.json | 455 ++++++++++++++++++ .../XQuADRetrieval.json | 158 ++++++ .../model_meta.json | 1 + 24 files changed, 2946 insertions(+) create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/AmazonReviewsClassification.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/BibleNLPBitextMining.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/CataloniaTweetClassification.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MTOPDomainClassification.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MTOPIntentClassification.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MassiveIntentClassification.json create mode 100644 
evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MassiveScenarioClassification.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MintakaRetrieval.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MultiHateClassification.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/PawsX.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/PublicHealthQA.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/SIB200Classification.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/SIB200ClusteringS2S.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/STS17.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/STSBenchmarkMultilingualSTS.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/STSES.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/SpanishPassageRetrievalS2S.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/SpanishSentimentClassification.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/Tatoeba.json create mode 100644 
evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/TweetSentimentClassification.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/XNLI.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/XPQARetrieval.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/XQuADRetrieval.json create mode 100644 evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/model_meta.json diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/AmazonReviewsClassification.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/AmazonReviewsClassification.json new file mode 100644 index 0000000..e39ec8b --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/AmazonReviewsClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "1399c76144fd37290681b995c656ef9b2e06e26d", + "evaluation_time": 89.11965489387512, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.44678000000000007, + "f1": 0.42405774139116525, + "f1_weighted": 0.42405774139116525, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.44678000000000007, + "scores_per_experiment": [ + { + "accuracy": 0.4432, + "f1": 0.4128581086837859, + "f1_weighted": 0.4128581086837859 + }, + { + "accuracy": 0.4536, + "f1": 0.4362083315439791, + "f1_weighted": 0.43620833154397903 + }, + { + "accuracy": 0.468, + "f1": 0.4297143374330804, + "f1_weighted": 0.42971433743308046 + }, + { + "accuracy": 0.4484, + "f1": 0.44175168895312933, + "f1_weighted": 0.4417516889531293 + }, + { + 
"accuracy": 0.437, + "f1": 0.40908631376427473, + "f1_weighted": 0.4090863137642748 + }, + { + "accuracy": 0.4506, + "f1": 0.42870402357493365, + "f1_weighted": 0.42870402357493365 + }, + { + "accuracy": 0.4334, + "f1": 0.41082859750405804, + "f1_weighted": 0.41082859750405804 + }, + { + "accuracy": 0.4514, + "f1": 0.4396235588515759, + "f1_weighted": 0.43962355885157595 + }, + { + "accuracy": 0.4492, + "f1": 0.41488193691105657, + "f1_weighted": 0.4148819369110565 + }, + { + "accuracy": 0.433, + "f1": 0.41692051669177876, + "f1_weighted": 0.4169205166917787 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.44446, + "f1": 0.42214102696443695, + "f1_weighted": 0.42214102696443695, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.44446, + "scores_per_experiment": [ + { + "accuracy": 0.4336, + "f1": 0.40320353806833475, + "f1_weighted": 0.4032035380683348 + }, + { + "accuracy": 0.4488, + "f1": 0.4305457057707863, + "f1_weighted": 0.4305457057707864 + }, + { + "accuracy": 0.464, + "f1": 0.42430424103687503, + "f1_weighted": 0.42430424103687503 + }, + { + "accuracy": 0.4442, + "f1": 0.4381740531500947, + "f1_weighted": 0.4381740531500947 + }, + { + "accuracy": 0.4406, + "f1": 0.4156932047502909, + "f1_weighted": 0.415693204750291 + }, + { + "accuracy": 0.4572, + "f1": 0.4340821005328042, + "f1_weighted": 0.43408210053280416 + }, + { + "accuracy": 0.437, + "f1": 0.4166405917478004, + "f1_weighted": 0.4166405917478004 + }, + { + "accuracy": 0.4414, + "f1": 0.43121042189248604, + "f1_weighted": 0.431210421892486 + }, + { + "accuracy": 0.4458, + "f1": 0.41170102764187205, + "f1_weighted": 0.41170102764187205 + }, + { + "accuracy": 0.432, + "f1": 0.41585538505302555, + "f1_weighted": 0.4158553850530255 + } + ] + } + ] + }, + "task_name": "AmazonReviewsClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/BibleNLPBitextMining.json 
b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/BibleNLPBitextMining.json new file mode 100644 index 0000000..976e2ed --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/BibleNLPBitextMining.json @@ -0,0 +1,35 @@ +{ + "dataset_revision": "264a18480c529d9e922483839b4b9758e690b762", + "evaluation_time": 9.615000247955322, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "train": [ + { + "accuracy": 0.98828125, + "f1": 0.984375, + "hf_subset": "eng_Latn-spa_Latn", + "languages": [ + "eng-Latn", + "spa-Latn" + ], + "main_score": 0.984375, + "precision": 0.982421875, + "recall": 0.98828125 + }, + { + "accuracy": 0.9921875, + "f1": 0.9895833333333333, + "hf_subset": "spa_Latn-eng_Latn", + "languages": [ + "spa-Latn", + "eng-Latn" + ], + "main_score": 0.9895833333333333, + "precision": 0.98828125, + "recall": 0.9921875 + } + ] + }, + "task_name": "BibleNLPBitextMining" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/CataloniaTweetClassification.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/CataloniaTweetClassification.json new file mode 100644 index 0000000..a13d514 --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/CataloniaTweetClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "cf24d44e517efa534f048e5fc5981f399ed25bee", + "evaluation_time": 22.496618270874023, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.4875999999999999, + "f1": 0.495389318235397, + "f1_weighted": 0.47891884944166285, + "hf_subset": "spanish", + "languages": [ + "spa-Latn" + ], + "main_score": 0.4875999999999999, + "scores_per_experiment": [ + { + 
"accuracy": 0.5365, + "f1": 0.5446280613952249, + "f1_weighted": 0.5289936214084807 + }, + { + "accuracy": 0.4645, + "f1": 0.47137929338019835, + "f1_weighted": 0.45125851157391206 + }, + { + "accuracy": 0.441, + "f1": 0.4423184099500607, + "f1_weighted": 0.43361017360084164 + }, + { + "accuracy": 0.4865, + "f1": 0.5006347979068946, + "f1_weighted": 0.4830379744816243 + }, + { + "accuracy": 0.4785, + "f1": 0.4794393853836449, + "f1_weighted": 0.4685082062301844 + }, + { + "accuracy": 0.5285, + "f1": 0.5408500269333003, + "f1_weighted": 0.5252252868634425 + }, + { + "accuracy": 0.5215, + "f1": 0.5334697717156517, + "f1_weighted": 0.5110434772460865 + }, + { + "accuracy": 0.472, + "f1": 0.4752905999468883, + "f1_weighted": 0.4603400353982301 + }, + { + "accuracy": 0.456, + "f1": 0.4575064934604902, + "f1_weighted": 0.45021574487739774 + }, + { + "accuracy": 0.491, + "f1": 0.5083763422816154, + "f1_weighted": 0.47695546273642897 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.48850000000000005, + "f1": 0.4947283256005882, + "f1_weighted": 0.48115402766318677, + "hf_subset": "spanish", + "languages": [ + "spa-Latn" + ], + "main_score": 0.48850000000000005, + "scores_per_experiment": [ + { + "accuracy": 0.539, + "f1": 0.5464470729825366, + "f1_weighted": 0.5336021569805925 + }, + { + "accuracy": 0.484, + "f1": 0.4894218537719817, + "f1_weighted": 0.47470516066031826 + }, + { + "accuracy": 0.445, + "f1": 0.4455307659258912, + "f1_weighted": 0.43910157608256967 + }, + { + "accuracy": 0.498, + "f1": 0.5070390548133686, + "f1_weighted": 0.4952619526445418 + }, + { + "accuracy": 0.4595, + "f1": 0.4639483576499602, + "f1_weighted": 0.45036650516929616 + }, + { + "accuracy": 0.5245, + "f1": 0.5407858817012889, + "f1_weighted": 0.5213218129077658 + }, + { + "accuracy": 0.5095, + "f1": 0.5196772939931399, + "f1_weighted": 0.5028498831598263 + }, + { + "accuracy": 0.4725, + "f1": 0.4708719174588281, + "f1_weighted": 0.4612039498229505 + }, + { + "accuracy": 0.44, + "f1": 
0.4423566083741999, + "f1_weighted": 0.4321500582434104 + }, + { + "accuracy": 0.513, + "f1": 0.5212044493346871, + "f1_weighted": 0.5009772209605959 + } + ] + } + ] + }, + "task_name": "CataloniaTweetClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MTOPDomainClassification.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MTOPDomainClassification.json new file mode 100644 index 0000000..c3e6647 --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MTOPDomainClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "d80d48c1eb48d3562165c59d59d0034df9fff0bf", + "evaluation_time": 10.824077606201172, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.9183789192795198, + "f1": 0.9125084267753119, + "f1_weighted": 0.9181251805850845, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.9183789192795198, + "scores_per_experiment": [ + { + "accuracy": 0.9362908605737158, + "f1": 0.9308539277147656, + "f1_weighted": 0.9362089935650718 + }, + { + "accuracy": 0.9016010673782522, + "f1": 0.8945649587237674, + "f1_weighted": 0.9011989713674253 + }, + { + "accuracy": 0.9249499666444296, + "f1": 0.9214388031500154, + "f1_weighted": 0.9251358742918964 + }, + { + "accuracy": 0.9179452968645764, + "f1": 0.9108133592737104, + "f1_weighted": 0.9176579135328727 + }, + { + "accuracy": 0.9119412941961308, + "f1": 0.9062853507390213, + "f1_weighted": 0.9113575364567782 + }, + { + "accuracy": 0.9272848565710473, + "f1": 0.9229018879547749, + "f1_weighted": 0.927101991099451 + }, + { + "accuracy": 0.9122748498999332, + "f1": 0.9071886871692388, + "f1_weighted": 0.912118802972318 + }, + { + "accuracy": 0.9202801867911942, + "f1": 0.914854662047599, + "f1_weighted": 
0.9203755598145666 + }, + { + "accuracy": 0.9176117411607738, + "f1": 0.9111876537946383, + "f1_weighted": 0.9174438365478041 + }, + { + "accuracy": 0.9136090727151435, + "f1": 0.9049949771855882, + "f1_weighted": 0.912652326202661 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.9167648984937788, + "f1": 0.9151690563394344, + "f1_weighted": 0.9163419092915351, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.9167648984937788, + "scores_per_experiment": [ + { + "accuracy": 0.9430255402750491, + "f1": 0.9430157969374904, + "f1_weighted": 0.9428002532060559 + }, + { + "accuracy": 0.9004584151931893, + "f1": 0.898756763052187, + "f1_weighted": 0.9009253993550324 + }, + { + "accuracy": 0.9305828421741977, + "f1": 0.9315398707026387, + "f1_weighted": 0.9304914018258688 + }, + { + "accuracy": 0.9037328094302554, + "f1": 0.9008378583695488, + "f1_weighted": 0.9033764381281825 + }, + { + "accuracy": 0.9056974459724951, + "f1": 0.9050508927567276, + "f1_weighted": 0.9043298908088192 + }, + { + "accuracy": 0.9390962671905697, + "f1": 0.9382265049600694, + "f1_weighted": 0.938910170183542 + }, + { + "accuracy": 0.908316961362148, + "f1": 0.9062969177982023, + "f1_weighted": 0.9077502841379549 + }, + { + "accuracy": 0.9142108709888671, + "f1": 0.9113885964330617, + "f1_weighted": 0.9142924523310937 + }, + { + "accuracy": 0.9122462344466273, + "f1": 0.9097908275107668, + "f1_weighted": 0.9111483479925627 + }, + { + "accuracy": 0.9102815979043877, + "f1": 0.9067865348736504, + "f1_weighted": 0.909394454946237 + } + ] + } + ] + }, + "task_name": "MTOPDomainClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MTOPIntentClassification.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MTOPIntentClassification.json new file mode 100644 index 0000000..1a72dbf --- /dev/null +++ 
b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MTOPIntentClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba", + "evaluation_time": 29.898176431655884, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.6644096064042696, + "f1": 0.4488866093717652, + "f1_weighted": 0.700051965759593, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6644096064042696, + "scores_per_experiment": [ + { + "accuracy": 0.6387591727818546, + "f1": 0.42735975482733596, + "f1_weighted": 0.668545638844319 + }, + { + "accuracy": 0.6847898599066043, + "f1": 0.4485453420050805, + "f1_weighted": 0.7205127873937188 + }, + { + "accuracy": 0.6804536357571714, + "f1": 0.4561783115132103, + "f1_weighted": 0.7096556876504303 + }, + { + "accuracy": 0.6524349566377585, + "f1": 0.43977668509705514, + "f1_weighted": 0.6855732997891514 + }, + { + "accuracy": 0.6697798532354903, + "f1": 0.4623431669597747, + "f1_weighted": 0.701098190352369 + }, + { + "accuracy": 0.675783855903936, + "f1": 0.45333566359938254, + "f1_weighted": 0.7117802808562116 + }, + { + "accuracy": 0.6891260840560374, + "f1": 0.44821198126938516, + "f1_weighted": 0.7275149220334678 + }, + { + "accuracy": 0.6684456304202802, + "f1": 0.4686355825536871, + "f1_weighted": 0.7148380400687169 + }, + { + "accuracy": 0.6220813875917278, + "f1": 0.4317942763136276, + "f1_weighted": 0.6612177531113019 + }, + { + "accuracy": 0.6624416277518346, + "f1": 0.4526853295791135, + "f1_weighted": 0.6997830574962438 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.6766208251473477, + "f1": 0.4369630170320039, + "f1_weighted": 0.7075650019192082, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6766208251473477, + "scores_per_experiment": [ + { + "accuracy": 0.6450556647020301, + "f1": 0.43011625621006044, + "f1_weighted": 0.6749545936602348 + 
}, + { + "accuracy": 0.6922069417157826, + "f1": 0.44607665105209493, + "f1_weighted": 0.7234065429265448 + }, + { + "accuracy": 0.6856581532416502, + "f1": 0.4277651246246946, + "f1_weighted": 0.7100797973049608 + }, + { + "accuracy": 0.6633922724296005, + "f1": 0.4513594963942233, + "f1_weighted": 0.6903332608806062 + }, + { + "accuracy": 0.6843483955468238, + "f1": 0.41048463209498315, + "f1_weighted": 0.7090143714592488 + }, + { + "accuracy": 0.6954813359528488, + "f1": 0.451414846724882, + "f1_weighted": 0.7292903271522736 + }, + { + "accuracy": 0.7118533071381794, + "f1": 0.4550512207248254, + "f1_weighted": 0.7435826815964672 + }, + { + "accuracy": 0.6817288801571709, + "f1": 0.46056565795829646, + "f1_weighted": 0.7198513277215912 + }, + { + "accuracy": 0.6240995415848068, + "f1": 0.4103954855273591, + "f1_weighted": 0.6615954047831369 + }, + { + "accuracy": 0.6823837590045841, + "f1": 0.4264007990086195, + "f1_weighted": 0.7135417117070183 + } + ] + } + ] + }, + "task_name": "MTOPIntentClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MassiveIntentClassification.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MassiveIntentClassification.json new file mode 100644 index 0000000..c01b9c5 --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MassiveIntentClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "4672e20407010da34463acc759c162ca9734bca6", + "evaluation_time": 21.93426489830017, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.6703429724277068, + "f1": 0.6529101983570066, + "f1_weighted": 0.6611355125498407, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6703429724277068, + "scores_per_experiment": [ + { + "accuracy": 
0.6859448554135844, + "f1": 0.6650703253442898, + "f1_weighted": 0.6808375374590983 + }, + { + "accuracy": 0.695359784801614, + "f1": 0.6730125992525006, + "f1_weighted": 0.6863133318483826 + }, + { + "accuracy": 0.6620712844653666, + "f1": 0.6501073380142928, + "f1_weighted": 0.6517195823295019 + }, + { + "accuracy": 0.6782111634162744, + "f1": 0.6520345547932778, + "f1_weighted": 0.6696497818810366 + }, + { + "accuracy": 0.6698049764626766, + "f1": 0.6475162578276497, + "f1_weighted": 0.657067486798334 + }, + { + "accuracy": 0.6519838601210491, + "f1": 0.6411272686259522, + "f1_weighted": 0.6468490539913321 + }, + { + "accuracy": 0.6674512441156691, + "f1": 0.6501441010934843, + "f1_weighted": 0.6581287867131467 + }, + { + "accuracy": 0.6533288500336247, + "f1": 0.6362029087185123, + "f1_weighted": 0.644705544160695 + }, + { + "accuracy": 0.6587088096839274, + "f1": 0.6566754485149066, + "f1_weighted": 0.6397039978536526 + }, + { + "accuracy": 0.6805648957632818, + "f1": 0.6572111813851994, + "f1_weighted": 0.6763800224632268 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.6733890801770782, + "f1": 0.6380868554047734, + "f1_weighted": 0.6620131215608797, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6733890801770782, + "scores_per_experiment": [ + { + "accuracy": 0.6851942941465814, + "f1": 0.6492114698911075, + "f1_weighted": 0.6795688669115247 + }, + { + "accuracy": 0.6960157402852927, + "f1": 0.6553434005712407, + "f1_weighted": 0.6838698305652127 + }, + { + "accuracy": 0.6709296606000984, + "f1": 0.633195026120348, + "f1_weighted": 0.6613709965270368 + }, + { + "accuracy": 0.6738809640924742, + "f1": 0.6298318278578342, + "f1_weighted": 0.6604105925514125 + }, + { + "accuracy": 0.6797835710772258, + "f1": 0.6402595069390927, + "f1_weighted": 0.6637239515297518 + }, + { + "accuracy": 0.6733890801770782, + "f1": 0.6368756353314711, + "f1_weighted": 0.6670137102499007 + }, + { + "accuracy": 0.6522380718150517, + "f1": 
0.6221036527298678, + "f1_weighted": 0.642322704630368 + }, + { + "accuracy": 0.6556812592228234, + "f1": 0.6294366928467141, + "f1_weighted": 0.6454071253182313 + }, + { + "accuracy": 0.6591244466305952, + "f1": 0.6428789623061602, + "f1_weighted": 0.638569491605358 + }, + { + "accuracy": 0.6876537137235612, + "f1": 0.6417323794538976, + "f1_weighted": 0.6778739457199999 + } + ] + } + ] + }, + "task_name": "MassiveIntentClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MassiveScenarioClassification.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MassiveScenarioClassification.json new file mode 100644 index 0000000..ff8358d --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MassiveScenarioClassification.json @@ -0,0 +1,137 @@ +{ + "dataset_revision": "fad2c6e8459f9e1c45d9315f4953d921437d70f8", + "evaluation_time": 14.426787614822388, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.721990585070612, + "f1": 0.7152998907106446, + "f1_weighted": 0.7144036628076448, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.721990585070612, + "scores_per_experiment": [ + { + "accuracy": 0.7390719569603228, + "f1": 0.7356973675537498, + "f1_weighted": 0.7334166152385508 + }, + { + "accuracy": 0.7404169468728985, + "f1": 0.7345313311432703, + "f1_weighted": 0.7314614958081921 + }, + { + "accuracy": 0.7222595830531271, + "f1": 0.7036423152088521, + "f1_weighted": 0.7108669268027763 + }, + { + "accuracy": 0.7027572293207801, + "f1": 0.698765248644501, + "f1_weighted": 0.6974345698615605 + }, + { + "accuracy": 0.7256220578345662, + "f1": 0.715590643049413, + "f1_weighted": 0.713421421501628 + }, + { + "accuracy": 0.7084734364492267, + "f1": 0.6976943855472375, + 
"f1_weighted": 0.699243074656696 + }, + { + "accuracy": 0.7125084061869535, + "f1": 0.7043251848783254, + "f1_weighted": 0.7021422041176608 + }, + { + "accuracy": 0.7088096839273705, + "f1": 0.7087016498534129, + "f1_weighted": 0.7041987374939753 + }, + { + "accuracy": 0.7293207800941492, + "f1": 0.7319291838529053, + "f1_weighted": 0.7248123677713798 + }, + { + "accuracy": 0.730665770006725, + "f1": 0.7221215973747787, + "f1_weighted": 0.7270392148240279 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.7224790949335957, + "f1": 0.7122282114515955, + "f1_weighted": 0.7146555718791052, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.7224790949335957, + "scores_per_experiment": [ + { + "accuracy": 0.7388096409247418, + "f1": 0.7337793572622747, + "f1_weighted": 0.7313949597256444 + }, + { + "accuracy": 0.7383177570093458, + "f1": 0.7315208612473253, + "f1_weighted": 0.7316812478627228 + }, + { + "accuracy": 0.7166748647319232, + "f1": 0.701424795498779, + "f1_weighted": 0.7070608231646982 + }, + { + "accuracy": 0.6930644367929168, + "f1": 0.6817796060688871, + "f1_weighted": 0.6829238430914637 + }, + { + "accuracy": 0.7314313821938022, + "f1": 0.7180767079084828, + "f1_weighted": 0.7223795285667627 + }, + { + "accuracy": 0.7073290703394, + "f1": 0.6933149555207739, + "f1_weighted": 0.6977281802058393 + }, + { + "accuracy": 0.7088047220855878, + "f1": 0.693270250914662, + "f1_weighted": 0.6989967297825418 + }, + { + "accuracy": 0.7147073290703394, + "f1": 0.7096624422668026, + "f1_weighted": 0.70640016702009 + }, + { + "accuracy": 0.7432365961633055, + "f1": 0.742371505468767, + "f1_weighted": 0.7387450406157731 + }, + { + "accuracy": 0.7324151500245942, + "f1": 0.7170816323592007, + "f1_weighted": 0.7292451987555165 + } + ] + } + ] + }, + "task_name": "MassiveScenarioClassification" +} \ No newline at end of file diff --git 
a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MintakaRetrieval.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MintakaRetrieval.json new file mode 100644 index 0000000..03059fc --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MintakaRetrieval.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "efa78cc2f74bbcd21eff2261f9e13aebe40b814e", + "evaluation_time": 28.245105981826782, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.22341, + "map_at_1": 0.12871, + "map_at_10": 0.18973, + "map_at_100": 0.19947, + "map_at_1000": 0.20077, + "map_at_20": 0.19524, + "map_at_3": 0.17224, + "map_at_5": 0.18123, + "mrr_at_1": 0.12871287128712872, + "mrr_at_10": 0.18972903242705197, + "mrr_at_100": 0.19947399482208625, + "mrr_at_1000": 0.20076962574113508, + "mrr_at_20": 0.19524432355898969, + "mrr_at_3": 0.17223597359735957, + "mrr_at_5": 0.18122937293729335, + "nauc_map_at_1000_diff1": 0.2027486462257375, + "nauc_map_at_1000_max": 0.33234390318993456, + "nauc_map_at_1000_std": 0.07675814461650028, + "nauc_map_at_100_diff1": 0.20250747072265196, + "nauc_map_at_100_max": 0.3323075095815488, + "nauc_map_at_100_std": 0.07709513818168237, + "nauc_map_at_10_diff1": 0.20428228683560745, + "nauc_map_at_10_max": 0.33519720248543433, + "nauc_map_at_10_std": 0.07366054279250642, + "nauc_map_at_1_diff1": 0.281240764978414, + "nauc_map_at_1_max": 0.3439394887824967, + "nauc_map_at_1_std": 0.03609737508962941, + "nauc_map_at_20_diff1": 0.2027225963647895, + "nauc_map_at_20_max": 0.33338764332397486, + "nauc_map_at_20_std": 0.0765403355014395, + "nauc_map_at_3_diff1": 0.21761577556701, + "nauc_map_at_3_max": 0.3465824077885606, + "nauc_map_at_3_std": 0.0710392421946564, + "nauc_map_at_5_diff1": 
0.21110519198293165, + "nauc_map_at_5_max": 0.34125216639726874, + "nauc_map_at_5_std": 0.07152709206385177, + "nauc_mrr_at_1000_diff1": 0.2027486462257375, + "nauc_mrr_at_1000_max": 0.33234390318993456, + "nauc_mrr_at_1000_std": 0.07675814461650028, + "nauc_mrr_at_100_diff1": 0.20250747072265196, + "nauc_mrr_at_100_max": 0.3323075095815488, + "nauc_mrr_at_100_std": 0.07709513818168237, + "nauc_mrr_at_10_diff1": 0.20428228683560745, + "nauc_mrr_at_10_max": 0.33519720248543433, + "nauc_mrr_at_10_std": 0.07366054279250642, + "nauc_mrr_at_1_diff1": 0.281240764978414, + "nauc_mrr_at_1_max": 0.3439394887824967, + "nauc_mrr_at_1_std": 0.03609737508962941, + "nauc_mrr_at_20_diff1": 0.2027225963647895, + "nauc_mrr_at_20_max": 0.33338764332397486, + "nauc_mrr_at_20_std": 0.0765403355014395, + "nauc_mrr_at_3_diff1": 0.21761577556701, + "nauc_mrr_at_3_max": 0.3465824077885606, + "nauc_mrr_at_3_std": 0.0710392421946564, + "nauc_mrr_at_5_diff1": 0.21110519198293165, + "nauc_mrr_at_5_max": 0.34125216639726874, + "nauc_mrr_at_5_std": 0.07152709206385177, + "nauc_ndcg_at_1000_diff1": 0.18064929431090243, + "nauc_ndcg_at_1000_max": 0.3174813305373451, + "nauc_ndcg_at_1000_std": 0.09163599808888812, + "nauc_ndcg_at_100_diff1": 0.1706424141365636, + "nauc_ndcg_at_100_max": 0.31134528504031234, + "nauc_ndcg_at_100_std": 0.09960111117734331, + "nauc_ndcg_at_10_diff1": 0.1773073323565288, + "nauc_ndcg_at_10_max": 0.32382276974882823, + "nauc_ndcg_at_10_std": 0.08571957545722132, + "nauc_ndcg_at_1_diff1": 0.281240764978414, + "nauc_ndcg_at_1_max": 0.3439394887824967, + "nauc_ndcg_at_1_std": 0.03609737508962941, + "nauc_ndcg_at_20_diff1": 0.1721545339923501, + "nauc_ndcg_at_20_max": 0.3176013158610389, + "nauc_ndcg_at_20_std": 0.09486174298777408, + "nauc_ndcg_at_3_diff1": 0.20190800113457869, + "nauc_ndcg_at_3_max": 0.3458475109793588, + "nauc_ndcg_at_3_std": 0.07951116391589273, + "nauc_ndcg_at_5_diff1": 0.19155741798370976, + "nauc_ndcg_at_5_max": 0.3365538979405097, + 
"nauc_ndcg_at_5_std": 0.08007452837431996, + "nauc_precision_at_1000_diff1": 0.09234110863845976, + "nauc_precision_at_1000_max": 0.21827904498512676, + "nauc_precision_at_1000_std": 0.20128583287527202, + "nauc_precision_at_100_diff1": 0.08359690436005066, + "nauc_precision_at_100_max": 0.24271815426588872, + "nauc_precision_at_100_std": 0.17103624518716243, + "nauc_precision_at_10_diff1": 0.11437596606467229, + "nauc_precision_at_10_max": 0.2945335036086182, + "nauc_precision_at_10_std": 0.1142368999220429, + "nauc_precision_at_1_diff1": 0.281240764978414, + "nauc_precision_at_1_max": 0.3439394887824967, + "nauc_precision_at_1_std": 0.03609737508962941, + "nauc_precision_at_20_diff1": 0.09967927607969396, + "nauc_precision_at_20_max": 0.2754307845734562, + "nauc_precision_at_20_std": 0.14144703524646526, + "nauc_precision_at_3_diff1": 0.1643514947034949, + "nauc_precision_at_3_max": 0.3437093365807185, + "nauc_precision_at_3_std": 0.09972465744145485, + "nauc_precision_at_5_diff1": 0.14602008452003706, + "nauc_precision_at_5_max": 0.32424824924884443, + "nauc_precision_at_5_std": 0.09952250231676363, + "nauc_recall_at_1000_diff1": 0.0923411086384618, + "nauc_recall_at_1000_max": 0.21827904498512754, + "nauc_recall_at_1000_std": 0.201285832875271, + "nauc_recall_at_100_diff1": 0.0835969043600502, + "nauc_recall_at_100_max": 0.24271815426588803, + "nauc_recall_at_100_std": 0.1710362451871617, + "nauc_recall_at_10_diff1": 0.11437596606467236, + "nauc_recall_at_10_max": 0.2945335036086183, + "nauc_recall_at_10_std": 0.11423689992204261, + "nauc_recall_at_1_diff1": 0.281240764978414, + "nauc_recall_at_1_max": 0.3439394887824967, + "nauc_recall_at_1_std": 0.03609737508962941, + "nauc_recall_at_20_diff1": 0.0996792760796939, + "nauc_recall_at_20_max": 0.2754307845734561, + "nauc_recall_at_20_std": 0.1414470352464654, + "nauc_recall_at_3_diff1": 0.16435149470349478, + "nauc_recall_at_3_max": 0.3437093365807186, + "nauc_recall_at_3_std": 0.09972465744145496, + 
"nauc_recall_at_5_diff1": 0.14602008452003695, + "nauc_recall_at_5_max": 0.3242482492488444, + "nauc_recall_at_5_std": 0.09952250231676366, + "ndcg_at_1": 0.12871, + "ndcg_at_10": 0.22341, + "ndcg_at_100": 0.27555, + "ndcg_at_1000": 0.31755, + "ndcg_at_20": 0.24319, + "ndcg_at_3": 0.18619, + "ndcg_at_5": 0.20254, + "precision_at_1": 0.12871, + "precision_at_10": 0.03317, + "precision_at_100": 0.00587, + "precision_at_1000": 0.00093, + "precision_at_20": 0.02046, + "precision_at_3": 0.0755, + "precision_at_5": 0.0533, + "recall_at_1": 0.12871, + "recall_at_10": 0.33168, + "recall_at_100": 0.58746, + "recall_at_1000": 0.93441, + "recall_at_20": 0.40924, + "recall_at_3": 0.22649, + "recall_at_5": 0.2665 + } + ] + }, + "task_name": "MintakaRetrieval" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MultiHateClassification.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MultiHateClassification.json new file mode 100644 index 0000000..4791703 --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/MultiHateClassification.json @@ -0,0 +1,95 @@ +{ + "dataset_revision": "8f95949846bb9e33c6aaf730ccfdb8fe6bcfb7a9", + "evaluation_time": 3.0023152828216553, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.6253, + "ap": 0.35331834223362935, + "ap_weighted": 0.35331834223362935, + "f1": 0.5813968427602964, + "f1_weighted": 0.6343096217751171, + "hf_subset": "spa", + "languages": [ + "spa-Latn" + ], + "main_score": 0.6253, + "scores_per_experiment": [ + { + "accuracy": 0.584, + "ap": 0.31834577876845915, + "ap_weighted": 0.31834577876845915, + "f1": 0.5378517215023374, + "f1_weighted": 0.597435574752484 + }, + { + "accuracy": 0.657, + "ap": 0.37961623004078243, + "ap_weighted": 0.37961623004078243, + "f1": 
0.6197469931742923, + "f1_weighted": 0.6683068231579625 + }, + { + "accuracy": 0.615, + "ap": 0.378066954528493, + "ap_weighted": 0.378066954528493, + "f1": 0.5994552591524474, + "f1_weighted": 0.6316494432428102 + }, + { + "accuracy": 0.592, + "ap": 0.339672510348186, + "ap_weighted": 0.339672510348186, + "f1": 0.5624195624195625, + "f1_weighted": 0.6088380952380953 + }, + { + "accuracy": 0.617, + "ap": 0.34544720539178975, + "ap_weighted": 0.34544720539178975, + "f1": 0.577146478447167, + "f1_weighted": 0.6301114191427887 + }, + { + "accuracy": 0.652, + "ap": 0.3639063113392722, + "ap_weighted": 0.3639063113392722, + "f1": 0.6046730373379501, + "f1_weighted": 0.6604805540030263 + }, + { + "accuracy": 0.623, + "ap": 0.34612377390445276, + "ap_weighted": 0.34612377390445276, + "f1": 0.579690933476335, + "f1_weighted": 0.6347379713008064 + }, + { + "accuracy": 0.589, + "ap": 0.37370868213346087, + "ap_weighted": 0.37370868213346087, + "f1": 0.5809026195115834, + "f1_weighted": 0.6046704701538425 + }, + { + "accuracy": 0.645, + "ap": 0.31248110669911616, + "ap_weighted": 0.31248110669911616, + "f1": 0.5310132359954607, + "f1_weighted": 0.6253471096543898 + }, + { + "accuracy": 0.679, + "ap": 0.37581486918228135, + "ap_weighted": 0.37581486918228135, + "f1": 0.621068586585828, + "f1_weighted": 0.681518757104964 + } + ] + } + ] + }, + "task_name": "MultiHateClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/PawsX.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/PawsX.json new file mode 100644 index 0000000..715415c --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/PawsX.json @@ -0,0 +1,127 @@ +{ + "dataset_revision": "8a04d940a42cd40658986fdd8e3da561533a3646", + "evaluation_time": 16.08146381378174, + "kg_co2_emissions": null, + 
"mteb_version": "1.12.39", + "scores": { + "test": [ + { + "cosine": { + "accuracy": 0.5925, + "accuracy_threshold": 0.9891663193702698, + "ap": 0.5729783665388318, + "f1": 0.6244406196213426, + "f1_threshold": 0.6571763753890991, + "precision": 0.45395395395395394, + "recall": 1.0 + }, + "dot": { + "accuracy": 0.5925, + "accuracy_threshold": 0.989166259765625, + "ap": 0.5729783665388318, + "f1": 0.6244406196213426, + "f1_threshold": 0.6571763753890991, + "precision": 0.45395395395395394, + "recall": 1.0 + }, + "euclidean": { + "accuracy": 0.5925, + "accuracy_threshold": 0.14719846844673157, + "ap": 0.5729891822904158, + "f1": 0.6244406196213426, + "f1_threshold": 0.827045738697052, + "precision": 0.45395395395395394, + "recall": 1.0 + }, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.5735093608198505, + "manhattan": { + "accuracy": 0.5915, + "accuracy_threshold": 3.696669101715088, + "ap": 0.5735093608198505, + "f1": 0.6244406196213426, + "f1_threshold": 21.06389617919922, + "precision": 0.45395395395395394, + "recall": 1.0 + }, + "max": { + "accuracy": 0.5925, + "ap": 0.5735093608198505, + "f1": 0.6244406196213426 + }, + "similarity": { + "accuracy": 0.5925, + "accuracy_threshold": 0.989166259765625, + "ap": 0.5729818855905429, + "f1": 0.6244406196213426, + "f1_threshold": 0.6571764945983887, + "precision": 0.45395395395395394, + "recall": 1.0 + } + } + ], + "validation": [ + { + "cosine": { + "accuracy": 0.6285, + "accuracy_threshold": 0.9821840524673462, + "ap": 0.5614087965570115, + "f1": 0.6032763532763532, + "f1_threshold": 0.7300994396209717, + "precision": 0.43192248852626214, + "recall": 1.0 + }, + "dot": { + "accuracy": 0.6285, + "accuracy_threshold": 0.9821840524673462, + "ap": 0.5613806861392907, + "f1": 0.6032763532763532, + "f1_threshold": 0.7300993800163269, + "precision": 0.43192248852626214, + "recall": 1.0 + }, + "euclidean": { + "accuracy": 0.6285, + "accuracy_threshold": 0.1887643039226532, + "ap": 0.5614087965570115, 
+ "f1": 0.6032763532763532, + "f1_threshold": 0.7347081899642944, + "precision": 0.43192248852626214, + "recall": 1.0 + }, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.561588251951701, + "manhattan": { + "accuracy": 0.6275, + "accuracy_threshold": 4.7729034423828125, + "ap": 0.561588251951701, + "f1": 0.6034912718204489, + "f1_threshold": 18.49922752380371, + "precision": 0.43214285714285716, + "recall": 1.0 + }, + "max": { + "accuracy": 0.6285, + "ap": 0.561588251951701, + "f1": 0.6034912718204489 + }, + "similarity": { + "accuracy": 0.6285, + "accuracy_threshold": 0.9821840524673462, + "ap": 0.5610849900734205, + "f1": 0.6032763532763532, + "f1_threshold": 0.7300993800163269, + "precision": 0.43192248852626214, + "recall": 1.0 + } + } + ] + }, + "task_name": "PawsX" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/PublicHealthQA.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/PublicHealthQA.json new file mode 100644 index 0000000..8e70b5e --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/PublicHealthQA.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "main", + "evaluation_time": 29.330244064331055, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "spanish", + "languages": [ + "spa-Latn" + ], + "main_score": 0.79986, + "map_at_1": 0.66049, + "map_at_10": 0.75861, + "map_at_100": 0.76217, + "map_at_1000": 0.76217, + "map_at_20": 0.76085, + "map_at_3": 0.74177, + "map_at_5": 0.75226, + "mrr_at_1": 0.6604938271604939, + "mrr_at_10": 0.758612580834803, + "mrr_at_100": 0.7621733860200397, + "mrr_at_1000": 0.7621733860200397, + "mrr_at_20": 0.7608541321504283, + "mrr_at_3": 0.7417695473251028, + "mrr_at_5": 0.7522633744855967, + "nauc_map_at_1000_diff1": 
0.6774619754625252, + "nauc_map_at_1000_max": 0.4236581592472692, + "nauc_map_at_1000_std": -0.08714783484175787, + "nauc_map_at_100_diff1": 0.6774619754625252, + "nauc_map_at_100_max": 0.4236581592472692, + "nauc_map_at_100_std": -0.08714783484175787, + "nauc_map_at_10_diff1": 0.6748903298278056, + "nauc_map_at_10_max": 0.4225312433072887, + "nauc_map_at_10_std": -0.08686429045815387, + "nauc_map_at_1_diff1": 0.7064052409698545, + "nauc_map_at_1_max": 0.4342494609664663, + "nauc_map_at_1_std": -0.07607275266362531, + "nauc_map_at_20_diff1": 0.6775511135059535, + "nauc_map_at_20_max": 0.4251292390484833, + "nauc_map_at_20_std": -0.08840174714571644, + "nauc_map_at_3_diff1": 0.6744129638828698, + "nauc_map_at_3_max": 0.417797107518186, + "nauc_map_at_3_std": -0.0885677543135244, + "nauc_map_at_5_diff1": 0.6731851323787795, + "nauc_map_at_5_max": 0.41858047576563845, + "nauc_map_at_5_std": -0.08788180458237041, + "nauc_mrr_at_1000_diff1": 0.6774619754625252, + "nauc_mrr_at_1000_max": 0.4236581592472692, + "nauc_mrr_at_1000_std": -0.08714783484175787, + "nauc_mrr_at_100_diff1": 0.6774619754625252, + "nauc_mrr_at_100_max": 0.4236581592472692, + "nauc_mrr_at_100_std": -0.08714783484175787, + "nauc_mrr_at_10_diff1": 0.6748903298278056, + "nauc_mrr_at_10_max": 0.4225312433072887, + "nauc_mrr_at_10_std": -0.08686429045815387, + "nauc_mrr_at_1_diff1": 0.7064052409698545, + "nauc_mrr_at_1_max": 0.4342494609664663, + "nauc_mrr_at_1_std": -0.07607275266362531, + "nauc_mrr_at_20_diff1": 0.6775511135059535, + "nauc_mrr_at_20_max": 0.4251292390484833, + "nauc_mrr_at_20_std": -0.08840174714571644, + "nauc_mrr_at_3_diff1": 0.6744129638828698, + "nauc_mrr_at_3_max": 0.417797107518186, + "nauc_mrr_at_3_std": -0.0885677543135244, + "nauc_mrr_at_5_diff1": 0.6731851323787795, + "nauc_mrr_at_5_max": 0.41858047576563845, + "nauc_mrr_at_5_std": -0.08788180458237041, + "nauc_ndcg_at_1000_diff1": 0.6757905695030055, + "nauc_ndcg_at_1000_max": 0.42679039390447276, + "nauc_ndcg_at_1000_std": 
-0.08953081840047505, + "nauc_ndcg_at_100_diff1": 0.6757905695030055, + "nauc_ndcg_at_100_max": 0.42679039390447276, + "nauc_ndcg_at_100_std": -0.08953081840047505, + "nauc_ndcg_at_10_diff1": 0.6678585315282368, + "nauc_ndcg_at_10_max": 0.42971172804058005, + "nauc_ndcg_at_10_std": -0.09124694767206765, + "nauc_ndcg_at_1_diff1": 0.7064052409698545, + "nauc_ndcg_at_1_max": 0.4342494609664663, + "nauc_ndcg_at_1_std": -0.07607275266362531, + "nauc_ndcg_at_20_diff1": 0.6766853993287619, + "nauc_ndcg_at_20_max": 0.4379399080889285, + "nauc_ndcg_at_20_std": -0.10065852812661286, + "nauc_ndcg_at_3_diff1": 0.6674759617755959, + "nauc_ndcg_at_3_max": 0.4191587801756736, + "nauc_ndcg_at_3_std": -0.09301620135869994, + "nauc_ndcg_at_5_diff1": 0.6645402441922457, + "nauc_ndcg_at_5_max": 0.4211343056226352, + "nauc_ndcg_at_5_std": -0.09138637755826204, + "nauc_precision_at_1000_diff1": 1.0, + "nauc_precision_at_1000_max": 1.0, + "nauc_precision_at_1000_std": 1.0, + "nauc_precision_at_100_diff1": NaN, + "nauc_precision_at_100_max": NaN, + "nauc_precision_at_100_std": NaN, + "nauc_precision_at_10_diff1": 0.6170970825585262, + "nauc_precision_at_10_max": 0.49874689435679553, + "nauc_precision_at_10_std": -0.12963008497867104, + "nauc_precision_at_1_diff1": 0.7064052409698545, + "nauc_precision_at_1_max": 0.4342494609664663, + "nauc_precision_at_1_std": -0.07607275266362531, + "nauc_precision_at_20_diff1": 0.6944620275438194, + "nauc_precision_at_20_max": 0.6445936812192934, + "nauc_precision_at_20_std": -0.3196232555015065, + "nauc_precision_at_3_diff1": 0.6410597399431719, + "nauc_precision_at_3_max": 0.4285180505795624, + "nauc_precision_at_3_std": -0.11142865556400589, + "nauc_precision_at_5_diff1": 0.6213273205999843, + "nauc_precision_at_5_max": 0.44046332547202827, + "nauc_precision_at_5_std": -0.10859453314531092, + "nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + "nauc_recall_at_100_diff1": NaN, + 
"nauc_recall_at_100_max": NaN, + "nauc_recall_at_100_std": NaN, + "nauc_recall_at_10_diff1": 0.6170970825585277, + "nauc_recall_at_10_max": 0.49874689435679687, + "nauc_recall_at_10_std": -0.1296300849786717, + "nauc_recall_at_1_diff1": 0.7064052409698545, + "nauc_recall_at_1_max": 0.4342494609664663, + "nauc_recall_at_1_std": -0.07607275266362531, + "nauc_recall_at_20_diff1": 0.6944620275438232, + "nauc_recall_at_20_max": 0.6445936812192942, + "nauc_recall_at_20_std": -0.3196232555015088, + "nauc_recall_at_3_diff1": 0.6410597399431729, + "nauc_recall_at_3_max": 0.4285180505795632, + "nauc_recall_at_3_std": -0.11142865556400469, + "nauc_recall_at_5_diff1": 0.6213273205999849, + "nauc_recall_at_5_max": 0.4404633254720291, + "nauc_recall_at_5_std": -0.10859453314531155, + "ndcg_at_1": 0.66049, + "ndcg_at_10": 0.79986, + "ndcg_at_100": 0.81627, + "ndcg_at_1000": 0.81627, + "ndcg_at_20": 0.80778, + "ndcg_at_3": 0.76697, + "ndcg_at_5": 0.78531, + "precision_at_1": 0.66049, + "precision_at_10": 0.09259, + "precision_at_100": 0.01, + "precision_at_1000": 0.001, + "precision_at_20": 0.04784, + "precision_at_3": 0.27984, + "precision_at_5": 0.17654, + "recall_at_1": 0.66049, + "recall_at_10": 0.92593, + "recall_at_100": 1.0, + "recall_at_1000": 1.0, + "recall_at_20": 0.95679, + "recall_at_3": 0.83951, + "recall_at_5": 0.88272 + } + ] + }, + "task_name": "PublicHealthQA" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/SIB200Classification.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/SIB200Classification.json new file mode 100644 index 0000000..a4a38d4 --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/SIB200Classification.json @@ -0,0 +1,201 @@ +{ + "dataset_revision": "a74d7350ea12af010cfb1c21e34f1f81fd2e615b", + "evaluation_time": 
9.380834102630615, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.7313725490196079, + "f1": 0.7158860950686685, + "f1_weighted": 0.7294810195233961, + "hf_subset": "spa_Latn", + "languages": [ + "spa-Latn" + ], + "main_score": 0.7313725490196079, + "scores_per_experiment": [ + { + "accuracy": 0.7205882352941176, + "f1": 0.698876666164186, + "f1_weighted": 0.7193281399554278 + }, + { + "accuracy": 0.7401960784313726, + "f1": 0.7248190002467382, + "f1_weighted": 0.7407413387059063 + }, + { + "accuracy": 0.7009803921568627, + "f1": 0.6932738108834562, + "f1_weighted": 0.6889686878986371 + }, + { + "accuracy": 0.7009803921568627, + "f1": 0.688702750260041, + "f1_weighted": 0.7007311357160484 + }, + { + "accuracy": 0.7696078431372549, + "f1": 0.7720180553424376, + "f1_weighted": 0.7710889033976164 + }, + { + "accuracy": 0.7303921568627451, + "f1": 0.7089918984772579, + "f1_weighted": 0.7325520311271134 + }, + { + "accuracy": 0.7058823529411765, + "f1": 0.6863066948279117, + "f1_weighted": 0.7071244708179001 + }, + { + "accuracy": 0.7647058823529411, + "f1": 0.7468146804049587, + "f1_weighted": 0.7615657666401597 + }, + { + "accuracy": 0.7156862745098039, + "f1": 0.696087194998397, + "f1_weighted": 0.7111704302074016 + }, + { + "accuracy": 0.7647058823529411, + "f1": 0.7429701990813002, + "f1_weighted": 0.76153929076775 + } + ] + } + ], + "train": [ + { + "accuracy": 0.7189728958630528, + "f1": 0.7044454457497162, + "f1_weighted": 0.7186698842984296, + "hf_subset": "spa_Latn", + "languages": [ + "spa-Latn" + ], + "main_score": 0.7189728958630528, + "scores_per_experiment": [ + { + "accuracy": 0.7132667617689016, + "f1": 0.6927333719048233, + "f1_weighted": 0.715556401353179 + }, + { + "accuracy": 0.7303851640513552, + "f1": 0.7085804545233062, + "f1_weighted": 0.7284398611837252 + }, + { + "accuracy": 0.7061340941512125, + "f1": 0.6936285279699883, + "f1_weighted": 0.7002274765640324 + }, + { + "accuracy": 
0.7275320970042796, + "f1": 0.7145960216175439, + "f1_weighted": 0.7269980485211858 + }, + { + "accuracy": 0.7232524964336662, + "f1": 0.7133043652086116, + "f1_weighted": 0.7213114631954249 + }, + { + "accuracy": 0.7275320970042796, + "f1": 0.7202057481752846, + "f1_weighted": 0.731774779581867 + }, + { + "accuracy": 0.7146932952924394, + "f1": 0.6985620191817462, + "f1_weighted": 0.7113943910736094 + }, + { + "accuracy": 0.6947218259629101, + "f1": 0.6799291601843608, + "f1_weighted": 0.697107829338363 + }, + { + "accuracy": 0.7118402282453637, + "f1": 0.6987834120088381, + "f1_weighted": 0.7130663587315121 + }, + { + "accuracy": 0.7403708987161198, + "f1": 0.7241313767226594, + "f1_weighted": 0.7408222334413973 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.7181818181818181, + "f1": 0.6990759625244847, + "f1_weighted": 0.7210130168057647, + "hf_subset": "spa_Latn", + "languages": [ + "spa-Latn" + ], + "main_score": 0.7181818181818181, + "scores_per_experiment": [ + { + "accuracy": 0.7070707070707071, + "f1": 0.6920516800508415, + "f1_weighted": 0.7197167261241607 + }, + { + "accuracy": 0.7777777777777778, + "f1": 0.7568592154518928, + "f1_weighted": 0.7766572430575158 + }, + { + "accuracy": 0.6565656565656566, + "f1": 0.6364879207983173, + "f1_weighted": 0.6433078918665838 + }, + { + "accuracy": 0.8080808080808081, + "f1": 0.7945763397661398, + "f1_weighted": 0.8081888895501698 + }, + { + "accuracy": 0.7474747474747475, + "f1": 0.7183350135752882, + "f1_weighted": 0.7474879049249759 + }, + { + "accuracy": 0.6565656565656566, + "f1": 0.6351516546468211, + "f1_weighted": 0.6689783622114448 + }, + { + "accuracy": 0.7373737373737373, + "f1": 0.7261363262916057, + "f1_weighted": 0.7376607374960472 + }, + { + "accuracy": 0.6767676767676768, + "f1": 0.6496268810554525, + "f1_weighted": 0.6774387453175332 + }, + { + "accuracy": 0.6464646464646465, + "f1": 0.6342330855347903, + "f1_weighted": 0.6584786714296429 + }, + { + "accuracy": 0.7676767676767676, + "f1": 
0.7473015080736974, + "f1_weighted": 0.7722149960795729 + } + ] + } + ] + }, + "task_name": "SIB200Classification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/SIB200ClusteringS2S.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/SIB200ClusteringS2S.json new file mode 100644 index 0000000..b7e3416 --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/SIB200ClusteringS2S.json @@ -0,0 +1,33 @@ +{ + "dataset_revision": "a74d7350ea12af010cfb1c21e34f1f81fd2e615b", + "evaluation_time": 24.67245888710022, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "spa_Latn", + "languages": [ + "spa-Latn" + ], + "main_score": 0.34998316595531576, + "v_measure": 0.34998316595531576, + "v_measures": { + "Level 0": [ + 0.3443452686541507, + 0.34394969767236816, + 0.42680313302599254, + 0.35232038009471517, + 0.3190133318169187, + 0.31193223476738957, + 0.3751270924476364, + 0.38288136249720717, + 0.2935564975845718, + 0.3499026609922071 + ] + } + } + ] + }, + "task_name": "SIB200ClusteringS2S" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/STS17.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/STS17.json new file mode 100644 index 0000000..dfcb417 --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/STS17.json @@ -0,0 +1,54 @@ +{ + "dataset_revision": "faeb762787bd10488a50c8b5be4a3b82e411949c", + "evaluation_time": 1.2508361339569092, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "cosine_pearson": 0.7494548211539521, + "cosine_spearman": 
0.7557298844031564, + "euclidean_pearson": 0.7548137411165575, + "euclidean_spearman": 0.7557298844031564, + "hf_subset": "es-en", + "languages": [ + "spa-Latn", + "eng-Latn" + ], + "main_score": 0.7557298844031564, + "manhattan_pearson": 0.7557228083491458, + "manhattan_spearman": 0.7569408439770529, + "pearson": [ + 0.7494548096529067, + 2.5703763238667655e-46 + ], + "spearman": [ + 0.7557298844031564, + 1.711164569770582e-47 + ] + }, + { + "cosine_pearson": 0.8770418044924028, + "cosine_spearman": 0.8735156107182851, + "euclidean_pearson": 0.8865508027039452, + "euclidean_spearman": 0.8735122563582298, + "hf_subset": "es-es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.8735156107182851, + "manhattan_pearson": 0.8858136974063854, + "manhattan_spearman": 0.8716684770763752, + "pearson": [ + 0.877041838387769, + 6.330228394912483e-81 + ], + "spearman": [ + 0.8735033141891, + 1.696062877951556e-79 + ] + } + ] + }, + "task_name": "STS17" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/STSBenchmarkMultilingualSTS.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/STSBenchmarkMultilingualSTS.json new file mode 100644 index 0000000..57068f4 --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/STSBenchmarkMultilingualSTS.json @@ -0,0 +1,55 @@ +{ + "dataset_revision": "29afa2569dcedaaa2fe6a3dcfebab33d28b82e8c", + "evaluation_time": 26.43315863609314, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "dev": [ + { + "cosine_pearson": 0.8454934704653636, + "cosine_spearman": 0.8468700424822017, + "euclidean_pearson": 0.8417114425601384, + "euclidean_spearman": 0.8468700409766542, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.8468700424822017, + "manhattan_pearson": 0.8405075955393287, + 
"manhattan_spearman": 0.8455413229790223, + "pearson": [ + 0.8454934713283186, + 0.0 + ], + "spearman": [ + 0.8468700409766542, + 0.0 + ] + } + ], + "test": [ + { + "cosine_pearson": 0.8203573598224699, + "cosine_spearman": 0.8247123575339489, + "euclidean_pearson": 0.815933918995886, + "euclidean_spearman": 0.8247154217328009, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.8247123575339489, + "manhattan_pearson": 0.8145366310497997, + "manhattan_spearman": 0.8232887455960749, + "pearson": [ + 0.8203573527931765, + 0.0 + ], + "spearman": [ + 0.8247171583712931, + 0.0 + ] + } + ] + }, + "task_name": "STSBenchmarkMultilingualSTS" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/STSES.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/STSES.json new file mode 100644 index 0000000..5f736f3 --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/STSES.json @@ -0,0 +1,32 @@ +{ + "dataset_revision": "0912bb6c9393c76d62a7c5ee81c4c817ff47c9f4", + "evaluation_time": 1.0742108821868896, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "cosine_pearson": 0.815644709155301, + "cosine_spearman": 0.7743978294342545, + "euclidean_pearson": 0.810947871906862, + "euclidean_spearman": 0.7743978294342545, + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.7743978294342545, + "manhattan_pearson": 0.8067157450347956, + "manhattan_spearman": 0.7708116371930783, + "pearson": [ + 0.8156447268453755, + 3.423713999369678e-38 + ], + "spearman": [ + 0.7743978294342545, + 3.171257065149321e-32 + ] + } + ] + }, + "task_name": "STSES" +} \ No newline at end of file diff --git 
a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/SpanishPassageRetrievalS2S.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/SpanishPassageRetrievalS2S.json new file mode 100644 index 0000000..2895474 --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/SpanishPassageRetrievalS2S.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "9cddf2ce5209ade52c2115ccfa00eb22c6d3a837", + "evaluation_time": 22.305843830108643, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.70373, + "map_at_1": 0.15854, + "map_at_10": 0.51967, + "map_at_100": 0.63525, + "map_at_1000": 0.63531, + "map_at_20": 0.58813, + "map_at_3": 0.31737, + "map_at_5": 0.40325, + "mrr_at_1": 0.7544910179640718, + "mrr_at_10": 0.8404191616766465, + "mrr_at_100": 0.8411401277712133, + "mrr_at_1000": 0.8411401277712133, + "mrr_at_20": 0.8408797789037309, + "mrr_at_3": 0.8253493013972055, + "mrr_at_5": 0.8388223552894211, + "nauc_map_at_1000_diff1": 0.34780986840999617, + "nauc_map_at_1000_max": 0.22237047642628488, + "nauc_map_at_1000_std": -0.12575120173953228, + "nauc_map_at_100_diff1": 0.3478680904367282, + "nauc_map_at_100_max": 0.22258955018438925, + "nauc_map_at_100_std": -0.12547095494706675, + "nauc_map_at_10_diff1": 0.4769298756809457, + "nauc_map_at_10_max": 0.19339875263389794, + "nauc_map_at_10_std": -0.3097969413847308, + "nauc_map_at_1_diff1": 0.7076811362225229, + "nauc_map_at_1_max": 0.1639273306318211, + "nauc_map_at_1_std": -0.4324695301980782, + "nauc_map_at_20_diff1": 0.40151426836703374, + "nauc_map_at_20_max": 0.2066749474747903, + "nauc_map_at_20_std": -0.19478867203413594, + "nauc_map_at_3_diff1": 0.6641796208585916, + "nauc_map_at_3_max": 0.13852784043706112, + "nauc_map_at_3_std": 
-0.39966924788334546, + "nauc_map_at_5_diff1": 0.5773954975542298, + "nauc_map_at_5_max": 0.12174083915916546, + "nauc_map_at_5_std": -0.35953257570961594, + "nauc_mrr_at_1000_diff1": 0.4667117035242082, + "nauc_mrr_at_1000_max": 0.4107438706780414, + "nauc_mrr_at_1000_std": -0.17866901602604005, + "nauc_mrr_at_100_diff1": 0.4667117035242082, + "nauc_mrr_at_100_max": 0.4107438706780414, + "nauc_mrr_at_100_std": -0.17866901602604005, + "nauc_mrr_at_10_diff1": 0.4679439410265647, + "nauc_mrr_at_10_max": 0.4132493696110517, + "nauc_mrr_at_10_std": -0.18058301452301814, + "nauc_mrr_at_1_diff1": 0.4609036624080412, + "nauc_mrr_at_1_max": 0.4011211225755551, + "nauc_mrr_at_1_std": -0.1900189979001662, + "nauc_mrr_at_20_diff1": 0.4676690947558237, + "nauc_mrr_at_20_max": 0.41180173873011877, + "nauc_mrr_at_20_std": -0.1793623774521881, + "nauc_mrr_at_3_diff1": 0.4775428572163414, + "nauc_mrr_at_3_max": 0.4138505586310526, + "nauc_mrr_at_3_std": -0.19043898646219992, + "nauc_mrr_at_5_diff1": 0.467349238059751, + "nauc_mrr_at_5_max": 0.3992261886630868, + "nauc_mrr_at_5_std": -0.18634513854121443, + "nauc_ndcg_at_1000_diff1": 0.3378247949952837, + "nauc_ndcg_at_1000_max": 0.2764497665987558, + "nauc_ndcg_at_1000_std": -0.07543513586371473, + "nauc_ndcg_at_100_diff1": 0.3382858645749299, + "nauc_ndcg_at_100_max": 0.2779895997848291, + "nauc_ndcg_at_100_std": -0.07342518176520252, + "nauc_ndcg_at_10_diff1": 0.3899729632422709, + "nauc_ndcg_at_10_max": 0.26261313388869745, + "nauc_ndcg_at_10_std": -0.17915099579503663, + "nauc_ndcg_at_1_diff1": 0.4609036624080412, + "nauc_ndcg_at_1_max": 0.4011211225755551, + "nauc_ndcg_at_1_std": -0.1900189979001662, + "nauc_ndcg_at_20_diff1": 0.4091923698924338, + "nauc_ndcg_at_20_max": 0.2392260031511339, + "nauc_ndcg_at_20_std": -0.176726253145597, + "nauc_ndcg_at_3_diff1": 0.2681887882910025, + "nauc_ndcg_at_3_max": 0.2657146436919544, + "nauc_ndcg_at_3_std": -0.03533754944326467, + "nauc_ndcg_at_5_diff1": 0.30959772405929786, + 
"nauc_ndcg_at_5_max": 0.24672056568274603, + "nauc_ndcg_at_5_std": -0.0698637034606598, + "nauc_precision_at_1000_diff1": -0.4157370458610295, + "nauc_precision_at_1000_max": 0.04991803551955242, + "nauc_precision_at_1000_std": 0.42387507985629497, + "nauc_precision_at_100_diff1": -0.4147694032210082, + "nauc_precision_at_100_max": 0.05126928425733801, + "nauc_precision_at_100_std": 0.42486192584115157, + "nauc_precision_at_10_diff1": -0.4168532761764801, + "nauc_precision_at_10_max": 0.15486716158833408, + "nauc_precision_at_10_std": 0.36524527190025147, + "nauc_precision_at_1_diff1": 0.4609036624080412, + "nauc_precision_at_1_max": 0.4011211225755551, + "nauc_precision_at_1_std": -0.1900189979001662, + "nauc_precision_at_20_diff1": -0.40420304646890826, + "nauc_precision_at_20_max": 0.07423470631588905, + "nauc_precision_at_20_std": 0.4172969899844053, + "nauc_precision_at_3_diff1": -0.001969301754427926, + "nauc_precision_at_3_max": 0.1846471055080392, + "nauc_precision_at_3_std": 0.14916997977440735, + "nauc_precision_at_5_diff1": -0.2951153770711459, + "nauc_precision_at_5_max": 0.11901343884058992, + "nauc_precision_at_5_std": 0.3053281318232279, + "nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + "nauc_recall_at_100_diff1": 0.5544761861882196, + "nauc_recall_at_100_max": 1.0, + "nauc_recall_at_100_std": 0.8690196078430636, + "nauc_recall_at_10_diff1": 0.47528545780774856, + "nauc_recall_at_10_max": 0.1635934001342106, + "nauc_recall_at_10_std": -0.4065361897631217, + "nauc_recall_at_1_diff1": 0.7076811362225229, + "nauc_recall_at_1_max": 0.1639273306318211, + "nauc_recall_at_1_std": -0.4324695301980782, + "nauc_recall_at_20_diff1": 0.45345202228741366, + "nauc_recall_at_20_max": 0.15309688151967915, + "nauc_recall_at_20_std": -0.28361168437162326, + "nauc_recall_at_3_diff1": 0.6531549959201955, + "nauc_recall_at_3_max": 0.10033116888202623, + "nauc_recall_at_3_std": -0.432775458386401, + 
"nauc_recall_at_5_diff1": 0.5421383865805801, + "nauc_recall_at_5_max": 0.059072564149629685, + "nauc_recall_at_5_std": -0.4110006532379634, + "ndcg_at_1": 0.75449, + "ndcg_at_10": 0.70373, + "ndcg_at_100": 0.81399, + "ndcg_at_1000": 0.81423, + "ndcg_at_20": 0.74139, + "ndcg_at_3": 0.68539, + "ndcg_at_5": 0.682, + "precision_at_1": 0.75449, + "precision_at_10": 0.44132, + "precision_at_100": 0.07713, + "precision_at_1000": 0.00772, + "precision_at_20": 0.29641, + "precision_at_3": 0.63074, + "precision_at_5": 0.55928, + "recall_at_1": 0.15854, + "recall_at_10": 0.68702, + "recall_at_100": 0.99914, + "recall_at_1000": 1.0, + "recall_at_20": 0.82174, + "recall_at_3": 0.35747, + "recall_at_5": 0.48718 + } + ] + }, + "task_name": "SpanishPassageRetrievalS2S" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/SpanishSentimentClassification.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/SpanishSentimentClassification.json new file mode 100644 index 0000000..23fd303 --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/SpanishSentimentClassification.json @@ -0,0 +1,181 @@ +{ + "dataset_revision": "2a6e340e4b59b7c0a78c03a0b79ac27e1b4a2662", + "evaluation_time": 45.127585649490356, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.9395270270270271, + "ap": 0.96762964734462, + "ap_weighted": 0.96762964734462, + "f1": 0.9013317184449618, + "f1_weighted": 0.9410572325708759, + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.9395270270270271, + "scores_per_experiment": [ + { + "accuracy": 0.9594594594594594, + "ap": 0.9622826761187416, + "ap_weighted": 0.9622826761187416, + "f1": 0.9266297612162273, + "f1_weighted": 0.9584646201187554 + }, + { + "accuracy": 0.9358108108108109, 
+ "ap": 0.9603816710391093, + "ap_weighted": 0.9603816710391093, + "f1": 0.8916273244050487, + "f1_weighted": 0.9365121359918547 + }, + { + "accuracy": 0.9527027027027027, + "ap": 0.9577758085954806, + "ap_weighted": 0.9577758085954806, + "f1": 0.9144013880855986, + "f1_weighted": 0.9515420568052146 + }, + { + "accuracy": 0.9527027027027027, + "ap": 0.9639594612605409, + "ap_weighted": 0.9639594612605409, + "f1": 0.9170868347338936, + "f1_weighted": 0.952335528806117 + }, + { + "accuracy": 0.9391891891891891, + "ap": 0.9803548380882665, + "ap_weighted": 0.9803548380882665, + "f1": 0.9047414717871701, + "f1_weighted": 0.9418985602208086 + }, + { + "accuracy": 0.9324324324324325, + "ap": 0.9789154987601923, + "ap_weighted": 0.9789154987601923, + "f1": 0.8954802259887006, + "f1_weighted": 0.9357917239273171 + }, + { + "accuracy": 0.956081081081081, + "ap": 0.9806607292081456, + "ap_weighted": 0.9806607292081456, + "f1": 0.927927927927928, + "f1_weighted": 0.957146335524714 + }, + { + "accuracy": 0.8851351351351351, + "ap": 0.9721615560812901, + "ap_weighted": 0.9721615560812901, + "f1": 0.8377192982456141, + "f1_weighted": 0.8946183025130392 + }, + { + "accuracy": 0.9290540540540541, + "ap": 0.9589508141106331, + "ap_weighted": 0.9589508141106331, + "f1": 0.8819305943358596, + "f1_weighted": 0.9303140396080165 + }, + { + "accuracy": 0.9527027027027027, + "ap": 0.960853420183801, + "ap_weighted": 0.960853420183801, + "f1": 0.9157723577235772, + "f1_weighted": 0.9519490221929245 + } + ] + } + ], + "validation": [ + { + "accuracy": 0.9482993197278912, + "ap": 0.9795397355802565, + "ap_weighted": 0.9795397355802565, + "f1": 0.9211470172533256, + "f1_weighted": 0.9509377510008991, + "hf_subset": "default", + "languages": [ + "spa-Latn" + ], + "main_score": 0.9482993197278912, + "scores_per_experiment": [ + { + "accuracy": 0.9795918367346939, + "ap": 0.9888809242705346, + "ap_weighted": 0.9888809242705346, + "f1": 0.9654740468175057, + "f1_weighted": 0.9797420259891321 + }, 
+ { + "accuracy": 0.9795918367346939, + "ap": 0.9888809242705346, + "ap_weighted": 0.9888809242705346, + "f1": 0.9654740468175057, + "f1_weighted": 0.9797420259891321 + }, + { + "accuracy": 0.9659863945578231, + "ap": 0.9728928016132614, + "ap_weighted": 0.9728928016132614, + "f1": 0.9406923263132414, + "f1_weighted": 0.9657229146802755 + }, + { + "accuracy": 0.9727891156462585, + "ap": 0.987419761292947, + "ap_weighted": 0.987419761292947, + "f1": 0.9546296296296296, + "f1_weighted": 0.9731796422272612 + }, + { + "accuracy": 0.9659863945578231, + "ap": 0.9859586081245778, + "ap_weighted": 0.9859586081245778, + "f1": 0.9440852034994294, + "f1_weighted": 0.9667005638314663 + }, + { + "accuracy": 0.8843537414965986, + "ap": 0.9618510397731177, + "ap_weighted": 0.9618510397731177, + "f1": 0.8321128653006382, + "f1_weighted": 0.8926358316252265 + }, + { + "accuracy": 0.9523809523809523, + "ap": 0.9830363322302418, + "ap_weighted": 0.9830363322302418, + "f1": 0.9238285587386187, + "f1_weighted": 0.9539671964721931 + }, + { + "accuracy": 0.8435374149659864, + "ap": 0.9530985948098247, + "ap_weighted": 0.9530985948098247, + "f1": 0.7864586622876271, + "f1_weighted": 0.8578071031355763 + }, + { + "accuracy": 0.9727891156462585, + "ap": 0.987419761292947, + "ap_weighted": 0.987419761292947, + "f1": 0.9546296296296296, + "f1_weighted": 0.9731796422272612 + }, + { + "accuracy": 0.9659863945578231, + "ap": 0.9859586081245778, + "ap_weighted": 0.9859586081245778, + "f1": 0.9440852034994294, + "f1_weighted": 0.9667005638314663 + } + ] + } + ] + }, + "task_name": "SpanishSentimentClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/Tatoeba.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/Tatoeba.json new file mode 100644 index 0000000..3716b37 --- /dev/null +++ 
b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/Tatoeba.json @@ -0,0 +1,23 @@ +{ + "dataset_revision": "69e8f12da6e31d59addadda9a9c8a2e601a0e282", + "evaluation_time": 1.955474853515625, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.982, + "f1": 0.9761666666666667, + "hf_subset": "spa-eng", + "languages": [ + "spa-Latn", + "eng-Latn" + ], + "main_score": 0.9761666666666667, + "precision": 0.9733333333333334, + "recall": 0.982 + } + ] + }, + "task_name": "Tatoeba" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/TweetSentimentClassification.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/TweetSentimentClassification.json new file mode 100644 index 0000000..aab195b --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/TweetSentimentClassification.json @@ -0,0 +1,73 @@ +{ + "dataset_revision": "d522bb117c32f5e0207344f69f7075fc9941168b", + "evaluation_time": 1.3757116794586182, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "accuracy": 0.54765625, + "f1": 0.5330622374595151, + "f1_weighted": 0.5324301455861884, + "hf_subset": "spanish", + "languages": [ + "spa-Latn" + ], + "main_score": 0.54765625, + "scores_per_experiment": [ + { + "accuracy": 0.53125, + "f1": 0.5219718092852421, + "f1_weighted": 0.5214625771849419 + }, + { + "accuracy": 0.578125, + "f1": 0.5692276454953941, + "f1_weighted": 0.5685870248417789 + }, + { + "accuracy": 0.55078125, + "f1": 0.5214820956943677, + "f1_weighted": 0.520552798795375 + }, + { + "accuracy": 0.40625, + "f1": 0.3849672980740942, + "f1_weighted": 0.3846176388841741 + }, + { + "accuracy": 0.6015625, + "f1": 0.5798869628366644, + "f1_weighted": 
0.5791264090177134 + }, + { + "accuracy": 0.4921875, + "f1": 0.4740495694841133, + "f1_weighted": 0.473291563353316 + }, + { + "accuracy": 0.60546875, + "f1": 0.5914882108604925, + "f1_weighted": 0.5908166960508048 + }, + { + "accuracy": 0.5546875, + "f1": 0.5476255423063933, + "f1_weighted": 0.547038864006118 + }, + { + "accuracy": 0.5546875, + "f1": 0.5435550556032483, + "f1_weighted": 0.542962621370001 + }, + { + "accuracy": 0.6015625, + "f1": 0.5963681849551414, + "f1_weighted": 0.5958452623576604 + } + ] + } + ] + }, + "task_name": "TweetSentimentClassification" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/XNLI.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/XNLI.json new file mode 100644 index 0000000..107bc15 --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/XNLI.json @@ -0,0 +1,127 @@ +{ + "dataset_revision": "09698e0180d87dc247ca447d3a1248b931ac0cdb", + "evaluation_time": 7.478406190872192, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "cosine": { + "accuracy": 0.7604395604395604, + "accuracy_threshold": 0.6557025909423828, + "ap": 0.8076605260070395, + "f1": 0.7773279352226721, + "f1_threshold": 0.6379867792129517, + "precision": 0.72, + "recall": 0.844574780058651 + }, + "dot": { + "accuracy": 0.7604395604395604, + "accuracy_threshold": 0.6557024717330933, + "ap": 0.8076605260070395, + "f1": 0.7773279352226721, + "f1_threshold": 0.6379867792129517, + "precision": 0.72, + "recall": 0.844574780058651 + }, + "euclidean": { + "accuracy": 0.7604395604395604, + "accuracy_threshold": 0.8298161625862122, + "ap": 0.8076605260070395, + "f1": 0.7773279352226721, + "f1_threshold": 0.8508973717689514, + "precision": 0.72, + "recall": 0.844574780058651 + }, + "hf_subset": "es", + "languages": [ + 
"spa-Latn" + ], + "main_score": 0.8076605260070395, + "manhattan": { + "accuracy": 0.7582417582417582, + "accuracy_threshold": 20.810562133789062, + "ap": 0.8073691355948076, + "f1": 0.7731204258150365, + "f1_threshold": 21.702495574951172, + "precision": 0.707673568818514, + "recall": 0.8519061583577713 + }, + "max": { + "accuracy": 0.7604395604395604, + "ap": 0.8076605260070395, + "f1": 0.7773279352226721 + }, + "similarity": { + "accuracy": 0.7604395604395604, + "accuracy_threshold": 0.6557024717330933, + "ap": 0.8076605260070395, + "f1": 0.7773279352226721, + "f1_threshold": 0.6379868984222412, + "precision": 0.72, + "recall": 0.844574780058651 + } + } + ], + "validation": [ + { + "cosine": { + "accuracy": 0.7721611721611722, + "accuracy_threshold": 0.674301028251648, + "ap": 0.83101273867086, + "f1": 0.7692307692307693, + "f1_threshold": 0.6320723295211792, + "precision": 0.7125, + "recall": 0.8357771260997068 + }, + "dot": { + "accuracy": 0.7721611721611722, + "accuracy_threshold": 0.674301028251648, + "ap": 0.83101273867086, + "f1": 0.7692307692307693, + "f1_threshold": 0.6320723295211792, + "precision": 0.7125, + "recall": 0.8357771260997068 + }, + "euclidean": { + "accuracy": 0.7721611721611722, + "accuracy_threshold": 0.8070923089981079, + "ap": 0.83101273867086, + "f1": 0.7692307692307693, + "f1_threshold": 0.8578200936317444, + "precision": 0.7125, + "recall": 0.8357771260997068 + }, + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + "main_score": 0.83101273867086, + "manhattan": { + "accuracy": 0.7648351648351648, + "accuracy_threshold": 20.66407012939453, + "ap": 0.8295791594338434, + "f1": 0.7671043538355218, + "f1_threshold": 21.42278480529785, + "precision": 0.7254901960784313, + "recall": 0.8137829912023461 + }, + "max": { + "accuracy": 0.7721611721611722, + "ap": 0.83101273867086, + "f1": 0.7692307692307693 + }, + "similarity": { + "accuracy": 0.7721611721611722, + "accuracy_threshold": 0.6743011474609375, + "ap": 0.83101273867086, + "f1": 
0.7692307692307693, + "f1_threshold": 0.6320723295211792, + "precision": 0.7125, + "recall": 0.8357771260997068 + } + } + ] + }, + "task_name": "XNLI" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/XPQARetrieval.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/XPQARetrieval.json new file mode 100644 index 0000000..f29bc9d --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/XPQARetrieval.json @@ -0,0 +1,455 @@ +{ + "dataset_revision": "c99d599f0a6ab9b85b065da6f9d94f9cf731679f", + "evaluation_time": 39.83676791191101, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "test": [ + { + "hf_subset": "spa-spa", + "languages": [ + "spa-Latn", + "spa-Latn" + ], + "main_score": 0.62131, + "map_at_1": 0.30767, + "map_at_10": 0.55365, + "map_at_100": 0.57054, + "map_at_1000": 0.57147, + "map_at_20": 0.5639, + "map_at_3": 0.47682, + "map_at_5": 0.52589, + "mrr_at_1": 0.5825977301387137, + "mrr_at_10": 0.6588762785484096, + "mrr_at_100": 0.6647916594509349, + "mrr_at_1000": 0.6650319822384685, + "mrr_at_20": 0.6629959048883439, + "mrr_at_3": 0.6397646069777213, + "mrr_at_5": 0.6515552753257667, + "nauc_map_at_1000_diff1": 0.4358346558983322, + "nauc_map_at_1000_max": 0.45433756058062036, + "nauc_map_at_1000_std": -0.12543865212024713, + "nauc_map_at_100_diff1": 0.43571116691176875, + "nauc_map_at_100_max": 0.4542784044743202, + "nauc_map_at_100_std": -0.12574122976030602, + "nauc_map_at_10_diff1": 0.43300540933795, + "nauc_map_at_10_max": 0.44949008537769325, + "nauc_map_at_10_std": -0.12870689577178945, + "nauc_map_at_1_diff1": 0.5087981028915581, + "nauc_map_at_1_max": 0.184279816635002, + "nauc_map_at_1_std": -0.14673155609048116, + "nauc_map_at_20_diff1": 0.4333753103703655, + "nauc_map_at_20_max": 0.4529815237337824, + 
"nauc_map_at_20_std": -0.12654439180812416, + "nauc_map_at_3_diff1": 0.4414869765929918, + "nauc_map_at_3_max": 0.35655936086351914, + "nauc_map_at_3_std": -0.13507918419506287, + "nauc_map_at_5_diff1": 0.4318554010888159, + "nauc_map_at_5_max": 0.42411923599416457, + "nauc_map_at_5_std": -0.12494545944654273, + "nauc_mrr_at_1000_diff1": 0.5369045416723449, + "nauc_mrr_at_1000_max": 0.5049049842162314, + "nauc_mrr_at_1000_std": -0.18991107291058085, + "nauc_mrr_at_100_diff1": 0.5367914776099293, + "nauc_mrr_at_100_max": 0.5049847001541511, + "nauc_mrr_at_100_std": -0.18977696935732827, + "nauc_mrr_at_10_diff1": 0.5359454228762074, + "nauc_mrr_at_10_max": 0.503467137173582, + "nauc_mrr_at_10_std": -0.19249699705090878, + "nauc_mrr_at_1_diff1": 0.5465616766093035, + "nauc_mrr_at_1_max": 0.48493723491485774, + "nauc_mrr_at_1_std": -0.18358322711351044, + "nauc_mrr_at_20_diff1": 0.5362971057585659, + "nauc_mrr_at_20_max": 0.5053223289620384, + "nauc_mrr_at_20_std": -0.1904156575425689, + "nauc_mrr_at_3_diff1": 0.5430639019578393, + "nauc_mrr_at_3_max": 0.509347538460306, + "nauc_mrr_at_3_std": -0.18436056584997068, + "nauc_mrr_at_5_diff1": 0.5368615216505613, + "nauc_mrr_at_5_max": 0.5012004098366819, + "nauc_mrr_at_5_std": -0.1884226682863678, + "nauc_ndcg_at_1000_diff1": 0.4584969048369708, + "nauc_ndcg_at_1000_max": 0.4844900176177759, + "nauc_ndcg_at_1000_std": -0.13701698971869145, + "nauc_ndcg_at_100_diff1": 0.45515065975534635, + "nauc_ndcg_at_100_max": 0.4865140123080581, + "nauc_ndcg_at_100_std": -0.1348360137832305, + "nauc_ndcg_at_10_diff1": 0.4432893756069937, + "nauc_ndcg_at_10_max": 0.47246039014427976, + "nauc_ndcg_at_10_std": -0.15354526223697054, + "nauc_ndcg_at_1_diff1": 0.5465616766093035, + "nauc_ndcg_at_1_max": 0.48493723491485774, + "nauc_ndcg_at_1_std": -0.18358322711351044, + "nauc_ndcg_at_20_diff1": 0.4444393561645031, + "nauc_ndcg_at_20_max": 0.4805038097326813, + "nauc_ndcg_at_20_std": -0.14617617574907252, + "nauc_ndcg_at_3_diff1": 
0.4509787454765602, + "nauc_ndcg_at_3_max": 0.4633005933047545, + "nauc_ndcg_at_3_std": -0.13333216736302128, + "nauc_ndcg_at_5_diff1": 0.44549176581156746, + "nauc_ndcg_at_5_max": 0.44402605750222407, + "nauc_ndcg_at_5_std": -0.14395319505890006, + "nauc_precision_at_1000_diff1": -0.10847210854052373, + "nauc_precision_at_1000_max": 0.2698590743987273, + "nauc_precision_at_1000_std": 0.10144043535764428, + "nauc_precision_at_100_diff1": -0.056926376435649996, + "nauc_precision_at_100_max": 0.3268077669839777, + "nauc_precision_at_100_std": 0.07810738556853757, + "nauc_precision_at_10_diff1": 0.036785739099808035, + "nauc_precision_at_10_max": 0.42009919238630083, + "nauc_precision_at_10_std": -0.0042626085526570835, + "nauc_precision_at_1_diff1": 0.5465616766093035, + "nauc_precision_at_1_max": 0.48493723491485774, + "nauc_precision_at_1_std": -0.18358322711351044, + "nauc_precision_at_20_diff1": 0.0024843035416631035, + "nauc_precision_at_20_max": 0.39133479571650653, + "nauc_precision_at_20_std": 0.023323975318933963, + "nauc_precision_at_3_diff1": 0.14734689557052333, + "nauc_precision_at_3_max": 0.45642620147716617, + "nauc_precision_at_3_std": -0.045538288680961346, + "nauc_precision_at_5_diff1": 0.0865138762363379, + "nauc_precision_at_5_max": 0.443537554945808, + "nauc_precision_at_5_std": -0.010358338825898419, + "nauc_recall_at_1000_diff1": 0.11533806559247073, + "nauc_recall_at_1000_max": 0.6091823184075964, + "nauc_recall_at_1000_std": 0.5763610781511838, + "nauc_recall_at_100_diff1": 0.33378914360694933, + "nauc_recall_at_100_max": 0.5063990653399928, + "nauc_recall_at_100_std": 0.012900761253729862, + "nauc_recall_at_10_diff1": 0.34997404624893247, + "nauc_recall_at_10_max": 0.4355999995336417, + "nauc_recall_at_10_std": -0.17213918798549133, + "nauc_recall_at_1_diff1": 0.5087981028915581, + "nauc_recall_at_1_max": 0.184279816635002, + "nauc_recall_at_1_std": -0.14673155609048116, + "nauc_recall_at_20_diff1": 0.3210039858271469, + 
"nauc_recall_at_20_max": 0.44974326408245163, + "nauc_recall_at_20_std": -0.14162201319398784, + "nauc_recall_at_3_diff1": 0.4031889131226075, + "nauc_recall_at_3_max": 0.33174067558263554, + "nauc_recall_at_3_std": -0.13671252420278793, + "nauc_recall_at_5_diff1": 0.3776285834912711, + "nauc_recall_at_5_max": 0.3917973140957106, + "nauc_recall_at_5_std": -0.14254559607831271, + "ndcg_at_1": 0.5826, + "ndcg_at_10": 0.62131, + "ndcg_at_100": 0.67641, + "ndcg_at_1000": 0.69068, + "ndcg_at_20": 0.64624, + "ndcg_at_3": 0.57045, + "ndcg_at_5": 0.58088, + "precision_at_1": 0.5826, + "precision_at_10": 0.17491, + "precision_at_100": 0.02242, + "precision_at_1000": 0.00244, + "precision_at_20": 0.09641, + "precision_at_3": 0.40395, + "precision_at_5": 0.2976, + "recall_at_1": 0.30767, + "recall_at_10": 0.69863, + "recall_at_100": 0.90467, + "recall_at_1000": 0.99424, + "recall_at_20": 0.7776, + "recall_at_3": 0.52024, + "recall_at_5": 0.60565 + }, + { + "hf_subset": "eng-spa", + "languages": [ + "eng-Latn", + "spa-Latn" + ], + "main_score": 0.42226, + "map_at_1": 0.16812, + "map_at_10": 0.34067, + "map_at_100": 0.36199, + "map_at_1000": 0.36356, + "map_at_20": 0.35241, + "map_at_3": 0.27303, + "map_at_5": 0.31373, + "mrr_at_1": 0.3707440100882724, + "mrr_at_10": 0.46614974278908683, + "mrr_at_100": 0.4747257633445253, + "mrr_at_1000": 0.4752351388123732, + "mrr_at_20": 0.4714925229790201, + "mrr_at_3": 0.43778898696931473, + "mrr_at_5": 0.4525430853299704, + "nauc_map_at_1000_diff1": 0.36857954452637665, + "nauc_map_at_1000_max": 0.3366952666222641, + "nauc_map_at_1000_std": -0.033898191948170626, + "nauc_map_at_100_diff1": 0.3679165519073717, + "nauc_map_at_100_max": 0.3363005147018941, + "nauc_map_at_100_std": -0.034120125544841604, + "nauc_map_at_10_diff1": 0.3647340024784974, + "nauc_map_at_10_max": 0.3291350821422701, + "nauc_map_at_10_std": -0.0431006016701204, + "nauc_map_at_1_diff1": 0.4397872286440893, + "nauc_map_at_1_max": 0.13690130408267454, + 
"nauc_map_at_1_std": -0.10049392344067777, + "nauc_map_at_20_diff1": 0.3685314775310463, + "nauc_map_at_20_max": 0.3367438075919675, + "nauc_map_at_20_std": -0.03583371094829659, + "nauc_map_at_3_diff1": 0.3864317738194924, + "nauc_map_at_3_max": 0.27107655139508796, + "nauc_map_at_3_std": -0.05872789124318661, + "nauc_map_at_5_diff1": 0.3661130005531357, + "nauc_map_at_5_max": 0.3140456521316093, + "nauc_map_at_5_std": -0.044669795776272836, + "nauc_mrr_at_1000_diff1": 0.41457409193376704, + "nauc_mrr_at_1000_max": 0.3386875926865476, + "nauc_mrr_at_1000_std": -0.049324065040671494, + "nauc_mrr_at_100_diff1": 0.41448821086891685, + "nauc_mrr_at_100_max": 0.3387559252240035, + "nauc_mrr_at_100_std": -0.04931264233531165, + "nauc_mrr_at_10_diff1": 0.41201219803825373, + "nauc_mrr_at_10_max": 0.33782164495636746, + "nauc_mrr_at_10_std": -0.05245382519822492, + "nauc_mrr_at_1_diff1": 0.46237275651030857, + "nauc_mrr_at_1_max": 0.3373716643785202, + "nauc_mrr_at_1_std": -0.05512185485814805, + "nauc_mrr_at_20_diff1": 0.4139293819590452, + "nauc_mrr_at_20_max": 0.33926651057786034, + "nauc_mrr_at_20_std": -0.049671092002497835, + "nauc_mrr_at_3_diff1": 0.4191383774576965, + "nauc_mrr_at_3_max": 0.33873672301980917, + "nauc_mrr_at_3_std": -0.05521709098019772, + "nauc_mrr_at_5_diff1": 0.4104212962343956, + "nauc_mrr_at_5_max": 0.3374575320386898, + "nauc_mrr_at_5_std": -0.05097508416279156, + "nauc_ndcg_at_1000_diff1": 0.37601190896914827, + "nauc_ndcg_at_1000_max": 0.3523164833219444, + "nauc_ndcg_at_1000_std": -0.02267724530737706, + "nauc_ndcg_at_100_diff1": 0.3655462380040346, + "nauc_ndcg_at_100_max": 0.3473728737730819, + "nauc_ndcg_at_100_std": -0.022302358007367094, + "nauc_ndcg_at_10_diff1": 0.35809430560291866, + "nauc_ndcg_at_10_max": 0.3370553888948655, + "nauc_ndcg_at_10_std": -0.04355932537711316, + "nauc_ndcg_at_1_diff1": 0.46237275651030857, + "nauc_ndcg_at_1_max": 0.3373716643785202, + "nauc_ndcg_at_1_std": -0.05512185485814805, + 
"nauc_ndcg_at_20_diff1": 0.3675109720920026, + "nauc_ndcg_at_20_max": 0.35243672262076653, + "nauc_ndcg_at_20_std": -0.025720887684613263, + "nauc_ndcg_at_3_diff1": 0.37578251536294394, + "nauc_ndcg_at_3_max": 0.33686429015009983, + "nauc_ndcg_at_3_std": -0.03488423643330452, + "nauc_ndcg_at_5_diff1": 0.3567200530201888, + "nauc_ndcg_at_5_max": 0.32488814531001, + "nauc_ndcg_at_5_std": -0.039584545893125954, + "nauc_precision_at_1000_diff1": 0.05731657441695552, + "nauc_precision_at_1000_max": 0.2687395311375437, + "nauc_precision_at_1000_std": 0.0883621405840065, + "nauc_precision_at_100_diff1": 0.10070727729648858, + "nauc_precision_at_100_max": 0.3169905387614638, + "nauc_precision_at_100_std": 0.08151390125412254, + "nauc_precision_at_10_diff1": 0.1933501814870509, + "nauc_precision_at_10_max": 0.41439778582448045, + "nauc_precision_at_10_std": 0.026526740643510825, + "nauc_precision_at_1_diff1": 0.46237275651030857, + "nauc_precision_at_1_max": 0.3373716643785202, + "nauc_precision_at_1_std": -0.05512185485814805, + "nauc_precision_at_20_diff1": 0.18623628842852807, + "nauc_precision_at_20_max": 0.4078016683452761, + "nauc_precision_at_20_std": 0.0562632257192118, + "nauc_precision_at_3_diff1": 0.2878213862141047, + "nauc_precision_at_3_max": 0.42659023952455294, + "nauc_precision_at_3_std": 0.01697015372210527, + "nauc_precision_at_5_diff1": 0.21810149123281572, + "nauc_precision_at_5_max": 0.4285031530986277, + "nauc_precision_at_5_std": 0.030320145720878888, + "nauc_recall_at_1000_diff1": 0.41284114454971155, + "nauc_recall_at_1000_max": 0.5515962619999322, + "nauc_recall_at_1000_std": 0.3780658319784837, + "nauc_recall_at_100_diff1": 0.24098675007826206, + "nauc_recall_at_100_max": 0.292446771696002, + "nauc_recall_at_100_std": 0.03965770475694044, + "nauc_recall_at_10_diff1": 0.26280469280549834, + "nauc_recall_at_10_max": 0.2855449583786948, + "nauc_recall_at_10_std": -0.039746168246652905, + "nauc_recall_at_1_diff1": 0.4397872286440893, + 
"nauc_recall_at_1_max": 0.13690130408267454, + "nauc_recall_at_1_std": -0.10049392344067777, + "nauc_recall_at_20_diff1": 0.27793695059351786, + "nauc_recall_at_20_max": 0.3244021879366212, + "nauc_recall_at_20_std": 0.021458372124719582, + "nauc_recall_at_3_diff1": 0.30779842229245036, + "nauc_recall_at_3_max": 0.24317871168500765, + "nauc_recall_at_3_std": -0.04885795390000189, + "nauc_recall_at_5_diff1": 0.26223617043875924, + "nauc_recall_at_5_max": 0.27657063181788233, + "nauc_recall_at_5_std": -0.024278596698545603, + "ndcg_at_1": 0.37074, + "ndcg_at_10": 0.42226, + "ndcg_at_100": 0.49688, + "ndcg_at_1000": 0.52527, + "ndcg_at_20": 0.45221, + "ndcg_at_3": 0.35718, + "ndcg_at_5": 0.3762, + "precision_at_1": 0.37074, + "precision_at_10": 0.13153, + "precision_at_100": 0.02024, + "precision_at_1000": 0.00244, + "precision_at_20": 0.07724, + "precision_at_3": 0.26272, + "precision_at_5": 0.20681, + "recall_at_1": 0.16812, + "recall_at_10": 0.52533, + "recall_at_100": 0.80223, + "recall_at_1000": 0.98306, + "recall_at_20": 0.61721, + "recall_at_3": 0.32879, + "recall_at_5": 0.41597 + }, + { + "hf_subset": "spa-eng", + "languages": [ + "spa-Latn", + "eng-Latn" + ], + "main_score": 0.54568, + "map_at_1": 0.25076, + "map_at_10": 0.47252, + "map_at_100": 0.49183, + "map_at_1000": 0.49294, + "map_at_20": 0.48378, + "map_at_3": 0.39858, + "map_at_5": 0.44287, + "mrr_at_1": 0.4930643127364439, + "mrr_at_10": 0.5779769410916951, + "mrr_at_100": 0.5854714761180847, + "mrr_at_1000": 0.5857712226869909, + "mrr_at_20": 0.5827856808873947, + "mrr_at_3": 0.5559058427910885, + "mrr_at_5": 0.5680748213535095, + "nauc_map_at_1000_diff1": 0.44037870899964965, + "nauc_map_at_1000_max": 0.43023175627950555, + "nauc_map_at_1000_std": -0.06649325559576834, + "nauc_map_at_100_diff1": 0.4402205862015298, + "nauc_map_at_100_max": 0.4303113918373693, + "nauc_map_at_100_std": -0.06665063725840305, + "nauc_map_at_10_diff1": 0.4398234449630802, + "nauc_map_at_10_max": 0.426352881839255, + 
"nauc_map_at_10_std": -0.06968968677954576, + "nauc_map_at_1_diff1": 0.5031559366613207, + "nauc_map_at_1_max": 0.19366482945714678, + "nauc_map_at_1_std": -0.10314670804994072, + "nauc_map_at_20_diff1": 0.4400604951987163, + "nauc_map_at_20_max": 0.429235910199617, + "nauc_map_at_20_std": -0.06937879137307573, + "nauc_map_at_3_diff1": 0.4499442274853501, + "nauc_map_at_3_max": 0.3600817992851635, + "nauc_map_at_3_std": -0.08966707135558066, + "nauc_map_at_5_diff1": 0.44061409513482563, + "nauc_map_at_5_max": 0.40214397206346714, + "nauc_map_at_5_std": -0.08150270548986857, + "nauc_mrr_at_1000_diff1": 0.47289757327150717, + "nauc_mrr_at_1000_max": 0.46743176315383217, + "nauc_mrr_at_1000_std": -0.06964823385613415, + "nauc_mrr_at_100_diff1": 0.47283297195326174, + "nauc_mrr_at_100_max": 0.4675810361649063, + "nauc_mrr_at_100_std": -0.06944908655826587, + "nauc_mrr_at_10_diff1": 0.4721576256021172, + "nauc_mrr_at_10_max": 0.4677638762163337, + "nauc_mrr_at_10_std": -0.0687749388674001, + "nauc_mrr_at_1_diff1": 0.49835947769517386, + "nauc_mrr_at_1_max": 0.4391667120739193, + "nauc_mrr_at_1_std": -0.0743415827648592, + "nauc_mrr_at_20_diff1": 0.4725105311323889, + "nauc_mrr_at_20_max": 0.4674154537141511, + "nauc_mrr_at_20_std": -0.0709219361283738, + "nauc_mrr_at_3_diff1": 0.4776867449467645, + "nauc_mrr_at_3_max": 0.46855436808005135, + "nauc_mrr_at_3_std": -0.06548370329677092, + "nauc_mrr_at_5_diff1": 0.4730036970866975, + "nauc_mrr_at_5_max": 0.46926444144837987, + "nauc_mrr_at_5_std": -0.0700590291003169, + "nauc_ndcg_at_1000_diff1": 0.44397150820393094, + "nauc_ndcg_at_1000_max": 0.4581625606285969, + "nauc_ndcg_at_1000_std": -0.05509964656175125, + "nauc_ndcg_at_100_diff1": 0.43906541770619273, + "nauc_ndcg_at_100_max": 0.45869043042426993, + "nauc_ndcg_at_100_std": -0.05143101109165746, + "nauc_ndcg_at_10_diff1": 0.43710058767962184, + "nauc_ndcg_at_10_max": 0.4518053434834123, + "nauc_ndcg_at_10_std": -0.06802879936405161, + "nauc_ndcg_at_1_diff1": 
0.49835947769517386, + "nauc_ndcg_at_1_max": 0.4391667120739193, + "nauc_ndcg_at_1_std": -0.0743415827648592, + "nauc_ndcg_at_20_diff1": 0.4373332630404972, + "nauc_ndcg_at_20_max": 0.4545445859197745, + "nauc_ndcg_at_20_std": -0.06945662028583341, + "nauc_ndcg_at_3_diff1": 0.4407937174127912, + "nauc_ndcg_at_3_max": 0.4362619446753429, + "nauc_ndcg_at_3_std": -0.06981144733431256, + "nauc_ndcg_at_5_diff1": 0.43932674653136927, + "nauc_ndcg_at_5_max": 0.42640696415820456, + "nauc_ndcg_at_5_std": -0.07968477875591329, + "nauc_precision_at_1000_diff1": -0.06854918638017435, + "nauc_precision_at_1000_max": 0.2584666665253486, + "nauc_precision_at_1000_std": 0.11440032576324274, + "nauc_precision_at_100_diff1": -0.019001455258262284, + "nauc_precision_at_100_max": 0.3186816805827729, + "nauc_precision_at_100_std": 0.10631143221681641, + "nauc_precision_at_10_diff1": 0.11604299133320306, + "nauc_precision_at_10_max": 0.4345592202699076, + "nauc_precision_at_10_std": 0.03715303963786623, + "nauc_precision_at_1_diff1": 0.49835947769517386, + "nauc_precision_at_1_max": 0.4391667120739193, + "nauc_precision_at_1_std": -0.0743415827648592, + "nauc_precision_at_20_diff1": 0.07262755499492132, + "nauc_precision_at_20_max": 0.4024285359446358, + "nauc_precision_at_20_std": 0.04942463705339145, + "nauc_precision_at_3_diff1": 0.21957206519538341, + "nauc_precision_at_3_max": 0.46602031818307155, + "nauc_precision_at_3_std": -0.01110400265147553, + "nauc_precision_at_5_diff1": 0.16678803606876608, + "nauc_precision_at_5_max": 0.4472928086351989, + "nauc_precision_at_5_std": 0.003928434995869322, + "nauc_recall_at_1000_diff1": 0.7474284157510483, + "nauc_recall_at_1000_max": 0.8532800770161174, + "nauc_recall_at_1000_std": 0.6678033002411131, + "nauc_recall_at_100_diff1": 0.3244948672244501, + "nauc_recall_at_100_max": 0.4430626241266509, + "nauc_recall_at_100_std": 0.10029818625199731, + "nauc_recall_at_10_diff1": 0.3678472474616527, + "nauc_recall_at_10_max": 0.42735698561333024, 
+ "nauc_recall_at_10_std": -0.06817077731667845, + "nauc_recall_at_1_diff1": 0.5031559366613207, + "nauc_recall_at_1_max": 0.19366482945714678, + "nauc_recall_at_1_std": -0.10314670804994072, + "nauc_recall_at_20_diff1": 0.3614138029940608, + "nauc_recall_at_20_max": 0.42297531023594026, + "nauc_recall_at_20_std": -0.082511188971488, + "nauc_recall_at_3_diff1": 0.40532324458637353, + "nauc_recall_at_3_max": 0.3476895434932006, + "nauc_recall_at_3_std": -0.0854707435491653, + "nauc_recall_at_5_diff1": 0.3835500286622659, + "nauc_recall_at_5_max": 0.3911118350697689, + "nauc_recall_at_5_std": -0.08384948046394491, + "ndcg_at_1": 0.49306, + "ndcg_at_10": 0.54568, + "ndcg_at_100": 0.6113, + "ndcg_at_1000": 0.62853, + "ndcg_at_20": 0.57315, + "ndcg_at_3": 0.48673, + "ndcg_at_5": 0.50041, + "precision_at_1": 0.49306, + "precision_at_10": 0.15927, + "precision_at_100": 0.02194, + "precision_at_1000": 0.00244, + "precision_at_20": 0.0901, + "precision_at_3": 0.34931, + "precision_at_5": 0.26129, + "recall_at_1": 0.25076, + "recall_at_10": 0.63781, + "recall_at_100": 0.88443, + "recall_at_1000": 0.99277, + "recall_at_20": 0.72156, + "recall_at_3": 0.44931, + "recall_at_5": 0.53344 + } + ] + }, + "task_name": "XPQARetrieval" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/XQuADRetrieval.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/XQuADRetrieval.json new file mode 100644 index 0000000..b5a723e --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/XQuADRetrieval.json @@ -0,0 +1,158 @@ +{ + "dataset_revision": "51adfef1c1287aab1d2d91b5bead9bcfb9c68583", + "evaluation_time": 8.197790384292603, + "kg_co2_emissions": null, + "mteb_version": "1.12.39", + "scores": { + "validation": [ + { + "hf_subset": "es", + "languages": [ + "spa-Latn" + ], + 
"main_score": 0.96649, + "map_at_1": 0.9299, + "map_at_10": 0.95627, + "map_at_100": 0.95642, + "map_at_1000": 0.95642, + "map_at_20": 0.95639, + "map_at_3": 0.95256, + "map_at_5": 0.95543, + "mrr_at_1": 0.9298986486486487, + "mrr_at_10": 0.9562744664307169, + "mrr_at_100": 0.9564209249296571, + "mrr_at_1000": 0.9564209249296571, + "mrr_at_20": 0.956394531348576, + "mrr_at_3": 0.952561936936937, + "mrr_at_5": 0.9554335585585589, + "nauc_map_at_1000_diff1": 0.921185391628374, + "nauc_map_at_1000_max": 0.6576585579221138, + "nauc_map_at_1000_std": 0.21933144412549752, + "nauc_map_at_100_diff1": 0.921185391628374, + "nauc_map_at_100_max": 0.6576585579221138, + "nauc_map_at_100_std": 0.21933144412549752, + "nauc_map_at_10_diff1": 0.9213009104587758, + "nauc_map_at_10_max": 0.6586567588227937, + "nauc_map_at_10_std": 0.22038572155157674, + "nauc_map_at_1_diff1": 0.920431833029575, + "nauc_map_at_1_max": 0.6234160492455382, + "nauc_map_at_1_std": 0.18603836869788748, + "nauc_map_at_20_diff1": 0.9212330966484269, + "nauc_map_at_20_max": 0.657865770844904, + "nauc_map_at_20_std": 0.21980396831716806, + "nauc_map_at_3_diff1": 0.919643514342214, + "nauc_map_at_3_max": 0.6638380262492327, + "nauc_map_at_3_std": 0.21962358109814037, + "nauc_map_at_5_diff1": 0.9219810338253163, + "nauc_map_at_5_max": 0.6648222870467629, + "nauc_map_at_5_std": 0.2280235577407446, + "nauc_mrr_at_1000_diff1": 0.921185391628374, + "nauc_mrr_at_1000_max": 0.6576585579221138, + "nauc_mrr_at_1000_std": 0.21933144412549752, + "nauc_mrr_at_100_diff1": 0.921185391628374, + "nauc_mrr_at_100_max": 0.6576585579221138, + "nauc_mrr_at_100_std": 0.21933144412549752, + "nauc_mrr_at_10_diff1": 0.9213009104587758, + "nauc_mrr_at_10_max": 0.6586567588227937, + "nauc_mrr_at_10_std": 0.22038572155157674, + "nauc_mrr_at_1_diff1": 0.920431833029575, + "nauc_mrr_at_1_max": 0.6234160492455382, + "nauc_mrr_at_1_std": 0.18603836869788748, + "nauc_mrr_at_20_diff1": 0.9212330966484269, + "nauc_mrr_at_20_max": 
0.657865770844904, + "nauc_mrr_at_20_std": 0.21980396831716806, + "nauc_mrr_at_3_diff1": 0.919643514342214, + "nauc_mrr_at_3_max": 0.6638380262492327, + "nauc_mrr_at_3_std": 0.21962358109814037, + "nauc_mrr_at_5_diff1": 0.9219810338253163, + "nauc_mrr_at_5_max": 0.6648222870467629, + "nauc_mrr_at_5_std": 0.2280235577407446, + "nauc_ndcg_at_1000_diff1": 0.9213879095724823, + "nauc_ndcg_at_1000_max": 0.6617261175044244, + "nauc_ndcg_at_1000_std": 0.22412290969575516, + "nauc_ndcg_at_100_diff1": 0.9213879095724823, + "nauc_ndcg_at_100_max": 0.6617261175044244, + "nauc_ndcg_at_100_std": 0.22412290969575516, + "nauc_ndcg_at_10_diff1": 0.9220014493472395, + "nauc_ndcg_at_10_max": 0.6669748906359305, + "nauc_ndcg_at_10_std": 0.2312084393294489, + "nauc_ndcg_at_1_diff1": 0.920431833029575, + "nauc_ndcg_at_1_max": 0.6234160492455382, + "nauc_ndcg_at_1_std": 0.18603836869788748, + "nauc_ndcg_at_20_diff1": 0.9217857889625972, + "nauc_ndcg_at_20_max": 0.6634382231783379, + "nauc_ndcg_at_20_std": 0.22804985657905577, + "nauc_ndcg_at_3_diff1": 0.9188548939004717, + "nauc_ndcg_at_3_max": 0.6800304674485118, + "nauc_ndcg_at_3_std": 0.23273239972793494, + "nauc_ndcg_at_5_diff1": 0.9238834674376439, + "nauc_ndcg_at_5_max": 0.6858940403412974, + "nauc_ndcg_at_5_std": 0.25336599923102227, + "nauc_precision_at_1000_diff1": 1.0, + "nauc_precision_at_1000_max": 1.0, + "nauc_precision_at_1000_std": 1.0, + "nauc_precision_at_100_diff1": 1.0, + "nauc_precision_at_100_max": 1.0, + "nauc_precision_at_100_std": 1.0, + "nauc_precision_at_10_diff1": 0.956443365338596, + "nauc_precision_at_10_max": 0.956443365338596, + "nauc_precision_at_10_std": 0.6640202050063402, + "nauc_precision_at_1_diff1": 0.920431833029575, + "nauc_precision_at_1_max": 0.6234160492455382, + "nauc_precision_at_1_std": 0.18603836869788748, + "nauc_precision_at_20_diff1": 1.0, + "nauc_precision_at_20_max": 1.0, + "nauc_precision_at_20_std": 1.0, + "nauc_precision_at_3_diff1": 0.9137363782342662, + "nauc_precision_at_3_max": 
0.7774210714093309, + "nauc_precision_at_3_std": 0.3113025626032155, + "nauc_precision_at_5_diff1": 0.9524836712785049, + "nauc_precision_at_5_max": 0.9762418356392524, + "nauc_precision_at_5_std": 0.6088234777946944, + "nauc_recall_at_1000_diff1": NaN, + "nauc_recall_at_1000_max": NaN, + "nauc_recall_at_1000_std": NaN, + "nauc_recall_at_100_diff1": NaN, + "nauc_recall_at_100_max": NaN, + "nauc_recall_at_100_std": NaN, + "nauc_recall_at_10_diff1": 0.9564433653385848, + "nauc_recall_at_10_max": 0.9564433653385848, + "nauc_recall_at_10_std": 0.6640202050063789, + "nauc_recall_at_1_diff1": 0.920431833029575, + "nauc_recall_at_1_max": 0.6234160492455382, + "nauc_recall_at_1_std": 0.18603836869788748, + "nauc_recall_at_20_diff1": 1.0, + "nauc_recall_at_20_max": 1.0, + "nauc_recall_at_20_std": 1.0, + "nauc_recall_at_3_diff1": 0.9137363782342711, + "nauc_recall_at_3_max": 0.7774210714093432, + "nauc_recall_at_3_std": 0.3113025626032132, + "nauc_recall_at_5_diff1": 0.9524836712785105, + "nauc_recall_at_5_max": 0.9762418356392553, + "nauc_recall_at_5_std": 0.6088234777946975, + "ndcg_at_1": 0.9299, + "ndcg_at_10": 0.96649, + "ndcg_at_100": 0.96709, + "ndcg_at_1000": 0.96709, + "ndcg_at_20": 0.96692, + "ndcg_at_3": 0.95917, + "ndcg_at_5": 0.96436, + "precision_at_1": 0.9299, + "precision_at_10": 0.09975, + "precision_at_100": 0.01, + "precision_at_1000": 0.001, + "precision_at_20": 0.04996, + "precision_at_3": 0.32601, + "precision_at_5": 0.19814, + "recall_at_1": 0.9299, + "recall_at_10": 0.99747, + "recall_at_100": 1.0, + "recall_at_1000": 1.0, + "recall_at_20": 0.99916, + "recall_at_3": 0.97804, + "recall_at_5": 0.99071 + } + ] + }, + "task_name": "XQuADRetrieval" +} \ No newline at end of file diff --git a/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/model_meta.json b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/model_meta.json new file mode 100644 index 
0000000..63cd498 --- /dev/null +++ b/evaluation/embeddings_model/results/BAAI/bge-m3/BAAI__bge-m3/babcf60cae0a1f438d7ade582983d4ba462303c2/model_meta.json @@ -0,0 +1 @@ +{"name": "BAAI/bge-m3", "revision": "babcf60cae0a1f438d7ade582983d4ba462303c2", "release_date": null, "languages": [], "n_parameters": null, "memory_usage": null, "max_tokens": null, "embed_dim": null, "license": null, "open_source": null, "similarity_fn_name": null, "framework": ["Sentence Transformers"], "loader": null} \ No newline at end of file From 4287f9e363ddaf47f94e37842c6945c7aba1a44c Mon Sep 17 00:00:00 2001 From: Bukosabino Date: Wed, 26 Jun 2024 11:49:02 +0000 Subject: [PATCH 8/9] adding bge-m3 results --- evaluation/embeddings_model/mteb_benchmark.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/evaluation/embeddings_model/mteb_benchmark.py b/evaluation/embeddings_model/mteb_benchmark.py index 0af1720..3257753 100644 --- a/evaluation/embeddings_model/mteb_benchmark.py +++ b/evaluation/embeddings_model/mteb_benchmark.py @@ -7,7 +7,7 @@ # TODO: write results on model cards huggingface # Define the sentence-transformers model name -model_name = "dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn" +# model_name = "dariolopez/roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn" # model_name = "dariolopez/roberta-base-bne-finetuned-msmarco-qa-es" # model_name = "PlanTL-GOB-ES/roberta-base-bne" # model_name = "PlanTL-GOB-ES/RoBERTalex" @@ -18,6 +18,7 @@ # model_name = "intfloat/multilingual-e5-base" # model_name = "intfloat/multilingual-e5-large" # model_name = "intfloat/multilingual-e5-large-instruct" +model_name = "BAAI/bge-m3" try: model = SentenceTransformer(model_name, device='cuda') @@ -40,20 +41,20 @@ ] TASK_LIST_MULTI_LABEL_CLASSIFICATION = [ - "MultiEURLEXMultilabelClassification" + ## "MultiEURLEXMultilabelClassification" ] TASK_LIST_RETRIEVAL = [ # "BelebeleRetrieval", "MintakaRetrieval", - "MIRACLRetrieval", + ## "MIRACLRetrieval", # 
"MLQARetrieval", # "MultiLongDocRetrieval", "PublicHealthQA", - "XMarket", + # "XMarket", "XPQARetrieval", "XQuADRetrieval", - "SpanishPassageRetrievalS2P", + ## "SpanishPassageRetrievalS2P", "SpanishPassageRetrievalS2S" ] @@ -69,14 +70,14 @@ # "MultilingualSentimentClassification", "SIB200Classification", "TweetSentimentClassification", - "SpanishNewsClassification", + ## "SpanishNewsClassification", "SpanishSentimentClassification" ] TASK_LIST_CLUSTERING = [ # "MLSUMClusteringP2P.v2", - "SpanishNewsClusteringP2P", - "MLSUMClusteringS2S.v2", + ## "SpanishNewsClusteringP2P", + ## "MLSUMClusteringS2S.v2", "SIB200ClusteringS2S" ] @@ -86,7 +87,7 @@ TASK_LIST_STS = [ "STS17", - "STS22", + ## "STS22", "STSBenchmarkMultilingualSTS", "STSES" ] From d0a3e27ab1c3ef12438c5acdb63345e7031c0e3d Mon Sep 17 00:00:00 2001 From: Bukosabino Date: Wed, 26 Jun 2024 14:08:23 +0200 Subject: [PATCH 9/9] Update README.md with the results --- evaluation/embeddings_model/README.md | 60 +++++++++++++-------------- 1 file changed, 29 insertions(+), 31 deletions(-) diff --git a/evaluation/embeddings_model/README.md b/evaluation/embeddings_model/README.md index 642e1c3..600eaa3 100644 --- a/evaluation/embeddings_model/README.md +++ b/evaluation/embeddings_model/README.md @@ -3,34 +3,32 @@ python mteb_benchmark.py ```` - -| Benchmark | dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn | intfloat__multilingual-e5-large | -|-----------------------------|---------|----------| -| AmazonReviewsClassification | 0.28194 | 0.42702 -| CataloniaTweetClassification | 0.48369999999999996 | 0.5025000000000001 -| MassiveIntentClassification | 0.5473100201748486 | 0.6470073974445192 -| MassiveScenarioClassification | 0.6322797579018158 | 0.689340954942838 -| MintakaRetrieval | 0.16548 | 0.2836 -| MIRACLRetrieval | 0.70137 | 0.82005 -| MLSUMClusteringS2S.v2 | 0.42841628413893035 | 0.48075917245775485 -| MTOPDomainClassification | 0.7924616410940628 | 0.8998999332888593 -| MTOPIntentClassification | 
0.5307538358905937 | 0.6673782521681121 -| MultiEURLEXMultilabelClassification | 0.05144 | 0.05226000000000001 -| MultiHateClassification | 0.5578 | 0.639 -| PawsX | 0.6015684593563027 | 0.5639685167829116 -| PublicHealthQA | 0.62516 | 0.80811 -| SIB200Classification | 0.6549019607843137 | 0.7348039215686275 -| SIB200ClusteringS2S | 0.3347573603718645 | 0.3637865013678009 -| SpanishNewsClassification | 0.81318359375 | 0.880517578125 -| SpanishNewsClusteringP2P | 0.379918321557151 | 0.4399933663826367 -| SpanishSentimentClassification | 0.6378378378378378 | 0.9141891891891893 -| STS17 | 0.23167578806693545 | 0.8092850520982419 -| STS22 | 0.49970798735740846 | 0.7865922376187726 -| STSBenchmarkMultilingualSTS | 0.7724973718736371 | 0.8646354604520479 -| STSES | 0.6040795444089487 | 0.7923804835012699 -| Tatoeba | 0.15162606837606837 | 0.9736666666666667 -| TweetSentimentClassification | 0.408203125 | 0.508984375 -| XMarket | 0.11391 | 0.14136 -| XNLI | 0.5793703625227221 | 0.7603625574106656 -| XPQARetrieval | 0.47322 | 0.61619 -| XQuADRetrieval | 0.81996 | 0.97644 +| Benchmark | dariolopez__roberta-base-bne-finetuned-msmarco-qa-es-mnrl-mn | intfloat__multilingual-e5-large | BAAI/bge-m3 | +|-----------------------------|---------|----------|----------| +| AmazonReviewsClassification | 0.28194 | 0.42702 | **0.44678000000000007** +| CataloniaTweetClassification | 0.48369999999999996 | **0.5025000000000001** | 0.4875999999999999 +| MassiveIntentClassification | 0.5473100201748486 | 0.6470073974445192 | **0.6703429724277068** +| MassiveScenarioClassification | 0.6322797579018158 | 0.689340954942838 | **0.721990585070612** +| MintakaRetrieval | 0.16548 | **0.2836** | 0.22341 +| MIRACLRetrieval | 0.70137 | 0.82005 | - +| MLSUMClusteringS2S.v2 | 0.42841628413893035 | 0.48075917245775485 | - +| MTOPDomainClassification | 0.7924616410940628 | 0.8998999332888593 | **0.9183789192795198** +| MTOPIntentClassification | 0.5307538358905937 | **0.6673782521681121** | 
0.6644096064042696 +| MultiEURLEXMultilabelClassification | 0.05144 | 0.05226000000000001 | - +| MultiHateClassification | 0.5578 | **0.639** | 0.6253 +| PawsX | **0.6015684593563027** | 0.5639685167829116 | 0.5735093608198505 +| PublicHealthQA | 0.62516 | **0.80811** | 0.79986 +| SIB200Classification | 0.6549019607843137 | **0.7348039215686275** | 0.7313725490196079 +| SIB200ClusteringS2S | 0.3347573603718645 | **0.3637865013678009** | 0.34998316595531576 +| SpanishNewsClassification | 0.81318359375 | 0.880517578125 | - +| SpanishNewsClusteringP2P | 0.379918321557151 | 0.4399933663826367 | - +| SpanishSentimentClassification | 0.6378378378378378 | 0.9141891891891893 | **0.9395270270270271** +| STS17 | 0.23167578806693545 | **0.8092850520982419** | 0.7557298844031564 +| STS22 | 0.49970798735740846 | 0.7865922376187726 | - +| STSBenchmarkMultilingualSTS | 0.7724973718736371 | **0.8646354604520479** | 0.8468700424822017 +| STSES | 0.6040795444089487 | **0.7923804835012699** | 0.7743978294342545 +| TweetSentimentClassification | 0.408203125 | 0.508984375 | **0.54765625** +| XMarket | 0.11391 | 0.14136 | - +| XNLI | 0.5793703625227221 | 0.7603625574106656 | **0.8076605260070395** +| XPQARetrieval | 0.47322 | 0.61619 | **0.62131** +| XQuADRetrieval | 0.81996 | **0.97644** | 0.96649