From cb6219007620aefa4cf80b06d7e007376509d8d7 Mon Sep 17 00:00:00 2001 From: Jithin James Date: Thu, 3 Oct 2024 17:01:34 +0530 Subject: [PATCH] chore: add tracking for new testset generation (#1419) --- src/ragas/_analytics.py | 1 + src/ragas/testset/synthesizers/generate.py | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/src/ragas/_analytics.py b/src/ragas/_analytics.py index 624c92922..458256c09 100644 --- a/src/ragas/_analytics.py +++ b/src/ragas/_analytics.py @@ -97,6 +97,7 @@ class TestsetGenerationEvent(BaseEvent): num_rows: int language: str is_experiment: bool = False + version: str = "3" # the version of testset generation pipeline @silent diff --git a/src/ragas/testset/synthesizers/generate.py b/src/ragas/testset/synthesizers/generate.py index 9a4594e69..6aa71f04e 100644 --- a/src/ragas/testset/synthesizers/generate.py +++ b/src/ragas/testset/synthesizers/generate.py @@ -4,6 +4,7 @@ import typing as t from dataclasses import dataclass, field +from ragas._analytics import TestsetGenerationEvent, track from ragas.callbacks import new_group from ragas.executor import Executor from ragas.llms import BaseRagasLLM, LangchainLLMWrapper @@ -202,4 +203,17 @@ def generate( testsets.append(TestsetSample(eval_sample=sample, **additional_info)) testset = Testset(samples=testsets) testset_generation_rm.on_chain_end({"testset": testset}) + + # tracking how many samples were generated + track( + TestsetGenerationEvent( + event_type="testset_generation", + evolution_names=[ + e.__class__.__name__.lower() for e, _ in query_distribution + ], + evolution_percentages=[p for _, p in query_distribution], + num_rows=test_size, + language="english", + ) + ) return testset