diff --git a/src/experimental/tests/test_prompt.py b/src/experimental/tests/test_prompt.py
index f2e406369..57a93c326 100644
--- a/src/experimental/tests/test_prompt.py
+++ b/src/experimental/tests/test_prompt.py
@@ -1,11 +1,11 @@
-from ragas_experimental.llms.prompt import StringPrompt, StringIO
+import pytest
+from langchain_core.outputs import Generation, LLMResult
+from ragas_experimental.llms.prompt import StringIO, StringPrompt
+
 from ragas.llms.base import BaseRagasLLM
-from langchain_core.outputs import LLMResult, Generation
 from ragas.llms.prompt import PromptValue
 from ragas.run_config import RunConfig
-import pytest
-
 
 
 class EchoLLM(BaseRagasLLM):
     def generate_text(  # type: ignore
@@ -37,10 +37,11 @@ async def test_string_prompt():
 
 
 def test_process_fields():
-    from ragas_experimental.llms.prompt import PydanticPrompt, StringIO
-    from pydantic import BaseModel
     from enum import Enum
 
+    from pydantic import BaseModel
+    from ragas_experimental.llms.prompt import PydanticPrompt, StringIO
+
     class Categories(str, Enum):
         science = "science"
         commerce = "commerce"
@@ -63,10 +64,7 @@ class JokeGenerator(PydanticPrompt[InputModel, StringIO]):
 
 @pytest.mark.asyncio
 async def test_pydantic_prompt_io():
-    from ragas_experimental.llms.prompt import (
-        PydanticPrompt,
-        StringIO,
-    )
+    from ragas_experimental.llms.prompt import PydanticPrompt, StringIO
 
     class Prompt(PydanticPrompt[StringIO, StringIO]):
         instruction = ""
@@ -82,9 +80,7 @@ class Prompt(PydanticPrompt[StringIO, StringIO]):
 
 
 def test_pydantic_prompt_examples():
-    from ragas_experimental.llms.prompt import (
-        PydanticPrompt,
-    )
+    from ragas_experimental.llms.prompt import PydanticPrompt
 
     class Prompt(PydanticPrompt[StringIO, StringIO]):
         instruction = ""
diff --git a/src/ragas/experimental/llms/__init__.py b/src/ragas/experimental/llms/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/experimental/ragas_experimental/llms/prompt.py b/src/ragas/experimental/llms/prompt.py
similarity index 82%
rename from src/experimental/ragas_experimental/llms/prompt.py
rename to src/ragas/experimental/llms/prompt.py
index 8b0c7f978..528d9d0ed 100644
--- a/src/experimental/ragas_experimental/llms/prompt.py
+++ b/src/ragas/experimental/llms/prompt.py
@@ -1,21 +1,21 @@
 from __future__ import annotations
 
-from abc import ABC, abstractmethod
-from dataclasses import dataclass
-import json
 import typing as t
+from abc import ABC, abstractmethod
 
-from ragas.llms.output_parser import RagasoutputParser
-from ragas.llms.prompt import PromptValue
+import pydantic  # Check Pydantic version
 from pydantic import BaseModel
-import pydantic
+
+from ragas.llms.output_parser import RagasoutputParser
+from ragas.llms.prompt import PromptValue
 
 if t.TYPE_CHECKING:
-    from ragas.llms.base import BaseRagasLLM
     from langchain_core.callbacks import Callbacks
 
+    from ragas.llms.base import BaseRagasLLM
+
 
 PYDANTIC_V2 = pydantic.VERSION.startswith("2.")
@@ -24,7 +24,7 @@ def __init__(self, llm):
         self.llm: BaseRagasLLM = llm
 
     @abstractmethod
-    async def generate(self, data: t.Any) -> t.Any:
+    async def generate(self, data: t.Any, callbacks: Callbacks = None) -> t.Any:
         pass
@@ -57,12 +57,14 @@ def to_json(model: t.Any, indent: int = 4) -> str:
         return model.json(indent=indent)
 
 
-def model_to_json_schema(model: t.Type[BaseModel]) -> dict:
+def model_to_json_schema(model: t.Type[BaseModel]) -> str:
     if PYDANTIC_V2:
-        return model.model_json_schema()
+        # NOTE: this is not the same as model.schema_json()
+        return model.model_json_schema()  # type: ignore
     else:
         return model.schema_json()
+
 
 
 InputModel = t.TypeVar("InputModel", bound=BaseModel)
 OutputModel = t.TypeVar("OutputModel", bound=BaseModel)
@@ -96,9 +98,11 @@ def generate_examples(self):
             example_strings.append(
                 self.instruction
                 + "\n"
-                + "input: " + to_json(input_data, indent=4)
+                + "input: "
+                + to_json(input_data, indent=4)
                 + "\n"
-                + "output: " + to_json(output_data, indent=4)
+                + "output: "
+                + to_json(output_data, indent=4)
             )
 
         return (
@@ -118,12 +122,15 @@ def to_string(self, data: InputModel) -> str:
             + "\n"
             + self.generate_examples()
             + "\nNow perform the above instruction with the following input\n"
-            + "input: " + to_json(data, indent=4)
+            + "input: "
+            + to_json(data, indent=4)
             + "\n"
             + "output: "
         )
 
-    async def generate(self, data: InputModel, callbacks: Callbacks) -> OutputModel:
+    async def generate(
+        self, data: InputModel, callbacks: Callbacks = None
+    ) -> OutputModel:
         prompt_value = PromptValue(prompt_str=self.to_string(data))
         resp = await self.llm.generate(prompt_value, callbacks=callbacks)
         resp_text = resp.generations[0][0].text
@@ -135,7 +142,7 @@ async def generate(self, data: InputModel, callbacks: Callbacks) -> OutputModel
 
 
 class StringPrompt(BasePrompt):
-    async def generate(self, data: str) -> str:
+    async def generate(self, data: str, callbacks: Callbacks = None) -> str:
         prompt_value = PromptValue(prompt_str=data)
-        llm_result = await self.llm.agenerate_text(prompt_value)
-        return llm_result.generations[0][0].text
\ No newline at end of file
+        llm_result = await self.llm.agenerate_text(prompt_value, callbacks=callbacks)
+        return llm_result.generations[0][0].text
diff --git a/src/experimental/ragas_experimental/metrics/__init__.py b/src/ragas/experimental/metrics/__init__.py
similarity index 57%
rename from src/experimental/ragas_experimental/metrics/__init__.py
rename to src/ragas/experimental/metrics/__init__.py
index e41ccd002..615fbe7ab 100644
--- a/src/experimental/ragas_experimental/metrics/__init__.py
+++ b/src/ragas/experimental/metrics/__init__.py
@@ -1,3 +1,3 @@
 from ._faithfulness import FaithfulnessExperimental
 
-__all__ = ["FaithfulnessExperimental"]
\ No newline at end of file
+__all__ = ["FaithfulnessExperimental"]
diff --git a/src/experimental/ragas_experimental/metrics/_faithfulness.py b/src/ragas/experimental/metrics/_faithfulness.py
similarity index 97%
rename from src/experimental/ragas_experimental/metrics/_faithfulness.py
rename to src/ragas/experimental/metrics/_faithfulness.py
index 81e50c841..548dac0c5 100644
--- a/src/experimental/ragas_experimental/metrics/_faithfulness.py
+++ b/src/ragas/experimental/metrics/_faithfulness.py
@@ -1,17 +1,18 @@
 from __future__ import annotations
 
-import typing as t
 import logging
+import typing as t
 from dataclasses import dataclass
 
-from pydantic import BaseModel, Field
 import numpy as np
+from pydantic import BaseModel, Field
 
+from ragas.experimental.llms.prompt import PydanticPrompt
 from ragas.metrics.base import EvaluationMode, MetricWithLLM, get_segmenter
-from ragas_experimental.llms.prompt import PydanticPrompt
 
 if t.TYPE_CHECKING:
     from langchain_core.callbacks import Callbacks
+
     from ragas.metrics._faithfulness import HasSegmentMethod
@@ -187,6 +188,8 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
         answer, question, contexts = row["answer"], row["question"], row["contexts"]
 
         # get the sentences from the answer
+        if self.sentence_segmenter is None:
+            raise ValueError("Sentence segmenter is not set")
         sentences = self.sentence_segmenter.segment(answer)
         # TODO: why do we do this?
         sentences = [
@@ -198,9 +201,9 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
                 answer=answer,
                 sentences={i: sentence for i, sentence in enumerate(sentences)},
             ),
-            callbacks=callbacks
+            callbacks=callbacks,
         )
-
+
         statements = [
             statement
             for component in sentence_components.sentences
@@ -211,9 +214,9 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
                 context="\n".join(contexts),
                 statements=statements,
             ),
-            callbacks=callbacks
+            callbacks=callbacks,
         )
-
+
         # compute the score
         num_faithful_statements = sum(
             verdict.verdict for verdict in verdicts.statements
@@ -223,4 +226,3 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
         else:
             score = np.nan
         return score
-
diff --git a/src/ragas/llms/base.py b/src/ragas/llms/base.py
index f0b2b3daa..3daa7b3ca 100644
--- a/src/ragas/llms/base.py
+++ b/src/ragas/llms/base.py
@@ -63,7 +63,8 @@ def generate_text(
         temperature: float = 1e-8,
         stop: t.Optional[t.List[str]] = None,
         callbacks: Callbacks = None,
-    ) -> LLMResult: ...
+    ) -> LLMResult:
+        ...
 
     @abstractmethod
     async def agenerate_text(
@@ -73,7 +74,8 @@ async def agenerate_text(
         temperature: t.Optional[float] = None,
         stop: t.Optional[t.List[str]] = None,
         callbacks: Callbacks = None,
-    ) -> LLMResult: ...
+    ) -> LLMResult:
+        ...
 
     async def generate(
         self,
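
For reviewers, here is a minimal sketch (not part of the patch) of how the relaxed `generate(..., callbacks=None)` signature introduced above would be exercised against the new `ragas.experimental` import path. The `EchoLLM` stub mirrors the test double in `test_prompt.py`; the `run_config` keyword on the LLM constructor and the `PromptValue.to_string()` call are assumptions about the surrounding codebase, not something this diff guarantees.

```python
# Hedged sketch: exercising StringPrompt.generate() with its new optional
# callbacks parameter. EchoLLM is a deterministic stand-in that echoes the
# prompt text back, following the test double in test_prompt.py.
import asyncio

from langchain_core.outputs import Generation, LLMResult

from ragas.experimental.llms.prompt import StringPrompt
from ragas.llms.base import BaseRagasLLM
from ragas.run_config import RunConfig


class EchoLLM(BaseRagasLLM):
    """Stand-in LLM that returns the rendered prompt as its completion."""

    def generate_text(self, prompt, n=1, temperature=1e-8, stop=None, callbacks=None):  # type: ignore
        return LLMResult(generations=[[Generation(text=prompt.to_string())]])

    async def agenerate_text(self, prompt, n=1, temperature=None, stop=None, callbacks=None):  # type: ignore
        return LLMResult(generations=[[Generation(text=prompt.to_string())]])


async def main() -> None:
    # run_config keyword is an assumption about BaseRagasLLM's constructor
    prompt = StringPrompt(EchoLLM(run_config=RunConfig()))

    # callbacks now defaults to None, so a bare call is valid...
    assert await prompt.generate("hello") == "hello"

    # ...and callers that thread LangChain callbacks through can still do so
    assert await prompt.generate("hello", callbacks=None) == "hello"


asyncio.run(main())
```

The same default applies to `PydanticPrompt.generate` and the abstract `BasePrompt.generate`, so downstream metrics such as `FaithfulnessExperimental` can forward their `Callbacks` explicitly while ad-hoc callers omit the argument entirely.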