
Commit

fix
Alexey Gorbatovski authored and Alexey Gorbatovski committed Oct 18, 2024
1 parent 1c11454 commit 2c37155
Showing 3 changed files with 37 additions and 12 deletions.
21 changes: 13 additions & 8 deletions turbo_alignment/metrics/distinctness.py
@@ -2,11 +2,16 @@
 from transformers.tokenization_utils_base import PreTrainedTokenizerBase
 
 from turbo_alignment.metrics.metric import Metric
+from turbo_alignment.metrics.registry import DistinctnessSettings
 from turbo_alignment.settings.metric import ElementWiseScores, MetricResults, MetricType
 
 
 @Metric.register(MetricType.DIST_N)
 class DistinctnessMetric(Metric):
+    def __init__(self, settings: DistinctnessSettings) -> None:
+        super().__init__(settings=settings)
+        self._settings: DistinctnessSettings = settings
+
     def compute(self, **kwargs) -> list[MetricResults]:
         predictions: list[list[str]] = kwargs.get('predictions', None)
         dataset_name: str = kwargs.get('dataset_name', '')
@@ -18,7 +23,7 @@ def compute(self, **kwargs) -> list[MetricResults]:
 
         dist_n = defaultdict(list)
         for prompt_answers in predictions:
-            ans_dist_n = self.distinctness(prompt_answers, vocab_size)
+            ans_dist_n = self.distinctness(prompt_answers, vocab_size, self._settings.ngram)
             for label, value in ans_dist_n.items():
                 dist_n[label].append(value)
@@ -34,22 +39,22 @@ def compute(self, **kwargs) -> list[MetricResults]:
         ]
 
     @staticmethod
-    def distinctness(answers: list[str], vocab_size: int) -> dict[str, float]:
-        ngram_sets = [set() for _ in range(5)]
-        total_ngrams = [0] * 5
+    def distinctness(answers: list[str], vocab_size: int, ngram: int) -> dict[str, float]:
+        ngram_sets: list[set] = [set() for _ in range(ngram)]
+        total_ngrams = [0] * ngram
 
         for answer in answers:
             words = answer.split(' ')
             ngram_sets[0].update(words)
             total_ngrams[0] += len(words)
 
-            for n in range(1, 5):
+            for n in range(1, ngram):
                 ngrams = ['_'.join(words[i : i + n + 1]) for i in range(len(words) - n)]
                 ngram_sets[n].update(ngrams)
                 total_ngrams[n] += len(ngrams)
 
         result = {}
-        for n in range(5):
+        for n in range(ngram):
             result[f'dist_{n+1}'] = len(ngram_sets[n]) / total_ngrams[n] if total_ngrams[n] > 0 else 0
             try:
                 result[f'ead_dist_{n+1}'] = (
@@ -60,6 +65,6 @@ def distinctness(answers: list[str], vocab_size: int) -> dict[str, float]:
             except ZeroDivisionError:
                 result[f'ead_dist_{n+1}'] = 0
 
-        result['dist_mean'] = sum(result[f'dist_{n+1}'] for n in range(5)) / 5
-        result['ead_dist_mean'] = sum(result[f'ead_dist_{n+1}'] for n in range(5)) / 5
+        result['dist_mean'] = sum(result[f'dist_{n+1}'] for n in range(ngram)) / ngram
+        result['ead_dist_mean'] = sum(result[f'ead_dist_{n+1}'] for n in range(ngram)) / ngram
         return result
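With this change the n-gram order comes from DistinctnessSettings instead of being hard-coded to 5. A quick worked example of the dist-n part on toy data (the ead_dist_* entries also depend on vocab_size through the expectation-adjusted formula, whose body is collapsed in this diff, so only the plain dist-n values are shown):

    from turbo_alignment.metrics.distinctness import DistinctnessMetric

    answers = ['the cat sat', 'the cat ran']
    scores = DistinctnessMetric.distinctness(answers, vocab_size=32000, ngram=2)

    # dist_1 = 4 unique unigrams {the, cat, sat, ran} / 6 total ≈ 0.667
    # dist_2 = 3 unique bigrams {the_cat, cat_sat, cat_ran} / 4 total = 0.75
    # dist_mean = (0.667 + 0.75) / 2 ≈ 0.708
    print(scores['dist_1'], scores['dist_2'], scores['dist_mean'])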
13 changes: 10 additions & 3 deletions turbo_alignment/metrics/diversity.py
@@ -6,16 +6,20 @@
 from transformers import PreTrainedTokenizerBase
 
 from turbo_alignment.metrics.metric import Metric
+from turbo_alignment.metrics.registry import DiversitySettings
 from turbo_alignment.settings.metric import ElementWiseScores, MetricResults, MetricType
 
 
 @Metric.register(MetricType.DIVERSITY)
 class DiversityMetric(Metric):
+    def __init__(self, settings: DiversitySettings) -> None:
+        super().__init__(settings=settings)
+        self._settings: DiversitySettings = settings
+
     def compute(self, **kwargs) -> list[MetricResults]:
         tokenizer: PreTrainedTokenizerBase = kwargs.get('tokenizer', None)
         predictions: list[list[str]] = kwargs.get('predictions', None)
         dataset_name: str = kwargs.get('dataset_name', '')
-        top_k: int = kwargs.get('top_k', None)
 
         if predictions is None:
             raise ValueError('predictions should not be None')
@@ -26,7 +30,10 @@ def compute(self, **kwargs) -> list[MetricResults]:
         element_wise_diversity_scores = [
             ElementWiseScores(
                 label=dataset_name + '@@' + 'diversity',
-                values=[self.average_token_entropy(answer_group, tokenizer, top_k) for answer_group in predictions],
+                values=[
+                    self.average_token_entropy(answer_group, tokenizer, self._settings.top_k)
+                    for answer_group in predictions
+                ],
             )
         ]
@@ -43,7 +50,7 @@ def average_token_entropy(self, answer_group: list[str], tokenizer: PreTrainedTo
             return np.nan
 
     @staticmethod
-    def token_entropy(sample: str, tokenizer: PreTrainedTokenizerBase, top_k: int = None) -> float:
+    def token_entropy(sample: str, tokenizer: PreTrainedTokenizerBase, top_k: int | None) -> float:
         stats: dict[int, Any] = defaultdict(int)
         num_tokens = 0
         tokens = tokenizer.encode(sample)
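The body of token_entropy is collapsed above, so the following is only a minimal sketch of the frequency-based Shannon entropy it appears to compute: stats counts occurrences per token id, and top_k (now read from DiversitySettings instead of kwargs) presumably restricts the count to the k most frequent tokens — an assumption, since the truncation logic is not visible in this hunk.

    import math
    from collections import Counter

    def token_entropy_sketch(token_ids: list[int], top_k: int | None = None) -> float:
        # Count occurrences per token id (the role of `stats` in the real method).
        counts = Counter(token_ids).most_common(top_k)  # most_common(None) keeps all tokens
        total = sum(count for _, count in counts)
        probs = [count / total for _, count in counts]
        # Shannon entropy of the empirical token distribution.
        return -sum(p * math.log(p) for p in probs)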
15 changes: 14 additions & 1 deletion turbo_alignment/metrics/registry.py
@@ -1,4 +1,5 @@
 from enum import Enum
+from pydantic import field_validator
 
 from turbo_alignment.common.registry import Registrable
 from turbo_alignment.settings.base import ExtraFieldsNotAllowedBaseModel
@@ -26,7 +27,13 @@ class MetricSettings(ExtraFieldsNotAllowedBaseModel):
 
 @MetricSettingsRegistry.register(MetricType.DIST_N)
 class DistinctnessSettings(MetricSettings):
-    ...
+    ngram: int = 5
+
+    @field_validator('ngram', mode="before")
+    def check_ngram(cls, value: int):
+        if value <= 0:
+            raise ValueError('ngram should be greater than 0')
+        return value
 
 
 @MetricSettingsRegistry.register(MetricType.DIVERSITY)
@@ -76,6 +83,12 @@ class RougeSettings(MetricSettings):
 class SelfBleuSettings(MetricSettings):
     ngram: int = 3
 
+    @field_validator('ngram', mode="before")
+    def check_ngram(cls, value: int):
+        if value <= 0:
+            raise ValueError('ngram should be greater than 0')
+        return value
 
 
 @MetricSettingsRegistry.register(MetricType.TOOL_CALL_METRICS)
 class ToolMetricsSettings(MetricSettings):
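With the validators in place, a non-positive ngram now fails fast at settings construction instead of silently producing empty metric dictionaries. A sketch of the behavior (any required fields inherited from MetricSettings are omitted here for brevity):

    from pydantic import ValidationError

    from turbo_alignment.metrics.registry import DistinctnessSettings

    DistinctnessSettings(ngram=2)  # ok: overrides the default of 5

    try:
        DistinctnessSettings(ngram=0)
    except ValidationError as err:
        print(err)  # ngram should be greater than 0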
