From 90d16cd7ded383263da7c67dea0a7503abe7ae41 Mon Sep 17 00:00:00 2001
From: Justin Lee
Date: Thu, 5 Dec 2024 12:54:13 -0800
Subject: [PATCH] minor changes in eval, deleted formatter

---
 .../prompt_migration/evaluator.py  | 10 +---------
 .../prompt_migration/formatters.py | 17 -----------------
 2 files changed, 1 insertion(+), 26 deletions(-)
 delete mode 100644 recipes/use_cases/prompt-migration/prompt_migration/formatters.py

diff --git a/recipes/use_cases/prompt-migration/prompt_migration/evaluator.py b/recipes/use_cases/prompt-migration/prompt_migration/evaluator.py
index 2607e68ca..446007362 100644
--- a/recipes/use_cases/prompt-migration/prompt_migration/evaluator.py
+++ b/recipes/use_cases/prompt-migration/prompt_migration/evaluator.py
@@ -71,7 +71,6 @@ def clean_score(score):
             return result
         except Exception as e:
             print(f"Error in judge: {str(e)}")
-            # Return default scores
             return type('Result', (), {
                 'accuracy': '0',
                 'consistency': '0',
@@ -119,12 +118,10 @@ def _calculate_metrics(self, source_prompt: str, target_prompt: str, test_cases:
                 expected_output=expected
             )
 
-            # Calculate scores
             accuracy_score = float(judgment.accuracy) / 100
             consistency_score = float(judgment.consistency) / 100
             is_equivalent = judgment.equivalence.lower() == "yes"
 
-            # Store individual scores
             case_scores = {
                 "input": input_text,
                 "expected": expected,
@@ -137,7 +134,6 @@ def _calculate_metrics(self, source_prompt: str, target_prompt: str, test_cases:
             }
             individual_scores.append(case_scores)
 
-            # Update totals
             total_accuracy += accuracy_score
             total_consistency += consistency_score
             total_similarity += float(is_equivalent)
@@ -149,7 +145,6 @@ def _calculate_metrics(self, source_prompt: str, target_prompt: str, test_cases:
             print(f"Judge's reasoning: {judgment.reasoning}")
             print(f"Scores - Accuracy: {accuracy_score:.2f}, Consistency: {consistency_score:.2f}, Equivalent: {is_equivalent}")
 
-        # Calculate final metrics
         metrics = EvaluationMetrics(
             accuracy=total_accuracy / num_cases,
             similarity=total_similarity / num_cases,
@@ -157,7 +152,6 @@ def _calculate_metrics(self, source_prompt: str, target_prompt: str, test_cases:
             individual_scores=individual_scores
         )
 
-        # Save results to JSON
         results = {
             "source_prompt": source_prompt,
             "target_prompt": target_prompt,
@@ -183,14 +177,12 @@ def evaluate(self,
 
     def _save_results(self, results: dict, filename: str = 'results.json') -> None:
         """Save results to a JSON file with a new name if the file already exists."""
-        # Check if file exists
+
         if os.path.exists(filename):
-            # Create new filename with timestamp
            timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
             base, ext = os.path.splitext(filename)
             filename = f"{base}_{timestamp}{ext}"
 
-        # Save results
         with open(filename, 'w') as f:
             json.dump(results, f, indent=2)
         print(f"Results saved to {filename}")
\ No newline at end of file
diff --git a/recipes/use_cases/prompt-migration/prompt_migration/formatters.py b/recipes/use_cases/prompt-migration/prompt_migration/formatters.py
deleted file mode 100644
index 42c0043bd..000000000
--- a/recipes/use_cases/prompt-migration/prompt_migration/formatters.py
+++ /dev/null
@@ -1,17 +0,0 @@
-from typing import List
-
-class PromptFormatter:
-    @staticmethod
-    def openai_to_llama(prompt: str) -> str:
-        """Convert OpenAI-style prompts to Llama format."""
-        # Basic conversion logic
-        converted = prompt.replace("{{", "{").replace("}}", "}")
-        return converted
-
-    @staticmethod
-    def extract_variables(prompt: str) -> List[str]:
-        """Extract variable names from a prompt template."""
-        import re
-        pattern = r"\{([^}]+)\}"
-        matches = re.findall(pattern, prompt)
-        return list(set(matches))
\ No newline at end of file