From c37a10597f3d02131761975843d3b002d445cf2b Mon Sep 17 00:00:00 2001
From: "kaizen-bot[bot]" <150987473+kaizen-bot[bot]@users.noreply.github.com>
Date: Mon, 21 Oct 2024 17:54:02 +0000
Subject: [PATCH] Add auto-generated tests for PR #600

---
 .../.experiments/code_review/test_main.py     |  90 +++
 .../code_review/test_process_pr.py            | 103 ++++
 .../code_review/test_save_review.py           | 103 ++++
 .../unit_test/kaizen/llms/test_llmprovider.py |  90 +++
 .../llms/test_set_all_loggers_to_error.py     |  43 ++
 .../examples/code_fix/test_group_by_files.py  |  46 ++
 .../vscode/src/test/extension.test.ts         |  15 +
 .../kaizen/helpers/test_format_add_linenum.py |  23 +
 .../helpers/test_patch_to_combined_chunks.py  |  67 +++
 .../llms/test_set_all_loggers_to_error.py     |  64 ++-
 .../kaizen/tests/actions/test_review.py       |  71 +++
 .../helpers/test_diff_to_numbered_lines.py    | 530 ++++++++++++++++++
 .../kaizen/tests/helpers/test_output.py       |  90 +++
 .../kaizen/tests/helpers/test_patch_parser.py | 530 ++++++++++++++++++
 .../kaizen/tests/llms/test_provider.py        |  70 +++
 .../kaizen/tests/retriever/test_chunker.py    | 101 ++++
 16 files changed, 2006 insertions(+), 30 deletions(-)
 create mode 100644 .kaizen/unit_test/.experiments/code_review/test_main.py
 create mode 100644 .kaizen/unit_test/.experiments/code_review/test_process_pr.py
 create mode 100644 .kaizen/unit_test/.experiments/code_review/test_save_review.py
 create mode 100644 .kaizen/unit_test/.kaizen/unit_test/kaizen/llms/test_llmprovider.py
 create mode 100644 .kaizen/unit_test/.kaizen/unit_test/kaizen/llms/test_set_all_loggers_to_error.py
 create mode 100644 .kaizen/unit_test/examples/code_fix/test_group_by_files.py
 create mode 100644 .kaizen/unit_test/extensions/vscode/src/test/extension.test.ts
 create mode 100644 .kaizen/unit_test/kaizen/helpers/test_format_add_linenum.py
 create mode 100644 .kaizen/unit_test/kaizen/helpers/test_patch_to_combined_chunks.py
 create mode 100644 .kaizen/unit_test/kaizen/tests/actions/test_review.py
 create mode 100644 .kaizen/unit_test/kaizen/tests/helpers/test_diff_to_numbered_lines.py
 create mode 100644 .kaizen/unit_test/kaizen/tests/helpers/test_output.py
 create mode 100644 .kaizen/unit_test/kaizen/tests/helpers/test_patch_parser.py
 create mode 100644 .kaizen/unit_test/kaizen/tests/llms/test_provider.py
 create mode 100644 .kaizen/unit_test/kaizen/tests/retriever/test_chunker.py

diff --git a/.kaizen/unit_test/.experiments/code_review/test_main.py b/.kaizen/unit_test/.experiments/code_review/test_main.py
new file mode 100644
index 00000000..fe5c2bee
--- /dev/null
+++ b/.kaizen/unit_test/.experiments/code_review/test_main.py
@@ -0,0 +1,90 @@
+import os
+import pytest
+from unittest.mock import patch, MagicMock, call
+from datetime import datetime
+from tqdm import tqdm
+
+# Assuming the main function is in a file named main.py
+from main import main
+
+@pytest.fixture
+def mock_process_pr():
+    with patch('main.process_pr') as mock:
+        yield mock
+
+@pytest.fixture
+def mock_save_review():
+    with patch('main.save_review') as mock:
+        yield mock
+
+@pytest.fixture
+def mock_logger():
+    with patch('main.logger') as mock:
+        yield mock
+
+def test_multiple_pr_urls(mock_process_pr, mock_save_review, mock_logger):
+    pr_urls = ['https://github.com/org/repo/pull/1', 'https://github.com/org/repo/pull/2']
+
+    mock_process_pr.return_value = ('review_desc', 'comments', 'issues', 'combined_diff_data')
+
+    with patch('os.makedirs'), \
+         patch('os.path.join', return_value='/mock/path'), \
+         patch('main.datetime') as mock_datetime:  # patch datetime where main looks it up (assumes main does "from datetime import datetime")
+
+        mock_datetime.now.return_value = datetime(2023, 1, 1, 12, 0, 0)
+
+        main(pr_urls)
+
+    assert mock_process_pr.call_count == 2
+    assert mock_save_review.call_count == 2
+    mock_logger.info.assert_called_with("All PRs processed successfully")
+
+def test_empty_pr_urls(mock_process_pr, mock_save_review, mock_logger):
+    pr_urls = []
+
+    with patch('os.makedirs'), \
+         patch('os.path.join', return_value='/mock/path'), \
+         patch('main.datetime') as mock_datetime:
+
+        mock_datetime.now.return_value = datetime(2023, 1, 1, 12, 0, 0)
+
+        main(pr_urls)
+
+    mock_process_pr.assert_not_called()
+    mock_save_review.assert_not_called()
+    mock_logger.info.assert_called_with("All PRs processed successfully")
+
+def test_process_pr_exception(mock_process_pr, mock_save_review, mock_logger):
+    pr_urls = ['https://github.com/org/repo/pull/1']
+
+    mock_process_pr.side_effect = Exception("Simulated error")
+
+    with patch('os.makedirs'), \
+         patch('os.path.join', return_value='/mock/path'), \
+         patch('main.datetime') as mock_datetime, \
+         pytest.raises(Exception):
+
+        mock_datetime.now.return_value = datetime(2023, 1, 1, 12, 0, 0)
+
+        main(pr_urls)
+
+    mock_process_pr.assert_called_once()
+    mock_save_review.assert_not_called()
+    assert call("All PRs processed successfully") not in mock_logger.info.call_args_list  # Mock has no assert_not_called_with(); inspect recorded calls instead
+
+@pytest.mark.parametrize("reeval_response", [True, False])
+def test_reeval_response_flag(mock_process_pr, mock_save_review, mock_logger, reeval_response):
+    pr_urls = ['https://github.com/org/repo/pull/1']
+
+    mock_process_pr.return_value = ('review_desc', 'comments', 'issues', 'combined_diff_data')
+
+    with patch('os.makedirs'), \
+         patch('os.path.join', return_value='/mock/path'), \
+         patch('main.datetime') as mock_datetime:
+
+        mock_datetime.now.return_value = datetime(2023, 1, 1, 12, 0, 0)
+
+        main(pr_urls, reeval_response=reeval_response)  # assumes main() accepts and forwards this flag to process_pr
+
+    mock_process_pr.assert_called_once_with(pr_urls[0], reeval_response=reeval_response)
+    mock_save_review.assert_called_once()
\ No newline at end of file
diff --git a/.kaizen/unit_test/.experiments/code_review/test_process_pr.py b/.kaizen/unit_test/.experiments/code_review/test_process_pr.py
new file mode 100644
index 00000000..6ffd42d3
--- /dev/null
+++ b/.kaizen/unit_test/.experiments/code_review/test_process_pr.py
@@ -0,0 +1,103 @@
+import pytest
+from unittest.mock import patch, MagicMock
+from kaizen.reviewer.code_review import CodeReviewer
+from kaizen.llms.provider import LLMProvider
+from kaizen.formatters.code_review_formatter import create_pr_review_text
+from github_app.github_helper.utils import get_diff_text, get_pr_files
+from github_app.github_helper.pull_requests import create_review_comments
+from .experiments.code_review.main import process_pr
+
+@pytest.fixture
+def mock_dependencies():
+    with patch('kaizen.reviewer.code_review.CodeReviewer') as mock_reviewer, \
+         patch('kaizen.llms.provider.LLMProvider') as mock_llm_provider, \
+         patch('github_app.github_helper.utils.get_diff_text') as mock_get_diff_text, \
+         patch('github_app.github_helper.utils.get_pr_files') as mock_get_pr_files, \
+         patch('github_app.github_helper.pull_requests.create_review_comments') as mock_create_review_comments, \
+         patch('kaizen.formatters.code_review_formatter.create_pr_review_text') as mock_create_pr_review_text:
+
+        yield {
+            'reviewer': mock_reviewer,
+            'llm_provider': mock_llm_provider,
+            'get_diff_text': mock_get_diff_text,
+            'get_pr_files': mock_get_pr_files,
+            'create_review_comments': mock_create_review_comments,
+            'create_pr_review_text': mock_create_pr_review_text
+        }
+
+def test_process_pr_normal_case(mock_dependencies):
+    # Arrange
+    pr_url = "https://github.com/org/repo/pull/123"
+    mock_diff_text = "Sample diff text"
+    mock_pr_files = [{"filename": "file1.py", "patch": "Sample patch"}]
+    mock_review_data = MagicMock(
+        topics={"important": ["Topic 1", "Topic 2"]},
+        code_quality="Good",
+        model_name="gpt-4",
+        usage={"prompt_tokens": 100, "completion_tokens": 50},
+        issues=["Issue 1", "Issue 2"]
+    )
+
+    mock_dependencies['get_diff_text'].return_value = mock_diff_text
+    mock_dependencies['get_pr_files'].return_value = mock_pr_files
+    mock_dependencies['reviewer'].return_value.review_pull_request.return_value = mock_review_data
+    mock_dependencies['create_review_comments'].return_value = (["Comment 1"], ["Topic 1", "Topic 2"])
+    mock_dependencies['create_pr_review_text'].return_value = "Sample review text"
+
+    # Act
+    review_desc, comments, issues, combined_diff_data = process_pr(pr_url)
+
+    # Assert
+    assert "PR URL: https://github.com/org/repo/pull/123" in review_desc
+    assert "Sample review text" in review_desc
+    assert "Cost Usage (gpt-4)" in review_desc
+    assert comments == ["Comment 1"]
+    assert issues == ["Issue 1", "Issue 2"]
+    assert "File Name: file1.py" in combined_diff_data
+    assert "Patch Details: Sample patch" in combined_diff_data
+
+def test_process_pr_empty_files(mock_dependencies):
+    # Arrange
+    pr_url = "https://github.com/org/repo/pull/124"
+    mock_diff_text = "Sample diff text"
+    mock_pr_files = []
+    mock_review_data = MagicMock(
+        topics={},
+        code_quality="N/A",
+        model_name="gpt-3.5-turbo",
+        usage={"prompt_tokens": 50, "completion_tokens": 25},
+        issues=[]
+    )
+
+    mock_dependencies['get_diff_text'].return_value = mock_diff_text
+    mock_dependencies['get_pr_files'].return_value = mock_pr_files
+    mock_dependencies['reviewer'].return_value.review_pull_request.return_value = mock_review_data
+    mock_dependencies['create_review_comments'].return_value = ([], [])
+    mock_dependencies['create_pr_review_text'].return_value = "No changes found"
+
+    # Act
+    review_desc, comments, issues, combined_diff_data = process_pr(pr_url)
+
+    # Assert
+    assert "PR URL: https://github.com/org/repo/pull/124" in review_desc
+    assert "No changes found" in review_desc
+    assert "Cost Usage (gpt-3.5-turbo)" in review_desc
+    assert comments == []
+    assert issues == []
+    assert combined_diff_data == ""
+
+@pytest.mark.parametrize("exception_class", [ValueError, ConnectionError, Exception])
+def test_process_pr_invalid_url(mock_dependencies, exception_class):
+    # Arrange
+    pr_url = "https://invalid-url.com/pr/123"
+    mock_dependencies['get_diff_text'].side_effect = exception_class("Error fetching diff")
+
+    # Act & Assert
+    with pytest.raises(exception_class):
+        process_pr(pr_url)
+
+    # Verify that the function attempts to get the diff text
+    mock_dependencies['get_diff_text'].assert_called_once()
+    # Verify that no further processing occurs after the exception
+    mock_dependencies['get_pr_files'].assert_not_called()
+    mock_dependencies['reviewer'].return_value.review_pull_request.assert_not_called()
\ No newline at end of file
diff --git a/.kaizen/unit_test/.experiments/code_review/test_save_review.py b/.kaizen/unit_test/.experiments/code_review/test_save_review.py
new file mode 100644
index 00000000..65088d03
--- /dev/null
+++ b/.kaizen/unit_test/.experiments/code_review/test_save_review.py
@@ -0,0 +1,103 @@
+import os
+import json
+import pytest
+from unittest import mock
+from unittest.mock import patch
+
+# Assuming the save_review function is imported from the specified path
+from .experiments.code_review.main import save_review
+
+@pytest.fixture
+def setup_folder(tmp_path):
+    # Create a temporary directory for testing
+    return tmp_path
+
+def test_save_review_valid_data(setup_folder):
+    pr_number = 123
+    review_desc = "This is a review description."
+    comments = [{"line": 10, "comment": "Looks good!"}]
+    issues = [{"issue": "Variable not used", "line": 15}]
+    folder = setup_folder
+    combined_diff_data = "diff --git a/file.txt b/file.txt"
+
+    save_review(pr_number, review_desc, comments, issues, str(folder), combined_diff_data)
+
+    # Verify files were created with correct content
+    review_file = os.path.join(folder, f"pr_{pr_number}", "review.md")
+    comments_file = os.path.join(folder, f"pr_{pr_number}", "comments.json")
+    issues_file = os.path.join(folder, f"pr_{pr_number}", "issues.json")
+    combined_diff = os.path.join(folder, f"pr_{pr_number}", "combined_diff.txt")
+
+    assert os.path.exists(review_file)
+    assert os.path.exists(comments_file)
+    assert os.path.exists(issues_file)
+    assert os.path.exists(combined_diff)
+
+    with open(review_file, "r") as f:
+        assert f.read() == review_desc
+
+    with open(comments_file, "r") as f:
+        assert json.load(f) == comments
+
+    with open(issues_file, "r") as f:
+        assert json.load(f) == issues
+
+    with open(combined_diff, "r") as f:
+        assert f.read() == combined_diff_data
+
+def test_save_review_empty_data(setup_folder):
+    pr_number = 456
+    review_desc = ""
+    comments = []
+    issues = []
+    folder = setup_folder
+    combined_diff_data = ""
+
+    save_review(pr_number, review_desc, comments, issues, str(folder), combined_diff_data)
+
+    # Verify files were created with correct content
+    review_file = os.path.join(folder, f"pr_{pr_number}", "review.md")
+    comments_file = os.path.join(folder, f"pr_{pr_number}", "comments.json")
+    issues_file = os.path.join(folder, f"pr_{pr_number}", "issues.json")
+    combined_diff = os.path.join(folder, f"pr_{pr_number}", "combined_diff.txt")
+
+    assert os.path.exists(review_file)
+    assert os.path.exists(comments_file)
+    assert os.path.exists(issues_file)
+    assert os.path.exists(combined_diff)
+
+    with open(review_file, "r") as f:
+        assert f.read() == review_desc
+
+    with open(comments_file, "r") as f:
+        assert json.load(f) == comments
+
+    with open(issues_file, "r") as f:
+        assert json.load(f) == issues
+
+    with open(combined_diff, "r") as f:
+        assert f.read() == combined_diff_data
+
+def test_save_review_empty_folder_path():
+    pr_number = 789
+    review_desc = "Review description"
+    comments = [{"line": 20, "comment": "Needs improvement"}]
+    issues = [{"issue": "Syntax error", "line": 25}]
+    folder = ""
+    combined_diff_data = "diff --git a/file2.txt b/file2.txt"
+
+    with pytest.raises(FileNotFoundError):
+        save_review(pr_number, review_desc, comments, issues, folder, combined_diff_data)
+
+@patch("os.makedirs")
+def test_save_review_invalid_folder_path(mock_makedirs):
+    mock_makedirs.side_effect = OSError("Invalid folder path")
+    pr_number = 101
+    review_desc = "Another review description"
+    comments = [{"line": 30, "comment": "Check this"}]
+    issues = [{"issue": "Deprecated function", "line": 35}]
+    folder = "/invalid/folder/path"
+    combined_diff_data = "diff --git a/file3.txt b/file3.txt"
+
+    with pytest.raises(OSError, match="Invalid folder path"):
+        save_review(pr_number, review_desc, comments, issues, folder, combined_diff_data)
\ No newline at end of file
diff --git a/.kaizen/unit_test/.kaizen/unit_test/kaizen/llms/test_llmprovider.py b/.kaizen/unit_test/.kaizen/unit_test/kaizen/llms/test_llmprovider.py
new file mode 100644
index 00000000..abbc0053
--- /dev/null
+++ b/.kaizen/unit_test/.kaizen/unit_test/kaizen/llms/test_llmprovider.py
@@ -0,0 +1,90 @@
+# test_llm_provider.py
+
+import pytest
+from unittest.mock import patch, MagicMock
+from kaizen.llms.provider import LLMProvider
+from kaizen.utils.config import ConfigData
+from litellm import Router
+import os
+
+
+@pytest.fixture
+def mock_config_data():
+    return {
+        "language_model": {
+            "models": [
+                {"model_name": "default", "litellm_params": {"model": "gpt-4o-mini"}}
+            ],
+            "redis_enabled": False,
+            "enable_observability_logging": False,
+        }
+    }
+
+
+@pytest.fixture
+def mock_litellm():
+    with patch("kaizen.llms.provider.litellm") as mock:
+        mock.token_counter.return_value = 100
+        mock.get_max_tokens.return_value = 4000
+        mock.cost_per_token.return_value = (0.01, 0.02)
+        yield mock
+
+
+@pytest.fixture
+def llm_provider(mock_config_data, mock_litellm):
+    with patch.object(ConfigData, "get_config_data", return_value=mock_config_data):
+        return LLMProvider()
+
+
+def test_initialization(llm_provider):
+    assert llm_provider.system_prompt is not None
+    assert llm_provider.model_config == {"model": "gpt-4o-mini"}
+    assert llm_provider.default_temperature == 0.3
+
+
+def test_validate_config_correct_setup(llm_provider):
+    assert llm_provider.models[0]["model_name"] == "default"
+
+
+def test_validate_config_missing_language_model():
+    with patch.object(ConfigData, "get_config_data", return_value={}):
+        with pytest.raises(
+            ValueError, match="Missing 'language_model' in configuration"
+        ):
+            LLMProvider()
+
+
+def test_token_limit_check_with_valid_prompt(llm_provider, mock_litellm):
+    assert llm_provider.is_inside_token_limit("Test prompt") is True
+
+
+def test_available_tokens_calculation(llm_provider, mock_litellm):
+    assert llm_provider.available_tokens("Test message") == 3200
+
+
+def test_usage_cost_calculation(llm_provider, mock_litellm):
+    total_usage = {"prompt_tokens": 100, "completion_tokens": 200}
+    cost = llm_provider.get_usage_cost(total_usage)
+    assert cost == (0.01, 0.02)
+
+
+def test_setup_redis_missing_env_vars():
+    with patch.dict(os.environ, {}, clear=True):
+        with patch.object(
+            ConfigData,
+            "get_config_data",
+            return_value={"language_model": {"redis_enabled": True}},
+        ):
+            with pytest.raises(
+                ValueError,
+                match="Redis is enabled but REDIS_HOST or REDIS_PORT environment variables are missing",
+            ):
+                LLMProvider()
+
+
+def test_token_limit_check_boundary_condition(llm_provider, mock_litellm):
+    mock_litellm.token_counter.return_value = 3200
+    assert (
+        llm_provider.is_inside_token_limit("Boundary test prompt", percentage=0.8)
+        is True
+    )
diff --git a/.kaizen/unit_test/.kaizen/unit_test/kaizen/llms/test_set_all_loggers_to_error.py b/.kaizen/unit_test/.kaizen/unit_test/kaizen/llms/test_set_all_loggers_to_error.py
new file mode 100644
index 00000000..649e1882
--- /dev/null
+++ b/.kaizen/unit_test/.kaizen/unit_test/kaizen/llms/test_set_all_loggers_to_error.py
@@ -0,0 +1,43 @@
+import logging
+import pytest
+
+# Assuming the function is in the module kaizen/llms/provider.py
+from kaizen.llms.provider import set_all_loggers_to_ERROR
+
+
+@pytest.fixture
+def setup_loggers():
+    # Setup: Create some loggers with different levels
+    loggers = {
+        "logger1": logging.getLogger("logger1"),
+        "logger2": logging.getLogger("logger2"),
+        "logger3": logging.getLogger("logger3"),
+    }
+    loggers["logger1"].setLevel(logging.DEBUG)
+    loggers["logger2"].setLevel(logging.INFO)
+    loggers["logger3"].setLevel(logging.WARNING)
+
+    yield loggers
+
+    # Teardown: Reset loggers to default level (WARNING)
+    for logger in loggers.values():
+        logger.setLevel(logging.WARNING)
+
+
+def test_set_all_loggers_to_ERROR(setup_loggers):
+    # Test: Verify all existing loggers are set to ERROR level
+    set_all_loggers_to_ERROR()
+
+    for name, logger in setup_loggers.items():
+        assert logger.level == logging.ERROR, f"Logger {name} not set to ERROR level"
+
+
+def test_no_loggers_present(monkeypatch):
+    # Edge Case: Handle scenario where no loggers are present
+    # Mock the loggerDict to simulate no loggers
+    monkeypatch.setattr(logging.Logger.manager, "loggerDict", {})
+
+    set_all_loggers_to_ERROR()
+
+    # Verify no errors occur and loggerDict is still empty
+    assert logging.Logger.manager.loggerDict == {}, "LoggerDict should be empty"
diff --git a/.kaizen/unit_test/examples/code_fix/test_group_by_files.py b/.kaizen/unit_test/examples/code_fix/test_group_by_files.py
new file mode 100644
index 00000000..aea4e0b9
--- /dev/null
+++ b/.kaizen/unit_test/examples/code_fix/test_group_by_files.py
@@ -0,0 +1,46 @@
+# test_group_by_files.py
+
+import pytest
+from examples.code_fix.main import group_by_files
+
+def test_group_by_single_file():
+    issues = [
+        {"file_path": "file1.py", "issue": "error1"},
+        {"file_path": "file1.py", "issue": "error2"}
+    ]
+    expected = {
+        "file1.py": [
+            {"file_path": "file1.py", "issue": "error1"},
+            {"file_path": "file1.py", "issue": "error2"}
+        ]
+    }
+    assert group_by_files(issues) == expected
+
+def test_group_by_multiple_files():
+    issues = [
+        {"file_path": "file1.py", "issue": "error1"},
+        {"file_path": "file2.py", "issue": "error2"},
+        {"file_path": "file1.py", "issue": "error3"}
+    ]
+    expected = {
+        "file1.py": [
+            {"file_path": "file1.py", "issue": "error1"},
+            {"file_path": "file1.py", "issue": "error3"}
+        ],
+        "file2.py": [
+            {"file_path": "file2.py", "issue": "error2"}
+        ]
+    }
+    assert group_by_files(issues) == expected
+
+def test_empty_issues_list():
+    issues = []
+    expected = {}
+    assert group_by_files(issues) == expected
+
+def test_issue_without_file_path_key():
+    issues = [
+        {"issue": "error1"}
+    ]
+    with pytest.raises(KeyError):
+        group_by_files(issues)
\ No newline at end of file
diff --git a/.kaizen/unit_test/extensions/vscode/src/test/extension.test.ts b/.kaizen/unit_test/extensions/vscode/src/test/extension.test.ts
new file mode 100644
index 00000000..4ca0ab41
--- /dev/null
+++ b/.kaizen/unit_test/extensions/vscode/src/test/extension.test.ts
@@ -0,0 +1,15 @@
+import * as assert from 'assert';
+
+// You can import and use all API from the 'vscode' module
+// as well as import your extension to test it
+import * as vscode from 'vscode';
+// import * as myExtension from '../../extension';
+
+suite('Extension Test Suite', () => {
+    vscode.window.showInformationMessage('Start all tests.');
+
+    test('Sample test', () => {
+        assert.strictEqual(-1, [1, 2, 3].indexOf(5));
+        assert.strictEqual(-1, [1, 2, 3].indexOf(0));
+    });
+});
diff --git a/.kaizen/unit_test/kaizen/helpers/test_format_add_linenum.py b/.kaizen/unit_test/kaizen/helpers/test_format_add_linenum.py
new file mode 100644
index 00000000..13b54fcc
--- /dev/null
+++ b/.kaizen/unit_test/kaizen/helpers/test_format_add_linenum.py
@@ -0,0 +1,23 @@
+import pytest
+from kaizen.helpers.parser import format_add_linenum
+
+@pytest.mark.parametrize("new_num, content, expected", [
+    (123, "Sample content", "123 Sample content"),
+    (None, "Sample content", " Sample content"),
+    (12345, "Sample content", "12345 Sample content"),
+    (123, "", "123 "),
+    (None, "", " "),
+])
+def test_format_add_linenum_basic_cases(new_num, content, expected):
+    assert format_add_linenum(new_num, content) == expected
+
+def test_format_add_linenum_multiline_content():
+    multiline_content = "Line 1\nLine 2\nLine 3"
+    expected = "123 Line 1\n123 Line 2\n123 Line 3"
+    result = "\n".join(format_add_linenum(123, line) for line in multiline_content.split("\n"))
+    assert result == expected
+
+def test_format_add_linenum_ignore_deletions():
+    # Since ignore_deletions is not used, this test checks that it doesn't affect the output
+    assert format_add_linenum(123, "Content", ignore_deletions=True) == "123 Content"
+    assert format_add_linenum(123, "Content", ignore_deletions=False) == "123 Content"
\ No newline at end of file
diff --git a/.kaizen/unit_test/kaizen/helpers/test_patch_to_combined_chunks.py b/.kaizen/unit_test/kaizen/helpers/test_patch_to_combined_chunks.py
new file mode 100644
index 00000000..62759e11
--- /dev/null
+++ b/.kaizen/unit_test/kaizen/helpers/test_patch_to_combined_chunks.py
@@ -0,0 +1,67 @@
+import pytest
+from kaizen.helpers.parser import patch_to_combined_chunks
+
+# Stub for the format_change function used in the source code
+def format_change(file_name, line_num, change_type, content, ignore_deletions):
+    return f"{change_type}: {content}"
+
+@pytest.fixture
+def mock_format_change(monkeypatch):
+    monkeypatch.setattr("kaizen.helpers.parser.format_change", format_change)  # patch the name where it is looked up; "builtins.format_change" would have no effect
+
+def test_empty_patch_text(mock_format_change):
+    result = patch_to_combined_chunks("")
+    assert result == ""
+
+def test_single_file_addition(mock_format_change):
+    patch_text = """diff --git a/file.txt b/file.txt
+--- a/file.txt
++++ b/file.txt
+@@ -0,0 +1 @@
++new line
+"""
+    expected_output = "\n[FILE_START] file.txt\n\nUPDATED: new line"
+    result = patch_to_combined_chunks(patch_text)
+    assert result == expected_output
+
+def test_single_file_deletion(mock_format_change):
+    patch_text = """diff --git a/file.txt b/file.txt
+--- a/file.txt
++++ b/file.txt
+@@ -1 +0,0 @@
+-old line
+"""
+    expected_output = "\n[FILE_START] file.txt\n\nREMOVED: old line"
+    result = patch_to_combined_chunks(patch_text)
+    assert result == expected_output
+
+def test_ignore_deletions(mock_format_change):
+    patch_text = """diff --git a/file.txt b/file.txt
+--- a/file.txt
++++ b/file.txt
+@@ -1 +0,0 @@
+-old line
+"""
+    expected_output = "\n[FILE_START] file.txt\n"
+    result = patch_to_combined_chunks(patch_text, ignore_deletions=True)
+    assert result == expected_output
+
+def test_context_lines(mock_format_change):
+    patch_text = """diff --git a/file.txt b/file.txt
+--- a/file.txt
++++ b/file.txt
+@@ -1,3 +1,3 @@
+ line 1
+-line 2
++line 2 updated
+ line 3
+"""
+    expected_output = (
+        "\n[FILE_START] file.txt\n\n"
+        "CONTEXT: line 1\n"
+        "REMOVED: line 2\n"
+        "UPDATED: line 2 updated\n"
+        "CONTEXT: line 3"
+    )
+    result = patch_to_combined_chunks(patch_text)
+    assert result == expected_output
\ No newline at end of file
diff --git a/.kaizen/unit_test/kaizen/llms/test_set_all_loggers_to_error.py b/.kaizen/unit_test/kaizen/llms/test_set_all_loggers_to_error.py
index 649e1882..fa7eebe7 100644
--- a/.kaizen/unit_test/kaizen/llms/test_set_all_loggers_to_error.py
+++ b/.kaizen/unit_test/kaizen/llms/test_set_all_loggers_to_error.py
@@ -1,43 +1,47 @@
 import logging
 import pytest
 
-# Assuming the function is in the module kaizen/llms/provider.py
+# Import the function from the specified path
 from kaizen.llms.provider import set_all_loggers_to_ERROR
-
 @pytest.fixture
 def setup_loggers():
-    # Setup: Create some loggers with different levels
-    loggers = {
-        "logger1": logging.getLogger("logger1"),
-        "logger2": logging.getLogger("logger2"),
-        "logger3": logging.getLogger("logger3"),
-    }
-    loggers["logger1"].setLevel(logging.DEBUG)
-    loggers["logger2"].setLevel(logging.INFO)
-    loggers["logger3"].setLevel(logging.WARNING)
-
-    yield loggers
-
-    # Teardown: Reset loggers to default level (WARNING)
+    # Create a few loggers for testing
+    logger_names = ['test_logger_1', 'test_logger_2', 'test_placeholder']
+    loggers = {name: logging.getLogger(name) for name in logger_names}
+
+    # Set initial levels to something other than ERROR
     for logger in loggers.values():
-        logger.setLevel(logging.WARNING)
-
+        logger.setLevel(logging.INFO)
+
+    # Add a placeholder logger
+    logging.Logger.manager.loggerDict['test_placeholder'] = logging.PlaceHolder(None)
+
+    yield loggers
+
+    # Cleanup: Remove the loggers after the test
+    for name in logger_names:
+        logging.Logger.manager.loggerDict.pop(name, None)
 
 def test_set_all_loggers_to_ERROR(setup_loggers):
-    # Test: Verify all existing loggers are set to ERROR level
+    # Run the function to set all loggers to ERROR
     set_all_loggers_to_ERROR()
-
+
+    # Check that all real loggers are set to ERROR
     for name, logger in setup_loggers.items():
-        assert logger.level == logging.ERROR, f"Logger {name} not set to ERROR level"
-
-
-def test_no_loggers_present(monkeypatch):
-    # Edge Case: Handle scenario where no loggers are present
-    # Mock the loggerDict to simulate no loggers
-    monkeypatch.setattr(logging.Logger.manager, "loggerDict", {})
-
+        if isinstance(logger, logging.Logger):
+            assert logger.level == logging.ERROR, f"Logger {name} is not set to ERROR"
+        else:
+            # Ensure placeholders are not affected
+            assert isinstance(logger, logging.PlaceHolder), f"Logger {name} should be a placeholder"
+
+def test_handle_placeholder_loggers_gracefully():
+    # Add a placeholder logger
+    placeholder_name = 'test_placeholder'
+    logging.Logger.manager.loggerDict[placeholder_name] = logging.PlaceHolder(None)
+
+    # Run the function
     set_all_loggers_to_ERROR()
-
-    # Verify no errors occur and loggerDict is still empty
-    assert logging.Logger.manager.loggerDict == {}, "LoggerDict should be empty"
+
+    # Ensure no exceptions are raised and placeholders remain unchanged
+    assert isinstance(logging.Logger.manager.loggerDict[placeholder_name], logging.PlaceHolder), "Placeholder logger should remain unchanged"
\ No newline at end of file
diff --git a/.kaizen/unit_test/kaizen/tests/actions/test_review.py b/.kaizen/unit_test/kaizen/tests/actions/test_review.py
new file mode 100644
index 00000000..ddc7485b
--- /dev/null
+++ b/.kaizen/unit_test/kaizen/tests/actions/test_review.py
@@ -0,0 +1,71 @@
+import pytest
+import json
+from kaizen.reviewer.code_review import CodeReviewer
+from unittest.mock import Mock
+from kaizen.llms.provider import LLMProvider
+from kaizen.llms.prompts.code_review_prompts import CODE_REVIEW_PROMPT
+
+with open("tests/data/actions/valid_review.json") as f:
+    data = json.load(f)
+
+
+# @pytest.mark.parametrize("valid_review", data)
+# def test_review_pull_request(valid_review):
+#     # Act
+#     code_reviewer = CodeReviewer()
+#     result = code_reviewer.review_pull_request(
+#         valid_review["input"]["diff"],
+#         valid_review["input"]["title"],
+#         valid_review["input"]["description"],
+#         pull_request_files=[],
+#         user="pytest",
+#     )
+#     review = code_reviewer.create_pr_review_text(result.topics)
+
+#     assert fuzz.ratio(review, valid_review["output"]) > 95
+
+
+@pytest.fixture
+def code_reviewer():
+    code_reviewer = CodeReviewer()
+    mock_provider = Mock(spec=LLMProvider)
+    code_reviewer.provider = mock_provider
+    return code_reviewer
+
+
+def test_is_code_review_prompt_within_limit_true(code_reviewer):
+    diff_text = "sample diff text"
+    pull_request_title = "Sample Pull Request Title"
+    pull_request_desc = "Sample Pull Request Description"
+    prompt = CODE_REVIEW_PROMPT.format(
+        PULL_REQUEST_TITLE=pull_request_title,
+        PULL_REQUEST_DESC=pull_request_desc,
+        CODE_DIFF=diff_text,
+    )
+    code_reviewer.provider.is_inside_token_limit.return_value = True
+
+    result = code_reviewer.is_code_review_prompt_within_limit(
+        diff_text, pull_request_title, pull_request_desc
+    )
+
+    assert result
+    code_reviewer.provider.is_inside_token_limit.assert_called_once_with(PROMPT=prompt)
+
+
+def test_is_code_review_prompt_within_limit_false(code_reviewer):
+    diff_text = "very long diff text" * 1000
+    pull_request_title = "Sample Pull Request Title"
+    pull_request_desc = "Sample Pull Request Description"
+    prompt = CODE_REVIEW_PROMPT.format(
+        PULL_REQUEST_TITLE=pull_request_title,
+        PULL_REQUEST_DESC=pull_request_desc,
+        CODE_DIFF=diff_text,
+    )
+    code_reviewer.provider.is_inside_token_limit.return_value = False
+
+    result = code_reviewer.is_code_review_prompt_within_limit(
+        diff_text, pull_request_title, pull_request_desc
+    )
+
+    assert not result
+    code_reviewer.provider.is_inside_token_limit.assert_called_once_with(PROMPT=prompt)
diff --git a/.kaizen/unit_test/kaizen/tests/helpers/test_diff_to_numbered_lines.py b/.kaizen/unit_test/kaizen/tests/helpers/test_diff_to_numbered_lines.py
new file mode 100644
index 00000000..eff04aa8
--- /dev/null
+++ b/.kaizen/unit_test/kaizen/tests/helpers/test_diff_to_numbered_lines.py
@@ -0,0 +1,530 @@
+from kaizen.helpers.parser import patch_to_numbered_lines
+
+patch_data = '''
+From b82fcf4f6392a54bc8bfa6d099fb838f9293f448 Mon Sep 17 00:00:00 2001
+From: Saurav Panda
+Date: Tue, 16 Jul 2024 21:56:02 -0700
+Subject: [PATCH] fix: updated the work summary prompt to merge multiple
+ summaries
+
+---
+ examples/work_summarizer/main.py            |  2 +-
+ kaizen/llms/prompts/work_summary_prompts.py | 22 ++++++++++++++++++++-
+ kaizen/reviewer/work_summarizer.py          |  6 +++++-
+ pyproject.toml                              |  2 +-
+ 4 files changed, 28 insertions(+), 4 deletions(-)
+
+diff --git a/examples/work_summarizer/main.py b/examples/work_summarizer/main.py
+index 1436a23..ffd0282 100644
+--- a/examples/work_summarizer/main.py
++++ b/examples/work_summarizer/main.py
+@@ -8,7 +8,7 @@
+ 
+ # Get the current date and calculate the date 14 days ago
+ current_date = datetime.now(timezone.utc).date()
+-since_date = current_date - timedelta(days=7)
++since_date = current_date - timedelta(days=14)
+ 
+ # Convert the date to ISO format
+ since_date_iso = since_date.isoformat()
+diff --git a/kaizen/llms/prompts/work_summary_prompts.py b/kaizen/llms/prompts/work_summary_prompts.py
+index acedda0..e35d38c 100644
+--- a/kaizen/llms/prompts/work_summary_prompts.py
++++ b/kaizen/llms/prompts/work_summary_prompts.py
+@@ -27,7 +27,7 @@
+ }}
+ 
+ estimated_time: its the range of time you think the above work might have taken for a developer. example "10-15hrs"
+-details: its list of important changes in human readable term so that anyone can understand how the software has been impacted.
++details: its list of changes in human readable term so that anyone can understand how the software has been impacted.
+ 
+ Guidelines:
+ 1. Give a high-level overview of the goal.
+@@ -41,6 +41,26 @@ + PATCH DATA: {PATCH_DATA} + """ + ++MERGE_WORK_SUMMARY_PROMPT = """ ++Merge all this information into the following output format. ++ ++OUTPUT Format: ++{{ ++ "summary": "", ++ "details": ["", ...], ++ "todo": ["", ...], ++ "future_considerations": ["", ...], ++ "estimated_time": ++}} ++ ++estimated_time: its the range of time you think the above work might have taken for a developer in hours, be little generous. example "10-15hrs" ++details: its list of changes in human readable term so that anyone can understand how the software has been impacted. ++ ++All the summaries: ++ ++{SUMMARY_JSON} ++""" ++ + TWITTER_POST_PROMPT = """ + Given the following work summary, create a concise and engaging Twitter post (max 280 characters) that highlights the key changes or improvements. Format the post as markdown, enclosed in triple backticks: + +diff --git a/kaizen/reviewer/work_summarizer.py b/kaizen/reviewer/work_summarizer.py +index ecacc57..df6082e 100644 +--- a/kaizen/reviewer/work_summarizer.py ++++ b/kaizen/reviewer/work_summarizer.py +@@ -6,8 +6,10 @@ + WORK_SUMMARY_SYSTEM_PROMPT, + TWITTER_POST_PROMPT, + LINKEDIN_POST_PROMPT, ++ MERGE_WORK_SUMMARY_PROMPT + ) + import logging ++import json + + + class WorkSummaryGenerator: +@@ -55,7 +57,9 @@ def generate_work_summaries( + + if len(summaries) > 1: + # TODO Merge summaries +- pass ++ prompt = MERGE_WORK_SUMMARY_PROMPT.format(SUMMARY_JSON=json.dumps(summaries)) ++ response, usage = self.provider.chat_completion_with_json(prompt, user=user) ++ summaries = [response] + + return {"summary": summaries[0], "usage": self.total_usage} + +diff --git a/pyproject.toml b/pyproject.toml +index 95c754f..4b35bcc 100644 +--- a/pyproject.toml ++++ b/pyproject.toml +@@ -1,6 +1,6 @@ + [tool.poetry] + name = "kaizen-cloudcode" +-version = "0.3.9" ++version = "0.3.10" + description = "An intelligent coding companion that accelerates your development workflow by providing efficient assistance, enabling you to craft high-quality code more rapidly." 
+ authors = ["Saurav Panda "] + license = "Apache2.0" +''' + +patch_data2 = ''' +From d3f483e4f6a9d3b6322e0baaeb1d1bc15fed3cc6 Mon Sep 17 00:00:00 2001 +From: Saurav Panda +Date: Thu, 18 Jul 2024 09:42:03 -0700 +Subject: [PATCH] feat: added standard output format for description generation + +--- + kaizen/generator/pr_description.py | 51 ++------ + kaizen/llms/prompts/code_review_prompts.py | 140 --------------------- + kaizen/llms/prompts/pr_desc_prompts.py | 92 ++++++++++++++ + 3 files changed, 105 insertions(+), 178 deletions(-) + create mode 100644 kaizen/llms/prompts/pr_desc_prompts.py + +diff --git a/kaizen/generator/pr_description.py b/kaizen/generator/pr_description.py +index e4f548c..e8a4635 100644 +--- a/kaizen/generator/pr_description.py ++++ b/kaizen/generator/pr_description.py +@@ -5,12 +5,11 @@ + + from kaizen.helpers import output, parser + from kaizen.llms.provider import LLMProvider +-from kaizen.llms.prompts.code_review_prompts import ( ++from kaizen.llms.prompts.pr_desc_prompts import ( + PR_DESCRIPTION_PROMPT, + MERGE_PR_DESCRIPTION_PROMPT, + PR_FILE_DESCRIPTION_PROMPT, +- PR_DESC_EVALUATION_PROMPT, +- CODE_REVIEW_SYSTEM_PROMPT, ++ PR_DESCRIPTION_SYSTEM_PROMPT, + ) + + +@@ -26,7 +25,7 @@ class PRDescriptionGenerator: + def __init__(self, llm_provider: LLMProvider): + self.logger = logging.getLogger(__name__) + self.provider = llm_provider +- self.provider.system_prompt = CODE_REVIEW_SYSTEM_PROMPT ++ self.provider.system_prompt = PR_DESCRIPTION_SYSTEM_PROMPT + self.total_usage = { + "prompt_tokens": 0, + "completion_tokens": 0, +@@ -40,7 +39,6 @@ def generate_pull_request_desc( + pull_request_desc: str, + pull_request_files: List[Dict], + user: Optional[str] = None, +- reeval_response: bool = False, + ) -> DescOutput: + prompt = PR_DESCRIPTION_PROMPT.format( + PULL_REQUEST_TITLE=pull_request_title, +@@ -51,14 +49,13 @@ def generate_pull_request_desc( + raise Exception("Both diff_text and pull_request_files are empty!") + + if diff_text and self.provider.is_inside_token_limit(PROMPT=prompt): +- desc = self._process_full_diff(prompt, user, reeval_response) ++ desc = self._process_full_diff(prompt, user) + else: + desc = self._process_files( + pull_request_files, + pull_request_title, + pull_request_desc, + user, +- reeval_response, + ) + + body = output.create_pr_description(desc, pull_request_desc) +@@ -77,15 +74,13 @@ def _process_full_diff( + self, + prompt: str, + user: Optional[str], +- reeval_response: bool, + ) -> str: + self.logger.debug("Processing directly from diff") +- resp, usage = self.provider.chat_completion_with_json(prompt, user=user) ++ resp, usage = self.provider.chat_completion(prompt, user=user) ++ desc = parser.extract_code_from_markdown(resp) + self.total_usage = self.provider.update_usage(self.total_usage, usage) + +- if reeval_response: +- resp = self._reevaluate_response(prompt, resp, user) +- return resp["desc"] ++ return desc + + def _process_files( + self, +@@ -93,7 +88,6 @@ def _process_files( + pull_request_title: str, + pull_request_desc: str, + user: Optional[str], +- reeval_response: bool, + ) -> List[Dict]: + self.logger.debug("Processing based on files") + file_descs = [] +@@ -102,15 +96,15 @@ def _process_files( + pull_request_title, + pull_request_desc, + user, +- reeval_response, + ): + file_descs.extend(file_review) + + prompt = MERGE_PR_DESCRIPTION_PROMPT.format(DESCS=json.dumps(file_descs)) +- resp, usage = self.provider.chat_completion_with_json(prompt, user=user) ++ resp, usage = self.provider.chat_completion(prompt, user=user) 
++ desc = parser.extract_code_from_markdown(resp) + self.total_usage = self.provider.update_usage(self.total_usage, usage) + +- return resp["desc"] ++ return desc + + def _process_files_generator( + self, +@@ -118,7 +112,6 @@ def _process_files_generator( + pull_request_title: str, + pull_request_desc: str, + user: Optional[str], +- reeval_response: bool, + ) -> Generator[List[Dict], None, None]: + combined_diff_data = "" + available_tokens = self.provider.available_tokens( +@@ -151,7 +144,6 @@ def _process_files_generator( + pull_request_title, + pull_request_desc, + user, +- reeval_response, + ) + combined_diff_data = ( + f"\n---->\nFile Name: {filename}\nPatch Details: {patch_details}" +@@ -163,7 +155,6 @@ def _process_files_generator( + pull_request_title, + pull_request_desc, + user, +- reeval_response, + ) + + def _process_file_chunk( +@@ -172,30 +163,14 @@ def _process_file_chunk( + pull_request_title: str, + pull_request_desc: str, + user: Optional[str], +- reeval_response: bool, + ) -> List[Dict]: + prompt = PR_FILE_DESCRIPTION_PROMPT.format( + PULL_REQUEST_TITLE=pull_request_title, + PULL_REQUEST_DESC=pull_request_desc, + CODE_DIFF=diff_data, + ) +- resp, usage = self.provider.chat_completion_with_json(prompt, user=user) ++ resp, usage = self.provider.chat_completion(prompt, user=user) ++ desc = parser.extract_code_from_markdown(resp) + self.total_usage = self.provider.update_usage(self.total_usage, usage) + +- if reeval_response: +- resp = self._reevaluate_response(prompt, resp, user) +- +- return resp["desc"] +- +- def _reevaluate_response(self, prompt: str, resp: str, user: Optional[str]) -> str: +- messages = [ +- {"role": "system", "content": self.provider.system_prompt}, +- {"role": "user", "content": prompt}, +- {"role": "assistant", "content": resp}, +- {"role": "user", "content": PR_DESC_EVALUATION_PROMPT}, +- ] +- resp, usage = self.provider.chat_completion( +- prompt, user=user, messages=messages +- ) +- self.total_usage = self.provider.update_usage(self.total_usage, usage) +- return resp ++ return desc +diff --git a/kaizen/llms/prompts/code_review_prompts.py b/kaizen/llms/prompts/code_review_prompts.py +index 04c1181..5942fff 100644 +--- a/kaizen/llms/prompts/code_review_prompts.py ++++ b/kaizen/llms/prompts/code_review_prompts.py +@@ -166,146 +166,6 @@ + ```{FILE_PATCH}``` + """ + +-PR_DESCRIPTION_PROMPT = """ +-As a skilled developer reviewing a pull request, generate a concise and well-formatted description summarizing the main purpose, scope of changes, significant modifications, refactoring, or new features introduced in the pull request. +- +-Provide the output in the following JSON format: +- +-{{ +- "desc": " +-### Summary +- +- +- +-### Details +- +- +-- List of key changes +-- New features +-- Refactoring details +- " +-}} +- +-When generating the description: +- +-- Create a concise and clear summary highlighting the main purpose of the pull request. +-- Use markdown formatting in the detailed description for better readability. +-- Organize the details into relevant sections or bullet points. +-- Focus on the most significant aspects of the changes. +-- Avoid repeating information already present in the pull request title or description. +-- Ensure the output is in valid JSON format. +- +-Based on the provided information: +- +-Pull Request Title: {PULL_REQUEST_TITLE} +-Pull Request Description: {PULL_REQUEST_DESC} +-Patch Data: +-{CODE_DIFF} +- +-Analyze the information thoroughly and generate a comprehensive summary and detailed description. 
+-Use your expertise to identify and highlight the most important aspects of the changes without asking for additional clarification. If certain details are unclear, make reasonable inferences based on the available information and your development experience. +- +-""" +- +-PR_FILE_DESCRIPTION_PROMPT = """ +-As a skilled developer reviewing a pull request, generate a concise and well-formatted description summarizing the main purpose, scope of changes, significant modifications, refactoring, or new features introduced in the pull request. +- +-Provide the output in the following JSON format: +- +-{{ +- "desc": " +-### Summary +- +- +- +-### Details +- +- +-- List of key changes +-- New features +-- Refactoring details +- " +-}} +- +-When generating the description: +- +-- Create a concise and clear summary highlighting the main purpose of the pull request. +-- Use markdown formatting in the detailed description for better readability. +-- Organize the details into relevant sections or bullet points. +-- Focus on the most significant aspects of the changes. +-- Avoid repeating information already present in the pull request title or description. +-- Ensure the output is in valid JSON format. +- +-Based on the provided information: +- +-Pull Request Title: {PULL_REQUEST_TITLE} +-Pull Request Description: {PULL_REQUEST_DESC} +-Patch Data: +-{CODE_DIFF} +- +-Analyze the information thoroughly and generate a comprehensive summary and detailed description. +-Use your expertise to identify and highlight the most important aspects of the changes without asking for additional clarification. If certain details are unclear, make reasonable inferences based on the available information and your development experience. +-""" +- +-MERGE_PR_DESCRIPTION_PROMPT = """ +-As a skilled developer reviewing a pull request, generate a concise and well-formatted description that synthesizes multiple PR descriptions into a single, comprehensive summary. This summary should encapsulate the main purpose, scope of changes, significant modifications, refactoring, and new features introduced in the pull request. +- +-Using the provided PR descriptions in JSON format, create a merged PR Description in the following JSON format: +- +-{{ +- "desc": " +-### Summary +- +- +- +-### Details +- +- +-- Consolidated list of key changes +-- Aggregated new features +-- Combined refactoring details +-- Other significant aspects from all descriptions +- " +-}} +- +-When generating the merged description: +- +-- Create a concise yet comprehensive summary that captures the essence of all provided descriptions. +-- Use markdown formatting in the detailed description for improved readability. +-- Organize the details into relevant sections or bullet points, consolidating similar information from different descriptions. +-- Focus on the most significant aspects of the changes across all descriptions. +-- Eliminate redundancies and repetitions while ensuring all unique and important points are included. +-- Ensure the output is in valid JSON format. +- +-Analyze the provided PR descriptions thoroughly and generate a unified, comprehensive summary and detailed description. Use your expertise to identify, merge, and highlight the most important aspects of the changes across all descriptions. If certain details seem contradictory or unclear, use your best judgment to provide the most accurate and coherent representation of the pull request's purpose and changes. 
+- +-Here is the information: +-{DESCS} +-""" +- +-PR_DESC_EVALUATION_PROMPT = """ +-Please evaluate the accuracy and completeness of your previous responses in this conversation. +-Identify any potential errors or areas for improvement. +- +-Respond the JSON output as: +-{{ +- "desc": " +-### Summary +- +- +- +-### Details +- +- +-- Consolidated list of key changes +-- Aggregated new features +-- Combined refactoring details +-- Other significant aspects from all descriptions +- " +-}} +- +-""" +- + + PR_REVIEW_EVALUATION_PROMPT = """ + Please evaluate the accuracy and completeness of your previous responses in this conversation. +diff --git a/kaizen/llms/prompts/pr_desc_prompts.py b/kaizen/llms/prompts/pr_desc_prompts.py +new file mode 100644 +index 0000000..3800d67 +--- /dev/null ++++ b/kaizen/llms/prompts/pr_desc_prompts.py +@@ -0,0 +1,92 @@ ++PR_DESCRIPTION_SYSTEM_PROMPT = """ ++As a senior software developer reviewing code submissions, provide thorough, constructive feedback and suggestions for improvements. Consider best practices, error handling, performance, readability, and maintainability. Offer objective and respectful reviews that help developers enhance their skills and code quality. Use your expertise to provide comprehensive feedback without asking clarifying questions. ++""" ++ ++PR_DESCRIPTION_PROMPT = """ ++Summarize the main purpose, scope of changes, significant modifications, refactoring, or new features in this pull request. ++ ++Output Format: ++```markdown ++# {{Generated PR Title}} ++ ++## Overview ++{{Brief summary of overall purpose}} ++ ++## Changes ++- Key Changes: {{List main modifications}} ++- New Features: {{List key new features}} ++- Refactoring: {{List main refactoring changes}} ++``` ++ ++Instructions: ++- Create a concise summary of the PR's main purpose. ++- Use markdown formatting for readability. ++- Focus on significant changes and avoid repetition. ++ ++Based on: ++Title: {PULL_REQUEST_TITLE} ++Description: {PULL_REQUEST_DESC} ++Patch: ++{CODE_DIFF} ++ ++Analyze the information and generate a comprehensive summary. Make reasonable inferences for unclear details based on your development experience. ++""" ++ ++PR_FILE_DESCRIPTION_PROMPT = """ ++Summarize the main purpose, scope of changes, significant modifications, refactoring, or new features in this pull request file. ++ ++Output Format: ++```markdown ++# {{Generated PR Title}} ++ ++## Overview ++{{Brief summary of file changes}} ++ ++## Details ++- Main Changes: {{List key modifications}} ++- New Features: {{List new features, if any}} ++- Refactoring: {{List refactoring changes, if any}} ++``` ++ ++Instructions: ++- Create a concise summary of the file changes. ++- Use markdown formatting for readability. ++- Focus on significant changes and avoid repetition. ++ ++Based on: ++Title: {PULL_REQUEST_TITLE} ++Description: {PULL_REQUEST_DESC} ++Patch: ++{CODE_DIFF} ++ ++Analyze the information and generate a comprehensive summary. Make reasonable inferences for unclear details based on your development experience. ++""" ++ ++MERGE_PR_DESCRIPTION_PROMPT = """ ++Synthesize multiple PR descriptions into a single, comprehensive summary. Create a markdown-formatted description that captures the main purpose, scope of changes, and significant modifications. 
++ ++Output Format: ++```markdown ++# {{Generated PR Title}} ++ ++## Overview ++{{Brief summary of overall purpose}} ++ ++## Changes ++- New Features: {{List key new features}} ++- Refactoring: {{List main refactoring changes}} ++- Other Changes: {{List other significant modifications}} ++``` ++ ++Instructions: ++- Capture the essence of all descriptions concisely. ++- Use markdown formatting for readability. ++- Organize details into the specified sections. ++- Focus on the most significant aspects across all descriptions. ++- Ensure all unique and important points are included. ++ ++Analyze the provided PR descriptions and generate a unified summary. Use your judgment to resolve any contradictions or unclear points. ++ ++Here is the information: ++{DESCS} ++""" +''' + +print(patch_to_numbered_lines(patch_text=patch_data)) diff --git a/.kaizen/unit_test/kaizen/tests/helpers/test_output.py b/.kaizen/unit_test/kaizen/tests/helpers/test_output.py new file mode 100644 index 00000000..a71aaf08 --- /dev/null +++ b/.kaizen/unit_test/kaizen/tests/helpers/test_output.py @@ -0,0 +1,90 @@ +import pytest + + +@pytest.fixture +def test_data(): + return { + "review": [ + { + "topic": "Code Quality", + "comment": "The code is well-structured and easy to read.", + "reasoning": "The code follows best practices and coding standards.", + "confidence": "High", + }, + { + "topic": "Performance", + "comment": "The code could be optimized for better performance.", + "reasoning": "There are some inefficient loops and data structures used.", + "confidence": "Medium", + }, + { + "topic": "Performance", + "comment": "The code could be optimized for better performance.", + "reasoning": "There are some inefficient loops and data structures used.", + "confidence": "Medium", + }, + { + "topic": "Security", + "comment": "NA", + "reasoning": "NA", + "confidence": "NA", + }, + ] + } + + +# def test_json_to_markdown(test_data, capfd): +# logging.getLogger().setLevel(logging.ERROR) +# expected_output = "## Code Review Feedback\n\n" +# expected_output += "### Code Quality\n\n" +# expected_output += ( +# PR_COLLAPSIBLE_TEMPLATE.format( +# comment="The code is well-structured and easy to read.", +# reasoning="The code follows best practices and coding standards.", +# confidence="High", +# file_name="NA", +# start_line="NA", +# end_line="NA", +# ) +# + "\n" +# ) +# expected_output += "### Performance\n\n" +# expected_output += ( +# PR_COLLAPSIBLE_TEMPLATE.format( +# comment="The code could be optimized for better performance.", +# reasoning="There are some inefficient loops and data structures used.", +# confidence="Medium", +# file_name="NA", +# start_line="NA", +# end_line="NA", +# ) +# + "\n" +# + PR_COLLAPSIBLE_TEMPLATE.format( +# comment="The code could be optimized for better performance.", +# reasoning="There are some inefficient loops and data structures used.", +# confidence="Medium", +# file_name="NA", +# start_line="NA", +# end_line="NA", +# ) +# + "\n" +# ) +# expected_output += "### Security\n\n" +# expected_output += ( +# PR_COLLAPSIBLE_TEMPLATE.format( +# comment="NA", +# reasoning="NA", +# confidence="NA", +# file_name="NA", +# start_line="NA", +# end_line="NA", +# ) +# + "\n" +# ) + +# reviewer = CodeReviewer() +# output = reviewer.merge_topics(test_data["review"]) +# text = reviewer.create_pr_review_text(output) +# captured = capfd.readouterr() +# assert text == expected_output +# assert captured.out == "" diff --git a/.kaizen/unit_test/kaizen/tests/helpers/test_patch_parser.py 
b/.kaizen/unit_test/kaizen/tests/helpers/test_patch_parser.py new file mode 100644 index 00000000..8cee4165 --- /dev/null +++ b/.kaizen/unit_test/kaizen/tests/helpers/test_patch_parser.py @@ -0,0 +1,530 @@ +from kaizen.helpers.parser import patch_to_combined_chunks + +patch_data = ''' +From b82fcf4f6392a54bc8bfa6d099fb838f9293f448 Mon Sep 17 00:00:00 2001 +From: Saurav Panda +Date: Tue, 16 Jul 2024 21:56:02 -0700 +Subject: [PATCH] fix: updated the work summary prompt to merge multiple + summaries + +--- + examples/work_summarizer/main.py | 2 +- + kaizen/llms/prompts/work_summary_prompts.py | 22 ++++++++++++++++++++- + kaizen/reviewer/work_summarizer.py | 6 +++++- + pyproject.toml | 2 +- + 4 files changed, 28 insertions(+), 4 deletions(-) + +diff --git a/examples/work_summarizer/main.py b/examples/work_summarizer/main.py +index 1436a23..ffd0282 100644 +--- a/examples/work_summarizer/main.py ++++ b/examples/work_summarizer/main.py +@@ -8,7 +8,7 @@ + + # Get the current date and calculate the date 14 days ago + current_date = datetime.now(timezone.utc).date() +-since_date = current_date - timedelta(days=7) ++since_date = current_date - timedelta(days=14) + + # Convert the date to ISO format + since_date_iso = since_date.isoformat() +diff --git a/kaizen/llms/prompts/work_summary_prompts.py b/kaizen/llms/prompts/work_summary_prompts.py +index acedda0..e35d38c 100644 +--- a/kaizen/llms/prompts/work_summary_prompts.py ++++ b/kaizen/llms/prompts/work_summary_prompts.py +@@ -27,7 +27,7 @@ + }} + + estimated_time: its the range of time you think the above work might have taken for a developer. example "10-15hrs" +-details: its list of important changes in human readable term so that anyone can understand how the software has been impacted. ++details: its list of changes in human readable term so that anyone can understand how the software has been impacted. + + Guidelines: + 1. Give a high-level overview of the goal. +@@ -41,6 +41,26 @@ + PATCH DATA: {PATCH_DATA} + """ + ++MERGE_WORK_SUMMARY_PROMPT = """ ++Merge all this information into the following output format. ++ ++OUTPUT Format: ++{{ ++ "summary": "", ++ "details": ["", ...], ++ "todo": ["", ...], ++ "future_considerations": ["", ...], ++ "estimated_time": ++}} ++ ++estimated_time: its the range of time you think the above work might have taken for a developer in hours, be little generous. example "10-15hrs" ++details: its list of changes in human readable term so that anyone can understand how the software has been impacted. ++ ++All the summaries: ++ ++{SUMMARY_JSON} ++""" ++ + TWITTER_POST_PROMPT = """ + Given the following work summary, create a concise and engaging Twitter post (max 280 characters) that highlights the key changes or improvements. 
Format the post as markdown, enclosed in triple backticks: + +diff --git a/kaizen/reviewer/work_summarizer.py b/kaizen/reviewer/work_summarizer.py +index ecacc57..df6082e 100644 +--- a/kaizen/reviewer/work_summarizer.py ++++ b/kaizen/reviewer/work_summarizer.py +@@ -6,8 +6,10 @@ + WORK_SUMMARY_SYSTEM_PROMPT, + TWITTER_POST_PROMPT, + LINKEDIN_POST_PROMPT, ++ MERGE_WORK_SUMMARY_PROMPT + ) + import logging ++import json + + + class WorkSummaryGenerator: +@@ -55,7 +57,9 @@ def generate_work_summaries( + + if len(summaries) > 1: + # TODO Merge summaries +- pass ++ prompt = MERGE_WORK_SUMMARY_PROMPT.format(SUMMARY_JSON=json.dumps(summaries)) ++ response, usage = self.provider.chat_completion_with_json(prompt, user=user) ++ summaries = [response] + + return {"summary": summaries[0], "usage": self.total_usage} + +diff --git a/pyproject.toml b/pyproject.toml +index 95c754f..4b35bcc 100644 +--- a/pyproject.toml ++++ b/pyproject.toml +@@ -1,6 +1,6 @@ + [tool.poetry] + name = "kaizen-cloudcode" +-version = "0.3.9" ++version = "0.3.10" + description = "An intelligent coding companion that accelerates your development workflow by providing efficient assistance, enabling you to craft high-quality code more rapidly." + authors = ["Saurav Panda "] + license = "Apache2.0" +''' + +patch_data2 = ''' +From d3f483e4f6a9d3b6322e0baaeb1d1bc15fed3cc6 Mon Sep 17 00:00:00 2001 +From: Saurav Panda +Date: Thu, 18 Jul 2024 09:42:03 -0700 +Subject: [PATCH] feat: added standard output format for description generation + +--- + kaizen/generator/pr_description.py | 51 ++------ + kaizen/llms/prompts/code_review_prompts.py | 140 --------------------- + kaizen/llms/prompts/pr_desc_prompts.py | 92 ++++++++++++++ + 3 files changed, 105 insertions(+), 178 deletions(-) + create mode 100644 kaizen/llms/prompts/pr_desc_prompts.py + +diff --git a/kaizen/generator/pr_description.py b/kaizen/generator/pr_description.py +index e4f548c..e8a4635 100644 +--- a/kaizen/generator/pr_description.py ++++ b/kaizen/generator/pr_description.py +@@ -5,12 +5,11 @@ + + from kaizen.helpers import output, parser + from kaizen.llms.provider import LLMProvider +-from kaizen.llms.prompts.code_review_prompts import ( ++from kaizen.llms.prompts.pr_desc_prompts import ( + PR_DESCRIPTION_PROMPT, + MERGE_PR_DESCRIPTION_PROMPT, + PR_FILE_DESCRIPTION_PROMPT, +- PR_DESC_EVALUATION_PROMPT, +- CODE_REVIEW_SYSTEM_PROMPT, ++ PR_DESCRIPTION_SYSTEM_PROMPT, + ) + + +@@ -26,7 +25,7 @@ class PRDescriptionGenerator: + def __init__(self, llm_provider: LLMProvider): + self.logger = logging.getLogger(__name__) + self.provider = llm_provider +- self.provider.system_prompt = CODE_REVIEW_SYSTEM_PROMPT ++ self.provider.system_prompt = PR_DESCRIPTION_SYSTEM_PROMPT + self.total_usage = { + "prompt_tokens": 0, + "completion_tokens": 0, +@@ -40,7 +39,6 @@ def generate_pull_request_desc( + pull_request_desc: str, + pull_request_files: List[Dict], + user: Optional[str] = None, +- reeval_response: bool = False, + ) -> DescOutput: + prompt = PR_DESCRIPTION_PROMPT.format( + PULL_REQUEST_TITLE=pull_request_title, +@@ -51,14 +49,13 @@ def generate_pull_request_desc( + raise Exception("Both diff_text and pull_request_files are empty!") + + if diff_text and self.provider.is_inside_token_limit(PROMPT=prompt): +- desc = self._process_full_diff(prompt, user, reeval_response) ++ desc = self._process_full_diff(prompt, user) + else: + desc = self._process_files( + pull_request_files, + pull_request_title, + pull_request_desc, + user, +- reeval_response, + ) + + body = 
output.create_pr_description(desc, pull_request_desc) +@@ -77,15 +74,13 @@ def _process_full_diff( + self, + prompt: str, + user: Optional[str], +- reeval_response: bool, + ) -> str: + self.logger.debug("Processing directly from diff") +- resp, usage = self.provider.chat_completion_with_json(prompt, user=user) ++ resp, usage = self.provider.chat_completion(prompt, user=user) ++ desc = parser.extract_code_from_markdown(resp) + self.total_usage = self.provider.update_usage(self.total_usage, usage) + +- if reeval_response: +- resp = self._reevaluate_response(prompt, resp, user) +- return resp["desc"] ++ return desc + + def _process_files( + self, +@@ -93,7 +88,6 @@ def _process_files( + pull_request_title: str, + pull_request_desc: str, + user: Optional[str], +- reeval_response: bool, + ) -> List[Dict]: + self.logger.debug("Processing based on files") + file_descs = [] +@@ -102,15 +96,15 @@ def _process_files( + pull_request_title, + pull_request_desc, + user, +- reeval_response, + ): + file_descs.extend(file_review) + + prompt = MERGE_PR_DESCRIPTION_PROMPT.format(DESCS=json.dumps(file_descs)) +- resp, usage = self.provider.chat_completion_with_json(prompt, user=user) ++ resp, usage = self.provider.chat_completion(prompt, user=user) ++ desc = parser.extract_code_from_markdown(resp) + self.total_usage = self.provider.update_usage(self.total_usage, usage) + +- return resp["desc"] ++ return desc + + def _process_files_generator( + self, +@@ -118,7 +112,6 @@ def _process_files_generator( + pull_request_title: str, + pull_request_desc: str, + user: Optional[str], +- reeval_response: bool, + ) -> Generator[List[Dict], None, None]: + combined_diff_data = "" + available_tokens = self.provider.available_tokens( +@@ -151,7 +144,6 @@ def _process_files_generator( + pull_request_title, + pull_request_desc, + user, +- reeval_response, + ) + combined_diff_data = ( + f"\n---->\nFile Name: {filename}\nPatch Details: {patch_details}" +@@ -163,7 +155,6 @@ def _process_files_generator( + pull_request_title, + pull_request_desc, + user, +- reeval_response, + ) + + def _process_file_chunk( +@@ -172,30 +163,14 @@ def _process_file_chunk( + pull_request_title: str, + pull_request_desc: str, + user: Optional[str], +- reeval_response: bool, + ) -> List[Dict]: + prompt = PR_FILE_DESCRIPTION_PROMPT.format( + PULL_REQUEST_TITLE=pull_request_title, + PULL_REQUEST_DESC=pull_request_desc, + CODE_DIFF=diff_data, + ) +- resp, usage = self.provider.chat_completion_with_json(prompt, user=user) ++ resp, usage = self.provider.chat_completion(prompt, user=user) ++ desc = parser.extract_code_from_markdown(resp) + self.total_usage = self.provider.update_usage(self.total_usage, usage) + +- if reeval_response: +- resp = self._reevaluate_response(prompt, resp, user) +- +- return resp["desc"] +- +- def _reevaluate_response(self, prompt: str, resp: str, user: Optional[str]) -> str: +- messages = [ +- {"role": "system", "content": self.provider.system_prompt}, +- {"role": "user", "content": prompt}, +- {"role": "assistant", "content": resp}, +- {"role": "user", "content": PR_DESC_EVALUATION_PROMPT}, +- ] +- resp, usage = self.provider.chat_completion( +- prompt, user=user, messages=messages +- ) +- self.total_usage = self.provider.update_usage(self.total_usage, usage) +- return resp ++ return desc +diff --git a/kaizen/llms/prompts/code_review_prompts.py b/kaizen/llms/prompts/code_review_prompts.py +index 04c1181..5942fff 100644 +--- a/kaizen/llms/prompts/code_review_prompts.py ++++ b/kaizen/llms/prompts/code_review_prompts.py +@@ 
-166,146 +166,6 @@ + ```{FILE_PATCH}``` + """ + +-PR_DESCRIPTION_PROMPT = """ +-As a skilled developer reviewing a pull request, generate a concise and well-formatted description summarizing the main purpose, scope of changes, significant modifications, refactoring, or new features introduced in the pull request. +- +-Provide the output in the following JSON format: +- +-{{ +- "desc": " +-### Summary +- +- +- +-### Details +- +- +-- List of key changes +-- New features +-- Refactoring details +- " +-}} +- +-When generating the description: +- +-- Create a concise and clear summary highlighting the main purpose of the pull request. +-- Use markdown formatting in the detailed description for better readability. +-- Organize the details into relevant sections or bullet points. +-- Focus on the most significant aspects of the changes. +-- Avoid repeating information already present in the pull request title or description. +-- Ensure the output is in valid JSON format. +- +-Based on the provided information: +- +-Pull Request Title: {PULL_REQUEST_TITLE} +-Pull Request Description: {PULL_REQUEST_DESC} +-Patch Data: +-{CODE_DIFF} +- +-Analyze the information thoroughly and generate a comprehensive summary and detailed description. +-Use your expertise to identify and highlight the most important aspects of the changes without asking for additional clarification. If certain details are unclear, make reasonable inferences based on the available information and your development experience. +- +-""" +- +-PR_FILE_DESCRIPTION_PROMPT = """ +-As a skilled developer reviewing a pull request, generate a concise and well-formatted description summarizing the main purpose, scope of changes, significant modifications, refactoring, or new features introduced in the pull request. +- +-Provide the output in the following JSON format: +- +-{{ +- "desc": " +-### Summary +- +- +- +-### Details +- +- +-- List of key changes +-- New features +-- Refactoring details +- " +-}} +- +-When generating the description: +- +-- Create a concise and clear summary highlighting the main purpose of the pull request. +-- Use markdown formatting in the detailed description for better readability. +-- Organize the details into relevant sections or bullet points. +-- Focus on the most significant aspects of the changes. +-- Avoid repeating information already present in the pull request title or description. +-- Ensure the output is in valid JSON format. +- +-Based on the provided information: +- +-Pull Request Title: {PULL_REQUEST_TITLE} +-Pull Request Description: {PULL_REQUEST_DESC} +-Patch Data: +-{CODE_DIFF} +- +-Analyze the information thoroughly and generate a comprehensive summary and detailed description. +-Use your expertise to identify and highlight the most important aspects of the changes without asking for additional clarification. If certain details are unclear, make reasonable inferences based on the available information and your development experience. +-""" +- +-MERGE_PR_DESCRIPTION_PROMPT = """ +-As a skilled developer reviewing a pull request, generate a concise and well-formatted description that synthesizes multiple PR descriptions into a single, comprehensive summary. This summary should encapsulate the main purpose, scope of changes, significant modifications, refactoring, and new features introduced in the pull request. 
+- +-Using the provided PR descriptions in JSON format, create a merged PR Description in the following JSON format: +- +-{{ +- "desc": " +-### Summary +- +- +- +-### Details +- +- +-- Consolidated list of key changes +-- Aggregated new features +-- Combined refactoring details +-- Other significant aspects from all descriptions +- " +-}} +- +-When generating the merged description: +- +-- Create a concise yet comprehensive summary that captures the essence of all provided descriptions. +-- Use markdown formatting in the detailed description for improved readability. +-- Organize the details into relevant sections or bullet points, consolidating similar information from different descriptions. +-- Focus on the most significant aspects of the changes across all descriptions. +-- Eliminate redundancies and repetitions while ensuring all unique and important points are included. +-- Ensure the output is in valid JSON format. +- +-Analyze the provided PR descriptions thoroughly and generate a unified, comprehensive summary and detailed description. Use your expertise to identify, merge, and highlight the most important aspects of the changes across all descriptions. If certain details seem contradictory or unclear, use your best judgment to provide the most accurate and coherent representation of the pull request's purpose and changes. +- +-Here is the information: +-{DESCS} +-""" +- +-PR_DESC_EVALUATION_PROMPT = """ +-Please evaluate the accuracy and completeness of your previous responses in this conversation. +-Identify any potential errors or areas for improvement. +- +-Respond the JSON output as: +-{{ +- "desc": " +-### Summary +- +- +- +-### Details +- +- +-- Consolidated list of key changes +-- Aggregated new features +-- Combined refactoring details +-- Other significant aspects from all descriptions +- " +-}} +- +-""" +- + + PR_REVIEW_EVALUATION_PROMPT = """ + Please evaluate the accuracy and completeness of your previous responses in this conversation. +diff --git a/kaizen/llms/prompts/pr_desc_prompts.py b/kaizen/llms/prompts/pr_desc_prompts.py +new file mode 100644 +index 0000000..3800d67 +--- /dev/null ++++ b/kaizen/llms/prompts/pr_desc_prompts.py +@@ -0,0 +1,92 @@ ++PR_DESCRIPTION_SYSTEM_PROMPT = """ ++As a senior software developer reviewing code submissions, provide thorough, constructive feedback and suggestions for improvements. Consider best practices, error handling, performance, readability, and maintainability. Offer objective and respectful reviews that help developers enhance their skills and code quality. Use your expertise to provide comprehensive feedback without asking clarifying questions. ++""" ++ ++PR_DESCRIPTION_PROMPT = """ ++Summarize the main purpose, scope of changes, significant modifications, refactoring, or new features in this pull request. ++ ++Output Format: ++```markdown ++# {{Generated PR Title}} ++ ++## Overview ++{{Brief summary of overall purpose}} ++ ++## Changes ++- Key Changes: {{List main modifications}} ++- New Features: {{List key new features}} ++- Refactoring: {{List main refactoring changes}} ++``` ++ ++Instructions: ++- Create a concise summary of the PR's main purpose. ++- Use markdown formatting for readability. ++- Focus on significant changes and avoid repetition. ++ ++Based on: ++Title: {PULL_REQUEST_TITLE} ++Description: {PULL_REQUEST_DESC} ++Patch: ++{CODE_DIFF} ++ ++Analyze the information and generate a comprehensive summary. Make reasonable inferences for unclear details based on your development experience. 
++""" ++ ++PR_FILE_DESCRIPTION_PROMPT = """ ++Summarize the main purpose, scope of changes, significant modifications, refactoring, or new features in this pull request file. ++ ++Output Format: ++```markdown ++# {{Generated PR Title}} ++ ++## Overview ++{{Brief summary of file changes}} ++ ++## Details ++- Main Changes: {{List key modifications}} ++- New Features: {{List new features, if any}} ++- Refactoring: {{List refactoring changes, if any}} ++``` ++ ++Instructions: ++- Create a concise summary of the file changes. ++- Use markdown formatting for readability. ++- Focus on significant changes and avoid repetition. ++ ++Based on: ++Title: {PULL_REQUEST_TITLE} ++Description: {PULL_REQUEST_DESC} ++Patch: ++{CODE_DIFF} ++ ++Analyze the information and generate a comprehensive summary. Make reasonable inferences for unclear details based on your development experience. ++""" ++ ++MERGE_PR_DESCRIPTION_PROMPT = """ ++Synthesize multiple PR descriptions into a single, comprehensive summary. Create a markdown-formatted description that captures the main purpose, scope of changes, and significant modifications. ++ ++Output Format: ++```markdown ++# {{Generated PR Title}} ++ ++## Overview ++{{Brief summary of overall purpose}} ++ ++## Changes ++- New Features: {{List key new features}} ++- Refactoring: {{List main refactoring changes}} ++- Other Changes: {{List other significant modifications}} ++``` ++ ++Instructions: ++- Capture the essence of all descriptions concisely. ++- Use markdown formatting for readability. ++- Organize details into the specified sections. ++- Focus on the most significant aspects across all descriptions. ++- Ensure all unique and important points are included. ++ ++Analyze the provided PR descriptions and generate a unified summary. Use your judgment to resolve any contradictions or unclear points. 
++ ++Here is the information: ++{DESCS} ++""" +''' + +print(patch_to_combined_chunks(patch_text=patch_data2, ignore_deletions=True)) diff --git a/.kaizen/unit_test/kaizen/tests/llms/test_provider.py b/.kaizen/unit_test/kaizen/tests/llms/test_provider.py new file mode 100644 index 00000000..eeb08656 --- /dev/null +++ b/.kaizen/unit_test/kaizen/tests/llms/test_provider.py @@ -0,0 +1,70 @@ +import pytest +from unittest.mock import patch +from kaizen.llms.provider import LLMProvider + + +@pytest.fixture +def mock_config_data(): + with patch("kaizen.utils.config.ConfigData") as MockConfigData: + mock_config = MockConfigData.return_value + mock_config.get_config_data.return_value = { + "language_model": { + "default_model_config": {"model": "gpt-3.5-turbo-1106"}, + "enable_observability_logging": True, + } + } + yield mock_config + + +@pytest.fixture +def llm_provider(mock_config_data): + return LLMProvider() + + +def test_initialization(llm_provider): + assert llm_provider.model == "gpt-3.5-turbo-1106" + assert llm_provider.model_config == {"model": "gpt-3.5-turbo-1106"} + + +@patch("kaizen.llms.provider.litellm.completion") +def test_chat_completion(mock_completion, llm_provider): + mock_completion.return_value = { + "choices": [{"message": {"content": "response"}}], + "usage": {"prompt_tokens": 10, "completion_tokens": 10}, + } + response, usage = llm_provider.chat_completion("test prompt") + assert response is not None + assert usage is not None + + +@patch("kaizen.llms.provider.litellm.token_counter") +@patch("kaizen.llms.provider.litellm.get_max_tokens") +def test_is_inside_token_limit(mock_get_max_tokens, mock_token_counter, llm_provider): + mock_token_counter.return_value = 100 + mock_get_max_tokens.return_value = 150 + + # Including system prompt in the calculation + system_prompt_length = len(llm_provider.system_prompt.split()) + user_prompt_length = len("test prompt".split()) + total_length = system_prompt_length + user_prompt_length + + mock_token_counter.return_value = total_length + + assert llm_provider.is_inside_token_limit("test prompt") is True + + mock_token_counter.return_value = 120 + assert llm_provider.is_inside_token_limit("test prompt") is False + + +@patch("kaizen.llms.provider.litellm.token_counter") +@patch("kaizen.llms.provider.litellm.get_max_tokens") +def test_available_tokens(mock_get_max_tokens, mock_token_counter, llm_provider): + mock_token_counter.return_value = 100 + mock_get_max_tokens.return_value = 150 + assert llm_provider.available_tokens("test message") == 20 + + +@patch("kaizen.llms.provider.litellm.token_counter") +def test_get_token_count(mock_token_counter, llm_provider): + mock_token_counter.return_value = 50 + assert llm_provider.get_token_count("test message") == 50 diff --git a/.kaizen/unit_test/kaizen/tests/retriever/test_chunker.py b/.kaizen/unit_test/kaizen/tests/retriever/test_chunker.py new file mode 100644 index 00000000..54e6405d --- /dev/null +++ b/.kaizen/unit_test/kaizen/tests/retriever/test_chunker.py @@ -0,0 +1,101 @@ +from kaizen.retriever.code_chunker import chunk_code +import json + + +# Example usage +python_code = """ +import math + +def square(x): + return x * x + +class Circle: + def __init__(self, radius): + self.radius = radius + + def area(self): + return math.pi * square(self.radius) + +if __name__ == "__main__": + c = Circle(5) + print(f"Area: {c.area()}") +""" + +javascript_code = """ +import Math from 'math'; + +function square(x) { + return x * x; +} + +class Circle { + constructor(radius) { + this.radius = radius; + } 
+ + area() { + return Math.PI * square(this.radius); + } +} + +const c = new Circle(5); +console.log(`Area: ${c.area()}`); +""" + +# Example usage +react_nextjs_code = """ +import React, { useState, useEffect } from 'react'; +import Head from 'next/head'; + +function useCustomHook() { + const [value, setValue] = useState(0); + return [value, setValue]; +} + +function HomePage() { + const [count, setCount] = useCustomHook(); + + useEffect(() => { + document.title = `Count: ${count}`; + }, [count]); + + return ( +
<div> + <Head> + <title>Home Page</title> + </Head> + <h1>Welcome to Next.js!</h1> + <p>Count: {count}</p> + </div>
+ ); +} + +export default HomePage; +""" + + +def print_chunks(language, chunks): + print(f"\n{language.capitalize()} Chunks:") + print(json.dumps(chunks, indent=2)) + # print("\nFunctions:") + # for name, func in chunks["functions"].items(): + # print(f"\n{name}:\n{func}") + + # print("\nClasses:") + # for name, class_info in chunks["classes"].items(): + # print(f"\n{name}:") + # print(f"Definition:\n{class_info['definition']}") + # print("Methods:") + # for method_name, method in class_info["methods"].items(): + # print(f"\n {method_name}:\n{method}") + + # print("\nOther Blocks:") + # for i, block in enumerate(chunks["other_blocks"], 1): + # print(f"\nBlock {i}:\n{block}") + + +print_chunks("Python", chunk_code(python_code, "python")) +print_chunks("JavaScript", chunk_code(javascript_code, "javascript")) +print_chunks("React", chunk_code(react_nextjs_code, "javascript"))
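+
+
+# A possible next step (a hedged sketch, not part of the generated suite): the
+# calls above only print the chunker output. Assuming chunk_code returns a dict
+# with "functions", "classes", and "other_blocks" keys, as the commented-out
+# printer above implies, the same fixtures could back a pytest assertion:
+def test_chunk_code_python_smoke():
+    chunks = chunk_code(python_code, "python")
+    assert isinstance(chunks, dict)
+    # python_code defines one function (square) and one class (Circle), so
+    # both sections should be populated if the assumed keys are correct.
+    assert chunks.get("functions")
+    assert chunks.get("classes")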