From 655d98e487e9b184c88a5a41d78653d1ea89ee33 Mon Sep 17 00:00:00 2001 From: Saurav Panda Date: Tue, 17 Sep 2024 11:31:22 -0700 Subject: [PATCH 1/3] code_review demo --- examples/demo/main.py | 67 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 examples/demo/main.py diff --git a/examples/demo/main.py b/examples/demo/main.py new file mode 100644 index 00000000..dd466e53 --- /dev/null +++ b/examples/demo/main.py @@ -0,0 +1,67 @@ +import random +import time + +# Global variable +GLOBAL_COUNTER = 0 + + +def process_data(data): + global GLOBAL_COUNTER + result = [] + for i in range(len(data)): + item = data[i] + if item % 2 == 0: + result.append(item * 2) + else: + result.append(item * 3) + GLOBAL_COUNTER += 1 + return result + + +def fetch_user_data(user_id): + # Simulating a database query + time.sleep(2) + return {"id": user_id, "name": f"User{user_id}", "score": random.randint(1, 100)} + + +def calculate_average(numbers): + total = 0 + count = 0 + for num in numbers: + total += num + count += 1 + return total / count if count > 0 else 0 + + +def write_to_file(filename, content): + f = open(filename, "w") + f.write(content) + f.close() + + +def main(): + data = [1, 2, 3, 4, 5] + processed_data = process_data(data) + print("Processed data:", processed_data) + + user_data = fetch_user_data(123) + print("User data:", user_data) + + numbers = [10, 20, 30, 40, 50] + avg = calculate_average(numbers) + print("Average:", avg) + + write_to_file("output.txt", "Hello, World!") + + unused_var = 42 + print("Unused variable:", unused_var) + + db_password = "password123" + print("DB Password:", db_password) + + squared_numbers = [num * num for num in range(100)] + print("Squared numbers:", squared_numbers) + + +if __name__ == "__main__": + main() From d25ce227088c86a445b9c463d840c2aa171676d4 Mon Sep 17 00:00:00 2001 From: Saurav Panda Date: Tue, 17 Sep 2024 22:36:47 -0700 Subject: [PATCH 2/3] feat: enhance commit message generation Introduce a command for generating commit messages based on diffs and improve configuration management. - Added `generate_commit_msg` command for staged changes. - Updated config to use `KAIZEN_` prefix instead of `MYAPP_`. - Removed `black` pre-commit hook from configuration. - Adjusted token limits and model defaults in LLMProvider. - Enhanced installation of git hooks for destination directories. These changes streamline the commit process and improve the usability of the CLI tool. --- .pre-commit-config.yaml | 6 ------ cli/kaizen_cli/cli.py | 3 ++- cli/kaizen_cli/commands/reviewer_commands.py | 20 ++++++++++++++++++++ cli/kaizen_cli/config/manager.py | 2 +- cli/kaizen_cli/hooks/prepare-commit-msg | 12 +++++++++--- cli/kaizen_cli/hooks/setup.py | 5 ++++- examples/code_review/main.py | 7 ++++--- github_app/github_helper/pull_requests.py | 4 ++-- kaizen/generator/pr_description.py | 2 +- kaizen/llms/provider.py | 16 ++++++++++++---- 10 files changed, 55 insertions(+), 22 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 748c8157..0c56ed05 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,9 +4,3 @@ repos: hooks: - id: flake8 args: [--config=.flake8] - - - repo: https://github.com/psf/black - rev: 23.3.0 - hooks: - - id: black - args: [--line-length=88] \ No newline at end of file diff --git a/cli/kaizen_cli/cli.py b/cli/kaizen_cli/cli.py index 580850a4..fff9c185 100644 --- a/cli/kaizen_cli/cli.py +++ b/cli/kaizen_cli/cli.py @@ -2,7 +2,7 @@ from .config.manager import load_config from .commands.config_commands import config from .commands.unit_test_commands import unit_test -from .commands.reviewer_commands import reviewer +from .commands.reviewer_commands import reviewer, generate_commit_msg from .hooks.setup import hooks from kaizen.generator.e2e_tests import E2ETestGenerator @@ -25,6 +25,7 @@ def ui_tests(url): cli.add_command(unit_test) cli.add_command(reviewer) cli.add_command(hooks) +cli.add_command(generate_commit_msg) if __name__ == "__main__": cli() diff --git a/cli/kaizen_cli/commands/reviewer_commands.py b/cli/kaizen_cli/commands/reviewer_commands.py index 98936bb9..7e3d44b2 100644 --- a/cli/kaizen_cli/commands/reviewer_commands.py +++ b/cli/kaizen_cli/commands/reviewer_commands.py @@ -1,4 +1,7 @@ import click +from kaizen.generator.pr_description import PRDescriptionGenerator +from kaizen.llms.provider import LLMProvider +from ..config.manager import load_config @click.group() @@ -14,3 +17,20 @@ def work(github_url, branch): """Run reviewer work""" click.echo(f"Reviewing {github_url} on branch {branch}") # Implement the reviewer work logic here + + +@click.command() +@click.argument("diff", type=str, required=True) +def generate_commit_msg(diff): + """Generate a commit message based on the provided diff""" + model_config = load_config()["language_model"]["models"][0]["litellm_params"] + generator = PRDescriptionGenerator(LLMProvider(model_config=model_config)) + desc = generator.generate_pull_request_desc( + diff_text=diff, + pull_request_title="", + pull_request_desc="", + pull_request_files=[], + user="", + ) + msg, _, _ = generator.generate_pr_commit_message(desc) + click.echo(f'{msg["subject"]}\n\n{msg["body"]}') diff --git a/cli/kaizen_cli/config/manager.py b/cli/kaizen_cli/config/manager.py index 8b10390e..c5446fb6 100644 --- a/cli/kaizen_cli/config/manager.py +++ b/cli/kaizen_cli/config/manager.py @@ -24,7 +24,7 @@ def load_config(): # Override with environment variables for key, value in os.environ.items(): - if key.startswith("MYAPP_"): + if key.startswith("KAIZEN_"): config_key = key[6:].lower().split("__") try: parsed_value = json.loads(value) diff --git a/cli/kaizen_cli/hooks/prepare-commit-msg b/cli/kaizen_cli/hooks/prepare-commit-msg index 42dbf5c9..9f25445d 100644 --- a/cli/kaizen_cli/hooks/prepare-commit-msg +++ b/cli/kaizen_cli/hooks/prepare-commit-msg @@ -1,8 +1,14 @@ #!/bin/sh # hooks/prepare-commit-msg +# Change to the repository root directory +cd "$(git rev-parse --show-toplevel)" || exit 1 + +# Get the staged changes +staged_diff=$(git diff --cached) + # Run your CLI command and capture the output -commit_msg=$(kaizen-cli generate-commit-msg) +commit_info=$(kaizen-cli generate-commit-msg "$staged_diff") -# Overwrite the commit message file with the generated message -echo "$commit_msg" > "$1" \ No newline at end of file +# Write the commit info to the commit message file +echo "$commit_info" > "$1" \ No newline at end of file diff --git a/cli/kaizen_cli/hooks/setup.py b/cli/kaizen_cli/hooks/setup.py index 107f64c3..fd4682a9 100644 --- a/cli/kaizen_cli/hooks/setup.py +++ b/cli/kaizen_cli/hooks/setup.py @@ -15,7 +15,8 @@ def hooks(): @click.argument("hook_type", type=click.Choice(HOOK_TYPES)) def install(hook_type): """Install a specific git hook""" - source = os.path.join(os.path.dirname(__file__), "hooks", hook_type) + source = os.path.join(os.path.dirname(__file__), hook_type) + print(source) destination = os.path.join(".git", "hooks", hook_type) if not os.path.exists(source): @@ -23,6 +24,8 @@ def install(hook_type): return try: + # Create the destination directory if it doesn't exist + os.makedirs(os.path.dirname(destination), exist_ok=True) shutil.copy(source, destination) os.chmod(destination, 0o755) click.echo(f"{hook_type} hook installed successfully") diff --git a/examples/code_review/main.py b/examples/code_review/main.py index 08fb9c69..0d0320a9 100644 --- a/examples/code_review/main.py +++ b/examples/code_review/main.py @@ -12,8 +12,8 @@ logging.basicConfig(level="DEBUG") -pr_diff = "https://github.com/Cloud-Code-AI/kaizen/pull/335.patch" -pr_files = "https://api.github.com/repos/Cloud-Code-AI/kaizen/pulls/335/files" +pr_diff = "https://github.com/Cloud-Code-AI/kaizen/pull/559.patch" +pr_files = "https://api.github.com/repos/Cloud-Code-AI/kaizen/pulls/559/files" pr_title = "feat: updated the prompt to provide solution" diff_text = get_diff_text(pr_diff, "") @@ -31,7 +31,7 @@ reeval_response=False, ) -topics = clean_keys(review_data.topics, "important") +topics = clean_keys(review_data.topics, "high") review_desc = create_pr_review_text( review_data.issues, code_quality=review_data.code_quality ) @@ -54,3 +54,4 @@ print(desc_data) comit_message = pr_desc.generate_pr_commit_message(desc_data.desc) +print(comit_message) diff --git a/github_app/github_helper/pull_requests.py b/github_app/github_helper/pull_requests.py index f2ecd386..4f0d0be2 100644 --- a/github_app/github_helper/pull_requests.py +++ b/github_app/github_helper/pull_requests.py @@ -19,8 +19,8 @@ confidence_mapping = { "critical": 5, - "important": 4, - "moderate": 3, + "high": 4, + "medium": 3, "low": 2, "trivial": 1, } diff --git a/kaizen/generator/pr_description.py b/kaizen/generator/pr_description.py index 957ec461..d24dc9df 100644 --- a/kaizen/generator/pr_description.py +++ b/kaizen/generator/pr_description.py @@ -181,4 +181,4 @@ def generate_pr_commit_message( DESC=desc, ) resp, usage = self.provider.chat_completion_with_json(prompt, user=user) - return resp, usage + return resp, usage, self.provider.model diff --git a/kaizen/llms/provider.py b/kaizen/llms/provider.py index 553bdd4e..bdc86e02 100644 --- a/kaizen/llms/provider.py +++ b/kaizen/llms/provider.py @@ -9,7 +9,7 @@ import logging from collections import defaultdict -DEFAULT_MAX_TOKENS = 8000 +DEFAULT_MAX_TOKENS = 4000 def set_all_loggers_to_ERROR(): @@ -36,7 +36,7 @@ def set_all_loggers_to_ERROR(): class LLMProvider: - DEFAULT_MODEL = "gpt-3.5-turbo-1106" + DEFAULT_MODEL = "gpt-4o-mini" DEFAULT_MAX_TOKENS = 4000 DEFAULT_TEMPERATURE = 0 DEFAULT_MODEL_CONFIG = {"model": DEFAULT_MODEL} @@ -233,7 +233,12 @@ def is_inside_token_limit(self, PROMPT: str, percentage: float = 0.8) -> bool: {"role": "user", "content": PROMPT}, ] token_count = litellm.token_counter(model=self.model, messages=messages) - max_tokens = litellm.get_max_tokens(self.model) + if token_count is None: + token_count = litellm.token_counter(model=self.DEFAULT_MODEL, text=PROMPT) + try: + max_tokens = litellm.get_max_tokens(self.model) + except Exception: + max_tokens = DEFAULT_MAX_TOKENS if not max_tokens: max_tokens = DEFAULT_MAX_TOKENS return token_count <= max_tokens * percentage @@ -243,7 +248,10 @@ def available_tokens( ) -> int: if not model: model = self.model - max_tokens = litellm.get_max_tokens(model) + try: + max_tokens = litellm.get_max_tokens(model) + except Exception: + max_tokens = DEFAULT_MAX_TOKENS used_tokens = litellm.token_counter(model=model, text=message) if max_tokens: return int(max_tokens * percentage) - used_tokens From 00c66ef7a5921ac9aac368a56bfd568b719d92db Mon Sep 17 00:00:00 2001 From: Saurav Panda Date: Wed, 18 Sep 2024 00:13:46 -0700 Subject: [PATCH 3/3] Revert "feat: enhance commit message generation" This reverts commit d25ce227088c86a445b9c463d840c2aa171676d4. --- .pre-commit-config.yaml | 6 ++++++ cli/kaizen_cli/cli.py | 3 +-- cli/kaizen_cli/commands/reviewer_commands.py | 20 -------------------- cli/kaizen_cli/config/manager.py | 2 +- cli/kaizen_cli/hooks/prepare-commit-msg | 12 +++--------- cli/kaizen_cli/hooks/setup.py | 5 +---- examples/code_review/main.py | 7 +++---- github_app/github_helper/pull_requests.py | 4 ++-- kaizen/generator/pr_description.py | 2 +- kaizen/llms/provider.py | 16 ++++------------ 10 files changed, 22 insertions(+), 55 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0c56ed05..748c8157 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,3 +4,9 @@ repos: hooks: - id: flake8 args: [--config=.flake8] + + - repo: https://github.com/psf/black + rev: 23.3.0 + hooks: + - id: black + args: [--line-length=88] \ No newline at end of file diff --git a/cli/kaizen_cli/cli.py b/cli/kaizen_cli/cli.py index fff9c185..580850a4 100644 --- a/cli/kaizen_cli/cli.py +++ b/cli/kaizen_cli/cli.py @@ -2,7 +2,7 @@ from .config.manager import load_config from .commands.config_commands import config from .commands.unit_test_commands import unit_test -from .commands.reviewer_commands import reviewer, generate_commit_msg +from .commands.reviewer_commands import reviewer from .hooks.setup import hooks from kaizen.generator.e2e_tests import E2ETestGenerator @@ -25,7 +25,6 @@ def ui_tests(url): cli.add_command(unit_test) cli.add_command(reviewer) cli.add_command(hooks) -cli.add_command(generate_commit_msg) if __name__ == "__main__": cli() diff --git a/cli/kaizen_cli/commands/reviewer_commands.py b/cli/kaizen_cli/commands/reviewer_commands.py index 7e3d44b2..98936bb9 100644 --- a/cli/kaizen_cli/commands/reviewer_commands.py +++ b/cli/kaizen_cli/commands/reviewer_commands.py @@ -1,7 +1,4 @@ import click -from kaizen.generator.pr_description import PRDescriptionGenerator -from kaizen.llms.provider import LLMProvider -from ..config.manager import load_config @click.group() @@ -17,20 +14,3 @@ def work(github_url, branch): """Run reviewer work""" click.echo(f"Reviewing {github_url} on branch {branch}") # Implement the reviewer work logic here - - -@click.command() -@click.argument("diff", type=str, required=True) -def generate_commit_msg(diff): - """Generate a commit message based on the provided diff""" - model_config = load_config()["language_model"]["models"][0]["litellm_params"] - generator = PRDescriptionGenerator(LLMProvider(model_config=model_config)) - desc = generator.generate_pull_request_desc( - diff_text=diff, - pull_request_title="", - pull_request_desc="", - pull_request_files=[], - user="", - ) - msg, _, _ = generator.generate_pr_commit_message(desc) - click.echo(f'{msg["subject"]}\n\n{msg["body"]}') diff --git a/cli/kaizen_cli/config/manager.py b/cli/kaizen_cli/config/manager.py index c5446fb6..8b10390e 100644 --- a/cli/kaizen_cli/config/manager.py +++ b/cli/kaizen_cli/config/manager.py @@ -24,7 +24,7 @@ def load_config(): # Override with environment variables for key, value in os.environ.items(): - if key.startswith("KAIZEN_"): + if key.startswith("MYAPP_"): config_key = key[6:].lower().split("__") try: parsed_value = json.loads(value) diff --git a/cli/kaizen_cli/hooks/prepare-commit-msg b/cli/kaizen_cli/hooks/prepare-commit-msg index 9f25445d..42dbf5c9 100644 --- a/cli/kaizen_cli/hooks/prepare-commit-msg +++ b/cli/kaizen_cli/hooks/prepare-commit-msg @@ -1,14 +1,8 @@ #!/bin/sh # hooks/prepare-commit-msg -# Change to the repository root directory -cd "$(git rev-parse --show-toplevel)" || exit 1 - -# Get the staged changes -staged_diff=$(git diff --cached) - # Run your CLI command and capture the output -commit_info=$(kaizen-cli generate-commit-msg "$staged_diff") +commit_msg=$(kaizen-cli generate-commit-msg) -# Write the commit info to the commit message file -echo "$commit_info" > "$1" \ No newline at end of file +# Overwrite the commit message file with the generated message +echo "$commit_msg" > "$1" \ No newline at end of file diff --git a/cli/kaizen_cli/hooks/setup.py b/cli/kaizen_cli/hooks/setup.py index fd4682a9..107f64c3 100644 --- a/cli/kaizen_cli/hooks/setup.py +++ b/cli/kaizen_cli/hooks/setup.py @@ -15,8 +15,7 @@ def hooks(): @click.argument("hook_type", type=click.Choice(HOOK_TYPES)) def install(hook_type): """Install a specific git hook""" - source = os.path.join(os.path.dirname(__file__), hook_type) - print(source) + source = os.path.join(os.path.dirname(__file__), "hooks", hook_type) destination = os.path.join(".git", "hooks", hook_type) if not os.path.exists(source): @@ -24,8 +23,6 @@ def install(hook_type): return try: - # Create the destination directory if it doesn't exist - os.makedirs(os.path.dirname(destination), exist_ok=True) shutil.copy(source, destination) os.chmod(destination, 0o755) click.echo(f"{hook_type} hook installed successfully") diff --git a/examples/code_review/main.py b/examples/code_review/main.py index 0d0320a9..08fb9c69 100644 --- a/examples/code_review/main.py +++ b/examples/code_review/main.py @@ -12,8 +12,8 @@ logging.basicConfig(level="DEBUG") -pr_diff = "https://github.com/Cloud-Code-AI/kaizen/pull/559.patch" -pr_files = "https://api.github.com/repos/Cloud-Code-AI/kaizen/pulls/559/files" +pr_diff = "https://github.com/Cloud-Code-AI/kaizen/pull/335.patch" +pr_files = "https://api.github.com/repos/Cloud-Code-AI/kaizen/pulls/335/files" pr_title = "feat: updated the prompt to provide solution" diff_text = get_diff_text(pr_diff, "") @@ -31,7 +31,7 @@ reeval_response=False, ) -topics = clean_keys(review_data.topics, "high") +topics = clean_keys(review_data.topics, "important") review_desc = create_pr_review_text( review_data.issues, code_quality=review_data.code_quality ) @@ -54,4 +54,3 @@ print(desc_data) comit_message = pr_desc.generate_pr_commit_message(desc_data.desc) -print(comit_message) diff --git a/github_app/github_helper/pull_requests.py b/github_app/github_helper/pull_requests.py index 4f0d0be2..f2ecd386 100644 --- a/github_app/github_helper/pull_requests.py +++ b/github_app/github_helper/pull_requests.py @@ -19,8 +19,8 @@ confidence_mapping = { "critical": 5, - "high": 4, - "medium": 3, + "important": 4, + "moderate": 3, "low": 2, "trivial": 1, } diff --git a/kaizen/generator/pr_description.py b/kaizen/generator/pr_description.py index d24dc9df..957ec461 100644 --- a/kaizen/generator/pr_description.py +++ b/kaizen/generator/pr_description.py @@ -181,4 +181,4 @@ def generate_pr_commit_message( DESC=desc, ) resp, usage = self.provider.chat_completion_with_json(prompt, user=user) - return resp, usage, self.provider.model + return resp, usage diff --git a/kaizen/llms/provider.py b/kaizen/llms/provider.py index bdc86e02..553bdd4e 100644 --- a/kaizen/llms/provider.py +++ b/kaizen/llms/provider.py @@ -9,7 +9,7 @@ import logging from collections import defaultdict -DEFAULT_MAX_TOKENS = 4000 +DEFAULT_MAX_TOKENS = 8000 def set_all_loggers_to_ERROR(): @@ -36,7 +36,7 @@ def set_all_loggers_to_ERROR(): class LLMProvider: - DEFAULT_MODEL = "gpt-4o-mini" + DEFAULT_MODEL = "gpt-3.5-turbo-1106" DEFAULT_MAX_TOKENS = 4000 DEFAULT_TEMPERATURE = 0 DEFAULT_MODEL_CONFIG = {"model": DEFAULT_MODEL} @@ -233,12 +233,7 @@ def is_inside_token_limit(self, PROMPT: str, percentage: float = 0.8) -> bool: {"role": "user", "content": PROMPT}, ] token_count = litellm.token_counter(model=self.model, messages=messages) - if token_count is None: - token_count = litellm.token_counter(model=self.DEFAULT_MODEL, text=PROMPT) - try: - max_tokens = litellm.get_max_tokens(self.model) - except Exception: - max_tokens = DEFAULT_MAX_TOKENS + max_tokens = litellm.get_max_tokens(self.model) if not max_tokens: max_tokens = DEFAULT_MAX_TOKENS return token_count <= max_tokens * percentage @@ -248,10 +243,7 @@ def available_tokens( ) -> int: if not model: model = self.model - try: - max_tokens = litellm.get_max_tokens(model) - except Exception: - max_tokens = DEFAULT_MAX_TOKENS + max_tokens = litellm.get_max_tokens(model) used_tokens = litellm.token_counter(model=model, text=message) if max_tokens: return int(max_tokens * percentage) - used_tokens