From 655d98e487e9b184c88a5a41d78653d1ea89ee33 Mon Sep 17 00:00:00 2001
From: Saurav Panda <sgp65@cornell.edu>
Date: Tue, 17 Sep 2024 11:31:22 -0700
Subject: [PATCH 1/3] code_review demo

---
 examples/demo/main.py | 67 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 67 insertions(+)
 create mode 100644 examples/demo/main.py

diff --git a/examples/demo/main.py b/examples/demo/main.py
new file mode 100644
index 00000000..dd466e53
--- /dev/null
+++ b/examples/demo/main.py
@@ -0,0 +1,67 @@
+import random
+import time
+
+# Running count of items handled by process_data(); never reset in this file
+GLOBAL_COUNTER = 0
+
+
+def process_data(data):  # Doubles even items, triples odd ones.
+    global GLOBAL_COUNTER  # the loop below rebinds the module-level counter
+    result = []
+    for i in range(len(data)):  # index loop: data needs len() and data[i]
+        item = data[i]
+        if item % 2 == 0:
+            result.append(item * 2)
+        else:
+            result.append(item * 3)
+        GLOBAL_COUNTER += 1  # one increment per item, accumulates across calls
+    return result  # new list; the input is not modified
+
+
+def fetch_user_data(user_id):  # Returns a fabricated user record.
+    # Simulated database query: 2 s delay, then a record with a random score
+    time.sleep(2)
+    return {"id": user_id, "name": f"User{user_id}", "score": random.randint(1, 100)}
+
+
+def calculate_average(numbers):  # Mean of numbers; 0 when empty.
+    total = 0
+    count = 0
+    for num in numbers:  # one pass; works for any iterable
+        total += num
+        count += 1
+    return total / count if count > 0 else 0  # guard avoids ZeroDivisionError
+
+
+def write_to_file(filename, content):  # Overwrites filename with content.
+    f = open(filename, "w")  # text mode; creates or truncates the file
+    f.write(content)
+    f.close()  # NOTE(review): not reached if write() raises
+
+
+def main():  # Runs each demo helper once and prints its result.
+    data = [1, 2, 3, 4, 5]
+    processed_data = process_data(data)
+    print("Processed data:", processed_data)
+
+    user_data = fetch_user_data(123)  # blocks about 2 s inside the helper
+    print("User data:", user_data)
+
+    numbers = [10, 20, 30, 40, 50]
+    avg = calculate_average(numbers)
+    print("Average:", avg)
+
+    write_to_file("output.txt", "Hello, World!")  # relative path (cwd)
+
+    unused_var = 42
+    print("Unused variable:", unused_var)
+
+    db_password = "password123"  # literal value, printed on the next line
+    print("DB Password:", db_password)
+
+    squared_numbers = [num * num for num in range(100)]  # squares of 0..99
+    print("Squared numbers:", squared_numbers)
+
+
+if __name__ == "__main__":  # run the demo only when executed as a script
+    main()

From d25ce227088c86a445b9c463d840c2aa171676d4 Mon Sep 17 00:00:00 2001
From: Saurav Panda <sgp65@cornell.edu>
Date: Tue, 17 Sep 2024 22:36:47 -0700
Subject: [PATCH 2/3] feat: enhance commit message generation

Introduce a command for generating commit messages based on diffs
and improve configuration management.

- Added `generate_commit_msg` command for staged changes.
- Updated config to use `KAIZEN_` prefix instead of `MYAPP_`.
- Removed `black` pre-commit hook from configuration.
- Adjusted token limits and model defaults in LLMProvider.
- Enhanced installation of git hooks for destination directories.

These changes streamline the commit process and improve the
usability of the CLI tool.
---
 .pre-commit-config.yaml                      |  6 ------
 cli/kaizen_cli/cli.py                        |  3 ++-
 cli/kaizen_cli/commands/reviewer_commands.py | 20 ++++++++++++++++++++
 cli/kaizen_cli/config/manager.py             |  2 +-
 cli/kaizen_cli/hooks/prepare-commit-msg      | 12 +++++++++---
 cli/kaizen_cli/hooks/setup.py                |  5 ++++-
 examples/code_review/main.py                 |  7 ++++---
 github_app/github_helper/pull_requests.py    |  4 ++--
 kaizen/generator/pr_description.py           |  2 +-
 kaizen/llms/provider.py                      | 16 ++++++++++++----
 10 files changed, 55 insertions(+), 22 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 748c8157..0c56ed05 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,9 +4,3 @@ repos:
     hooks:
       - id: flake8
         args: [--config=.flake8]
-
-  - repo: https://github.com/psf/black
-    rev: 23.3.0
-    hooks:
-      - id: black
-        args: [--line-length=88]
\ No newline at end of file
diff --git a/cli/kaizen_cli/cli.py b/cli/kaizen_cli/cli.py
index 580850a4..fff9c185 100644
--- a/cli/kaizen_cli/cli.py
+++ b/cli/kaizen_cli/cli.py
@@ -2,7 +2,7 @@
 from .config.manager import load_config
 from .commands.config_commands import config
 from .commands.unit_test_commands import unit_test
-from .commands.reviewer_commands import reviewer
+from .commands.reviewer_commands import reviewer, generate_commit_msg
 from .hooks.setup import hooks
 from kaizen.generator.e2e_tests import E2ETestGenerator
 
@@ -25,6 +25,7 @@ def ui_tests(url):
 cli.add_command(unit_test)
 cli.add_command(reviewer)
 cli.add_command(hooks)
+cli.add_command(generate_commit_msg)
 
 if __name__ == "__main__":
     cli()
diff --git a/cli/kaizen_cli/commands/reviewer_commands.py b/cli/kaizen_cli/commands/reviewer_commands.py
index 98936bb9..7e3d44b2 100644
--- a/cli/kaizen_cli/commands/reviewer_commands.py
+++ b/cli/kaizen_cli/commands/reviewer_commands.py
@@ -1,4 +1,7 @@
 import click
+from kaizen.generator.pr_description import PRDescriptionGenerator
+from kaizen.llms.provider import LLMProvider
+from ..config.manager import load_config
 
 
 @click.group()
@@ -14,3 +17,20 @@ def work(github_url, branch):
     """Run reviewer work"""
     click.echo(f"Reviewing {github_url} on branch {branch}")
     # Implement the reviewer work logic here
+
+
+@click.command()
+@click.argument("diff", type=str, required=True)
+def generate_commit_msg(diff):
+    """Generate a commit message based on the provided diff"""
+    model_config = load_config()["language_model"]["models"][0]["litellm_params"]
+    generator = PRDescriptionGenerator(LLMProvider(model_config=model_config))
+    desc = generator.generate_pull_request_desc(
+        diff_text=diff,
+        pull_request_title="",
+        pull_request_desc="",
+        pull_request_files=[],
+        user="",
+    )
+    msg, _, _ = generator.generate_pr_commit_message(desc)
+    click.echo(f'{msg["subject"]}\n\n{msg["body"]}')
diff --git a/cli/kaizen_cli/config/manager.py b/cli/kaizen_cli/config/manager.py
index 8b10390e..c5446fb6 100644
--- a/cli/kaizen_cli/config/manager.py
+++ b/cli/kaizen_cli/config/manager.py
@@ -24,7 +24,7 @@ def load_config():
 
     # Override with environment variables
     for key, value in os.environ.items():
-        if key.startswith("MYAPP_"):
+        if key.startswith("KAIZEN_"):
             config_key = key[6:].lower().split("__")
             try:
                 parsed_value = json.loads(value)
diff --git a/cli/kaizen_cli/hooks/prepare-commit-msg b/cli/kaizen_cli/hooks/prepare-commit-msg
index 42dbf5c9..9f25445d 100644
--- a/cli/kaizen_cli/hooks/prepare-commit-msg
+++ b/cli/kaizen_cli/hooks/prepare-commit-msg
@@ -1,8 +1,14 @@
 #!/bin/sh
 # hooks/prepare-commit-msg
 
+# Change to the repository root directory
+cd "$(git rev-parse --show-toplevel)" || exit 1
+
+# Get the staged changes
+staged_diff=$(git diff --cached)
+
 # Run your CLI command and capture the output
-commit_msg=$(kaizen-cli generate-commit-msg)
+commit_info=$(kaizen-cli generate-commit-msg "$staged_diff")
 
-# Overwrite the commit message file with the generated message
-echo "$commit_msg" > "$1"
\ No newline at end of file
+# Write the commit info to the commit message file
+echo "$commit_info" > "$1"
\ No newline at end of file
diff --git a/cli/kaizen_cli/hooks/setup.py b/cli/kaizen_cli/hooks/setup.py
index 107f64c3..fd4682a9 100644
--- a/cli/kaizen_cli/hooks/setup.py
+++ b/cli/kaizen_cli/hooks/setup.py
@@ -15,7 +15,8 @@ def hooks():
 @click.argument("hook_type", type=click.Choice(HOOK_TYPES))
 def install(hook_type):
     """Install a specific git hook"""
-    source = os.path.join(os.path.dirname(__file__), "hooks", hook_type)
+    source = os.path.join(os.path.dirname(__file__), hook_type)
+    print(source)
     destination = os.path.join(".git", "hooks", hook_type)
 
     if not os.path.exists(source):
@@ -23,6 +24,8 @@ def install(hook_type):
         return
 
     try:
+        # Create the destination directory if it doesn't exist
+        os.makedirs(os.path.dirname(destination), exist_ok=True)
         shutil.copy(source, destination)
         os.chmod(destination, 0o755)
         click.echo(f"{hook_type} hook installed successfully")
diff --git a/examples/code_review/main.py b/examples/code_review/main.py
index 08fb9c69..0d0320a9 100644
--- a/examples/code_review/main.py
+++ b/examples/code_review/main.py
@@ -12,8 +12,8 @@
 
 logging.basicConfig(level="DEBUG")
 
-pr_diff = "https://github.com/Cloud-Code-AI/kaizen/pull/335.patch"
-pr_files = "https://api.github.com/repos/Cloud-Code-AI/kaizen/pulls/335/files"
+pr_diff = "https://github.com/Cloud-Code-AI/kaizen/pull/559.patch"
+pr_files = "https://api.github.com/repos/Cloud-Code-AI/kaizen/pulls/559/files"
 pr_title = "feat: updated the prompt to provide solution"
 
 diff_text = get_diff_text(pr_diff, "")
@@ -31,7 +31,7 @@
     reeval_response=False,
 )
 
-topics = clean_keys(review_data.topics, "important")
+topics = clean_keys(review_data.topics, "high")
 review_desc = create_pr_review_text(
     review_data.issues, code_quality=review_data.code_quality
 )
@@ -54,3 +54,4 @@
 print(desc_data)
 
 comit_message = pr_desc.generate_pr_commit_message(desc_data.desc)
+print(comit_message)
diff --git a/github_app/github_helper/pull_requests.py b/github_app/github_helper/pull_requests.py
index f2ecd386..4f0d0be2 100644
--- a/github_app/github_helper/pull_requests.py
+++ b/github_app/github_helper/pull_requests.py
@@ -19,8 +19,8 @@
 
 confidence_mapping = {
     "critical": 5,
-    "important": 4,
-    "moderate": 3,
+    "high": 4,
+    "medium": 3,
     "low": 2,
     "trivial": 1,
 }
diff --git a/kaizen/generator/pr_description.py b/kaizen/generator/pr_description.py
index 957ec461..d24dc9df 100644
--- a/kaizen/generator/pr_description.py
+++ b/kaizen/generator/pr_description.py
@@ -181,4 +181,4 @@ def generate_pr_commit_message(
             DESC=desc,
         )
         resp, usage = self.provider.chat_completion_with_json(prompt, user=user)
-        return resp, usage
+        return resp, usage, self.provider.model
diff --git a/kaizen/llms/provider.py b/kaizen/llms/provider.py
index 553bdd4e..bdc86e02 100644
--- a/kaizen/llms/provider.py
+++ b/kaizen/llms/provider.py
@@ -9,7 +9,7 @@
 import logging
 from collections import defaultdict
 
-DEFAULT_MAX_TOKENS = 8000
+DEFAULT_MAX_TOKENS = 4000
 
 
 def set_all_loggers_to_ERROR():
@@ -36,7 +36,7 @@ def set_all_loggers_to_ERROR():
 
 
 class LLMProvider:
-    DEFAULT_MODEL = "gpt-3.5-turbo-1106"
+    DEFAULT_MODEL = "gpt-4o-mini"
     DEFAULT_MAX_TOKENS = 4000
     DEFAULT_TEMPERATURE = 0
     DEFAULT_MODEL_CONFIG = {"model": DEFAULT_MODEL}
@@ -233,7 +233,12 @@ def is_inside_token_limit(self, PROMPT: str, percentage: float = 0.8) -> bool:
             {"role": "user", "content": PROMPT},
         ]
         token_count = litellm.token_counter(model=self.model, messages=messages)
-        max_tokens = litellm.get_max_tokens(self.model)
+        if token_count is None:
+            token_count = litellm.token_counter(model=self.DEFAULT_MODEL, text=PROMPT)
+        try:
+            max_tokens = litellm.get_max_tokens(self.model)
+        except Exception:
+            max_tokens = DEFAULT_MAX_TOKENS
         if not max_tokens:
             max_tokens = DEFAULT_MAX_TOKENS
         return token_count <= max_tokens * percentage
@@ -243,7 +248,10 @@ def available_tokens(
     ) -> int:
         if not model:
             model = self.model
-        max_tokens = litellm.get_max_tokens(model)
+        try:
+            max_tokens = litellm.get_max_tokens(model)
+        except Exception:
+            max_tokens = DEFAULT_MAX_TOKENS
         used_tokens = litellm.token_counter(model=model, text=message)
         if max_tokens:
             return int(max_tokens * percentage) - used_tokens

From 00c66ef7a5921ac9aac368a56bfd568b719d92db Mon Sep 17 00:00:00 2001
From: Saurav Panda <sgp65@cornell.edu>
Date: Wed, 18 Sep 2024 00:13:46 -0700
Subject: [PATCH 3/3] Revert "feat: enhance commit message generation"

This reverts commit d25ce227088c86a445b9c463d840c2aa171676d4.
---
 .pre-commit-config.yaml                      |  6 ++++++
 cli/kaizen_cli/cli.py                        |  3 +--
 cli/kaizen_cli/commands/reviewer_commands.py | 20 --------------------
 cli/kaizen_cli/config/manager.py             |  2 +-
 cli/kaizen_cli/hooks/prepare-commit-msg      | 12 +++---------
 cli/kaizen_cli/hooks/setup.py                |  5 +----
 examples/code_review/main.py                 |  7 +++----
 github_app/github_helper/pull_requests.py    |  4 ++--
 kaizen/generator/pr_description.py           |  2 +-
 kaizen/llms/provider.py                      | 16 ++++------------
 10 files changed, 22 insertions(+), 55 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 0c56ed05..748c8157 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,3 +4,9 @@ repos:
     hooks:
       - id: flake8
         args: [--config=.flake8]
+
+  - repo: https://github.com/psf/black
+    rev: 23.3.0
+    hooks:
+      - id: black
+        args: [--line-length=88]
\ No newline at end of file
diff --git a/cli/kaizen_cli/cli.py b/cli/kaizen_cli/cli.py
index fff9c185..580850a4 100644
--- a/cli/kaizen_cli/cli.py
+++ b/cli/kaizen_cli/cli.py
@@ -2,7 +2,7 @@
 from .config.manager import load_config
 from .commands.config_commands import config
 from .commands.unit_test_commands import unit_test
-from .commands.reviewer_commands import reviewer, generate_commit_msg
+from .commands.reviewer_commands import reviewer
 from .hooks.setup import hooks
 from kaizen.generator.e2e_tests import E2ETestGenerator
 
@@ -25,7 +25,6 @@ def ui_tests(url):
 cli.add_command(unit_test)
 cli.add_command(reviewer)
 cli.add_command(hooks)
-cli.add_command(generate_commit_msg)
 
 if __name__ == "__main__":
     cli()
diff --git a/cli/kaizen_cli/commands/reviewer_commands.py b/cli/kaizen_cli/commands/reviewer_commands.py
index 7e3d44b2..98936bb9 100644
--- a/cli/kaizen_cli/commands/reviewer_commands.py
+++ b/cli/kaizen_cli/commands/reviewer_commands.py
@@ -1,7 +1,4 @@
 import click
-from kaizen.generator.pr_description import PRDescriptionGenerator
-from kaizen.llms.provider import LLMProvider
-from ..config.manager import load_config
 
 
 @click.group()
@@ -17,20 +14,3 @@ def work(github_url, branch):
     """Run reviewer work"""
     click.echo(f"Reviewing {github_url} on branch {branch}")
     # Implement the reviewer work logic here
-
-
-@click.command()
-@click.argument("diff", type=str, required=True)
-def generate_commit_msg(diff):
-    """Generate a commit message based on the provided diff"""
-    model_config = load_config()["language_model"]["models"][0]["litellm_params"]
-    generator = PRDescriptionGenerator(LLMProvider(model_config=model_config))
-    desc = generator.generate_pull_request_desc(
-        diff_text=diff,
-        pull_request_title="",
-        pull_request_desc="",
-        pull_request_files=[],
-        user="",
-    )
-    msg, _, _ = generator.generate_pr_commit_message(desc)
-    click.echo(f'{msg["subject"]}\n\n{msg["body"]}')
diff --git a/cli/kaizen_cli/config/manager.py b/cli/kaizen_cli/config/manager.py
index c5446fb6..8b10390e 100644
--- a/cli/kaizen_cli/config/manager.py
+++ b/cli/kaizen_cli/config/manager.py
@@ -24,7 +24,7 @@ def load_config():
 
     # Override with environment variables
     for key, value in os.environ.items():
-        if key.startswith("KAIZEN_"):
+        if key.startswith("MYAPP_"):
             config_key = key[6:].lower().split("__")
             try:
                 parsed_value = json.loads(value)
diff --git a/cli/kaizen_cli/hooks/prepare-commit-msg b/cli/kaizen_cli/hooks/prepare-commit-msg
index 9f25445d..42dbf5c9 100644
--- a/cli/kaizen_cli/hooks/prepare-commit-msg
+++ b/cli/kaizen_cli/hooks/prepare-commit-msg
@@ -1,14 +1,8 @@
 #!/bin/sh
 # hooks/prepare-commit-msg
 
-# Change to the repository root directory
-cd "$(git rev-parse --show-toplevel)" || exit 1
-
-# Get the staged changes
-staged_diff=$(git diff --cached)
-
 # Run your CLI command and capture the output
-commit_info=$(kaizen-cli generate-commit-msg "$staged_diff")
+commit_msg=$(kaizen-cli generate-commit-msg)
 
-# Write the commit info to the commit message file
-echo "$commit_info" > "$1"
\ No newline at end of file
+# Overwrite the commit message file with the generated message
+echo "$commit_msg" > "$1"
\ No newline at end of file
diff --git a/cli/kaizen_cli/hooks/setup.py b/cli/kaizen_cli/hooks/setup.py
index fd4682a9..107f64c3 100644
--- a/cli/kaizen_cli/hooks/setup.py
+++ b/cli/kaizen_cli/hooks/setup.py
@@ -15,8 +15,7 @@ def hooks():
 @click.argument("hook_type", type=click.Choice(HOOK_TYPES))
 def install(hook_type):
     """Install a specific git hook"""
-    source = os.path.join(os.path.dirname(__file__), hook_type)
-    print(source)
+    source = os.path.join(os.path.dirname(__file__), "hooks", hook_type)
     destination = os.path.join(".git", "hooks", hook_type)
 
     if not os.path.exists(source):
@@ -24,8 +23,6 @@ def install(hook_type):
         return
 
     try:
-        # Create the destination directory if it doesn't exist
-        os.makedirs(os.path.dirname(destination), exist_ok=True)
         shutil.copy(source, destination)
         os.chmod(destination, 0o755)
         click.echo(f"{hook_type} hook installed successfully")
diff --git a/examples/code_review/main.py b/examples/code_review/main.py
index 0d0320a9..08fb9c69 100644
--- a/examples/code_review/main.py
+++ b/examples/code_review/main.py
@@ -12,8 +12,8 @@
 
 logging.basicConfig(level="DEBUG")
 
-pr_diff = "https://github.com/Cloud-Code-AI/kaizen/pull/559.patch"
-pr_files = "https://api.github.com/repos/Cloud-Code-AI/kaizen/pulls/559/files"
+pr_diff = "https://github.com/Cloud-Code-AI/kaizen/pull/335.patch"
+pr_files = "https://api.github.com/repos/Cloud-Code-AI/kaizen/pulls/335/files"
 pr_title = "feat: updated the prompt to provide solution"
 
 diff_text = get_diff_text(pr_diff, "")
@@ -31,7 +31,7 @@
     reeval_response=False,
 )
 
-topics = clean_keys(review_data.topics, "high")
+topics = clean_keys(review_data.topics, "important")
 review_desc = create_pr_review_text(
     review_data.issues, code_quality=review_data.code_quality
 )
@@ -54,4 +54,3 @@
 print(desc_data)
 
 comit_message = pr_desc.generate_pr_commit_message(desc_data.desc)
-print(comit_message)
diff --git a/github_app/github_helper/pull_requests.py b/github_app/github_helper/pull_requests.py
index 4f0d0be2..f2ecd386 100644
--- a/github_app/github_helper/pull_requests.py
+++ b/github_app/github_helper/pull_requests.py
@@ -19,8 +19,8 @@
 
 confidence_mapping = {
     "critical": 5,
-    "high": 4,
-    "medium": 3,
+    "important": 4,
+    "moderate": 3,
     "low": 2,
     "trivial": 1,
 }
diff --git a/kaizen/generator/pr_description.py b/kaizen/generator/pr_description.py
index d24dc9df..957ec461 100644
--- a/kaizen/generator/pr_description.py
+++ b/kaizen/generator/pr_description.py
@@ -181,4 +181,4 @@ def generate_pr_commit_message(
             DESC=desc,
         )
         resp, usage = self.provider.chat_completion_with_json(prompt, user=user)
-        return resp, usage, self.provider.model
+        return resp, usage
diff --git a/kaizen/llms/provider.py b/kaizen/llms/provider.py
index bdc86e02..553bdd4e 100644
--- a/kaizen/llms/provider.py
+++ b/kaizen/llms/provider.py
@@ -9,7 +9,7 @@
 import logging
 from collections import defaultdict
 
-DEFAULT_MAX_TOKENS = 4000
+DEFAULT_MAX_TOKENS = 8000
 
 
 def set_all_loggers_to_ERROR():
@@ -36,7 +36,7 @@ def set_all_loggers_to_ERROR():
 
 
 class LLMProvider:
-    DEFAULT_MODEL = "gpt-4o-mini"
+    DEFAULT_MODEL = "gpt-3.5-turbo-1106"
     DEFAULT_MAX_TOKENS = 4000
     DEFAULT_TEMPERATURE = 0
     DEFAULT_MODEL_CONFIG = {"model": DEFAULT_MODEL}
@@ -233,12 +233,7 @@ def is_inside_token_limit(self, PROMPT: str, percentage: float = 0.8) -> bool:
             {"role": "user", "content": PROMPT},
         ]
         token_count = litellm.token_counter(model=self.model, messages=messages)
-        if token_count is None:
-            token_count = litellm.token_counter(model=self.DEFAULT_MODEL, text=PROMPT)
-        try:
-            max_tokens = litellm.get_max_tokens(self.model)
-        except Exception:
-            max_tokens = DEFAULT_MAX_TOKENS
+        max_tokens = litellm.get_max_tokens(self.model)
         if not max_tokens:
             max_tokens = DEFAULT_MAX_TOKENS
         return token_count <= max_tokens * percentage
@@ -248,10 +243,7 @@ def available_tokens(
     ) -> int:
         if not model:
             model = self.model
-        try:
-            max_tokens = litellm.get_max_tokens(model)
-        except Exception:
-            max_tokens = DEFAULT_MAX_TOKENS
+        max_tokens = litellm.get_max_tokens(model)
         used_tokens = litellm.token_counter(model=model, text=message)
         if max_tokens:
             return int(max_tokens * percentage) - used_tokens