diff --git a/recipes/use_cases/README.md b/recipes/use_cases/README.md index 45d08ab8f..0eb10f0eb 100644 --- a/recipes/use_cases/README.md +++ b/recipes/use_cases/README.md @@ -21,3 +21,6 @@ A complete example of how to build a Llama 3 chatbot hosted on your browser that ## [Sales Bot](./customerservice_chatbots/ai_agent_chatbot/SalesBot.ipynb): Sales Bot with Llama3 - A Summarization and RAG Use Case An summarization + RAG use case built around the Amazon product review Kaggle dataset to build a helpful Music Store Sales Bot. The summarization and RAG are built on top of Llama models hosted on OctoAI, and the vector database is hosted on Weaviate Cloud Services. + +## [Llamas in Code Review](./coding/llamas-in-code-review/README.md): Two agents writing and reviewing code in a loop +This demo app shows an example of two agents one that writes code and one that reviews it. They go back and forth for multiple rounds and the code is improved over time. It uses [llama-stack](https://github.com/meta-llama/llama-stack) on the backend. diff --git a/recipes/use_cases/coding/llamas-in-code-review/.gitignore b/recipes/use_cases/coding/llamas-in-code-review/.gitignore new file mode 100644 index 000000000..76507387b --- /dev/null +++ b/recipes/use_cases/coding/llamas-in-code-review/.gitignore @@ -0,0 +1 @@ +sandbox/ \ No newline at end of file diff --git a/recipes/use_cases/coding/llamas-in-code-review/README.md b/recipes/use_cases/coding/llamas-in-code-review/README.md new file mode 100644 index 000000000..eab5a7ad0 --- /dev/null +++ b/recipes/use_cases/coding/llamas-in-code-review/README.md @@ -0,0 +1,75 @@ +## Llamas in Code Review + + + +In this example, we have two agents: + +- **Code Author:** Writes the code. +- **Code Reviewer:** Reviews the code and provides constructive feedback. + +Together, they'll engage in multiple iterations, and over time improve the code. + +This demo demonstrates tool calls, structured outputs and looping with llama. 
+ +## Setup + +### Prerequisites + +- Python 3.10+ +- Docker + +### Running the demo + +We'll be using the fireworks llama-stack distribution to run this example - but you can use most other llama-stack distributions (instructions [here](https://llama-stack.readthedocs.io/en/latest/distributions/index.html)). +(Though note that not all distributions support structured outputs yet e.g., Ollama). + +```bash +# You can get this from https://fireworks.ai/account/api-keys - they give out initial free credits +export FIREWORKS_API_KEY= + +# This runs the llama-stack server +export LLAMA_STACK_PORT=5000 +docker run -it \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ~/.llama:/root/.llama \ + llamastack/distribution-fireworks \ + --port $LLAMA_STACK_PORT \ + --env FIREWORKS_API_KEY=$FIREWORKS_API_KEY +``` + +Then to run the app: + +```bash +# cd to this directory +cd recipes/use_cases/coding/llamas-in-code-review + +# Create a virtual environment +# Use your preferred method to create a virtual environment +python -m venv .venv +source .venv/bin/activate + +# Install llama-stack-client +pip install llama-stack-client + +# Run the demo +export LLAMA_STACK_PORT=5000 +python app.py +``` + +The agents will then start writing code in the ./sandbox directory. + +### Configuration + +You can customize the application's behavior by adjusting parameters in `app.py`: + +```python +# The aim of the program +PROGRAM_OBJECTIVE="a web server that has an API endpoint that translates text from English to French." + +# Number of code review cycles +CODE_REVIEW_CYCLES = 5 + +# The model to use +# 3.1 405B works the best, 3.3 70B works really well too, smaller models are a bit hit and miss. 
+MODEL_ID = "meta-llama/Llama-3.3-70B-Instruct" +``` \ No newline at end of file diff --git a/recipes/use_cases/coding/llamas-in-code-review/app.py b/recipes/use_cases/coding/llamas-in-code-review/app.py new file mode 100644 index 000000000..6c69176cf --- /dev/null +++ b/recipes/use_cases/coding/llamas-in-code-review/app.py @@ -0,0 +1,193 @@ +import os +from llama_stack_client import LlamaStackClient +from tools import SANDBOX_DIR, TOOLS, run_tool +import json + +PROGRAM_OBJECTIVE="a web server that has an API endpoint that translates text from English to French." + +# Number of code review cycles +CODE_REVIEW_CYCLES = 5 + +# Works: +MODEL_ID = "meta-llama/Llama-3.3-70B-Instruct" +# MODEL_ID = "meta-llama/Llama-3.1-405B-Instruct-FP8" +# MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct" # Works okay + +# Note: Smaller models don't work very well in this example. But feel free to try them out. +# MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct" +# MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct" + +CODER_AGENT_SYSTEM_PROMPT=f""" +You are a software engineer who is writing code to build a python codebase: {PROGRAM_OBJECTIVE}. +""" + +REVIEWER_AGENT_SYSTEM_PROMPT=f""" +You are a senior software engineer who is reviewing the codebase that was created by another software engineer. +The program is {PROGRAM_OBJECTIVE}. +If you think the codebase is good enough to ship, please say LGTM. 
# Effectively unlimited output tokens (very high cap rather than a true "no limit")
+ """ + response = client.inference.chat_completion( + model_id=MODEL_ID, + messages=[ + {"role": "system", "content": CODER_AGENT_SYSTEM_PROMPT}, + {"role": "user", "content": prompt}, + ], + sampling_params={ + "max_tokens": MAX_TOKENS, + }, + response_format={ + "type": "json_schema", + "json_schema": { + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Plan", + "description": f"A plan to complete the task of creating a codebase that will {PROGRAM_OBJECTIVE}.", + "type": "object", + "properties": { + "steps": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": ["steps"], + "additionalProperties": False, + } + }, + stream=True, + ) + + content = "" + for chunk in response: + if chunk.event.delta: + print(chunk.event.delta, end="", flush=True) + content += chunk.event.delta + try: + plan = json.loads(content) + except Exception as e: + print(f"Error parsing plan into JSON: {e}") + plan = {"steps": []} + print("\n") + + # Coding agent executes the plan + print(f"{BLUE}Coder Agent - Executing Plan - Iteration {i}{RESET}") + if review_feedback: + prompt_feedback = f""" + Keep in mind one a senior engineer has provided the following feedback: + {review_feedback} + + """ + else: + prompt_feedback = "" + + for step in plan["steps"]: + prompt = f""" + You have 3 different operations you can perform. create_file(path, content), update_file(path, content), delete_file(path). + Here is the codebase: + {get_codebase_contents()} + Please perform the following operation: {step} + + {prompt_feedback} + Please don't create incomplete files. 
+ """ + try: + response = client.inference.chat_completion( + model_id=MODEL_ID, + messages=[ + {"role": "system", "content": CODER_AGENT_SYSTEM_PROMPT}, + {"role": "user", "content": prompt}, + ], + sampling_params={ + "max_tokens": MAX_TOKENS, + }, + tools=TOOLS, + tool_prompt_format=tool_prompt_format, + ) + except Exception as e: + print(f"Error running tool - skipping: {e.message[:50] + '...'}") + continue + message = response.completion_message + if message.content: + print("Didn't get tool call - got message: ", message.content[:50] + "...") + else: + tool_call = message.tool_calls[0] + run_tool(tool_call) + print("\n") + + print(f"{MAGENTA}Reviewer Agent - Reviewing Codebase - Iteration {i}{RESET}") + response = client.inference.chat_completion( + model_id=MODEL_ID, + messages=[ + {"role": "system", "content": REVIEWER_AGENT_SYSTEM_PROMPT}, + {"role": "user", "content": f""" + Here is the full codebase: + {get_codebase_contents()} + Please review the codebase and make sure it is correct. + Please provide a list of changes you would like to make to the codebase. + """}, + ], + sampling_params={ + "max_tokens": MAX_TOKENS, + }, + stream=True, + ) + review_feedback = "" + for chunk in response: + if chunk.event.delta: + print(chunk.event.delta, end="", flush=True) + review_feedback += chunk.event.delta + print("\n") diff --git a/recipes/use_cases/coding/llamas-in-code-review/tools.py b/recipes/use_cases/coding/llamas-in-code-review/tools.py new file mode 100644 index 000000000..4936a6e5f --- /dev/null +++ b/recipes/use_cases/coding/llamas-in-code-review/tools.py @@ -0,0 +1,143 @@ +import os + +SANDBOX_DIR = os.path.join(os.getcwd(), "sandbox") + +TOOLS = [ + { + "tool_name": "create_file", + "description": "Create a file with the given name and content. 
If there are any directories that don't exist, create them.", + "parameters": { + "path": { + "param_type": "string", + "description": "The relative path to the file to create", + "required": True, + }, + "content": { + "param_type": "string", + "description": "The content of the file to create", + "required": True, + }, + }, + }, + { + "tool_name": "update_file", + "description": "Update a file with the given name and content. If the file does not exist, create it.", + "parameters": { + "path": { + "param_type": "string", + "description": "The relative path to the file to update", + "required": True, + }, + "content": { + "param_type": "string", + "description": "The content of the file to update", + "required": True, + }, + }, + }, + { + "tool_name": "delete_file", + "description": "Delete a file with the given path. If the file does not exist, do nothing.", + "parameters": { + "path": { + "param_type": "string", + "description": "The relative path to the file to delete", + "required": True, + }, + }, + }, +] + + +def upsert_file(path, content, operation): + if not path: + print(f"{operation}: couldn't parse path={path}") + return + + # Ensure path doesn't try to escape sandbox directory + normalized_path = os.path.normpath(path) + if normalized_path.startswith('..') or normalized_path.startswith('/'): + print(f"{operation}: Path {path} attempts to escape sandbox directory. Skipping.") + return + + # Hack because llama sometimes escapes newlines + content = content.replace("\\n", "\n") + + # Create any directories that don't exist + try: + os.makedirs(os.path.dirname(os.path.join(SANDBOX_DIR, path)), exist_ok=True) + except Exception as e: + print(f"{operation}: error creating parent directories: {path}. Skipping.") + return + + # Write to file + try: + with open(os.path.join(SANDBOX_DIR, path), "w") as f: + f.write(content) + print(f"{operation}: {os.path.join(SANDBOX_DIR, path)}") + except Exception as e: + print(f"{operation}: error writing to file: {path}. 
Skipping.") + return + + +def create_file(path, content): + upsert_file(path, content, "create_file") + +def update_file(path, content): + upsert_file(path, content, "update_file") + +def delete_file(path): + # Ensure path doesn't try to escape sandbox directory + normalized_path = os.path.normpath(path) + if normalized_path.startswith('..') or normalized_path.startswith('/'): + print(f"delete_file: Path {path} attempts to escape sandbox directory. Skipping.") + return + + # If the file doesn't exist, don't do anything + if not os.path.exists(os.path.join(SANDBOX_DIR, path)): + print( + f"Tried to delete file {os.path.join(SANDBOX_DIR, path)} but it does not exist" + ) + return + + try: + os.remove(os.path.join(SANDBOX_DIR, path)) + print(f"Deleted file {os.path.join(SANDBOX_DIR, path)}") + except Exception as e: + print(f"delete_file, error deleting file: {path}. Skipping.") + return + + +def run_tool(tool_call): + arguments = tool_call.arguments + if tool_call.tool_name == "create_file": + if "path" not in arguments or "content" not in arguments: + print(f"create_file, couldn't parse arguments: {arguments}") + return + create_file(arguments["path"], arguments["content"]) + elif tool_call.tool_name == "update_file": + # Does the same thing as create_file - but nice to have a separate function for updating files + # So the LLM has the option to update files if it wants to - if that makes more sense than creating a new file + if "path" not in arguments or "content" not in arguments: + print(f"update_file, couldn't parse arguments: {arguments}") + return + update_file(arguments["path"], arguments["content"]) + elif tool_call.tool_name == "delete_file": + if "path" not in arguments: + print(f"delete_file: couldn't parse path={arguments['path']}") + return + delete_file(arguments["path"]) + + +if os.path.exists(SANDBOX_DIR): + # Clear the contents of the directory + for item in os.listdir(SANDBOX_DIR): + item_path = os.path.join(SANDBOX_DIR, item) + if 
os.path.isfile(item_path): + os.unlink(item_path) + elif os.path.isdir(item_path): + import shutil + + shutil.rmtree(item_path) +else: + os.makedirs(SANDBOX_DIR)