diff --git a/.gitignore b/.gitignore index e7a1d6e..cc38ad9 100644 --- a/.gitignore +++ b/.gitignore @@ -15,6 +15,8 @@ venv/ .env *.pyc *.db +*.db-shm +*.db-wal config.json poetry.lock .DS_Store diff --git a/core/agents/architect.py b/core/agents/architect.py index 6df45b6..c1691d4 100644 --- a/core/agents/architect.py +++ b/core/agents/architect.py @@ -168,7 +168,7 @@ async def plan_architecture(self, spec: Specification): await self.send_message("Picking technologies to use ...") - llm = self.get_llm() + llm = self.get_llm(stream_output=True) convo = ( AgentConvo(self) .template( @@ -247,11 +247,12 @@ async def check_system_dependencies(self, spec: Specification): remedy = "If you would like to use it locally, please install it before proceeding." await self.send_message(f"❌ {dep['name']} is not available. {remedy}") await self.ask_question( - f"Once you have installed {dep['name']}, please press Continue.", - buttons={"continue": "Continue"}, + "", + buttons={"continue": f"I've installed {dep['name']}"}, buttons_only=True, default="continue", ) + else: await self.send_message(f"✅ {dep['name']} is available.") @@ -271,7 +272,7 @@ async def configure_template(self, spec: Specification, template_class: BaseProj # If template has no options, no need to ask LLM for anything return NoOptions() - llm = self.get_llm() + llm = self.get_llm(stream_output=True) convo = ( AgentConvo(self) .template( diff --git a/core/agents/base.py b/core/agents/base.py index 242d255..f2e5ac5 100644 --- a/core/agents/base.py +++ b/core/agents/base.py @@ -28,17 +28,18 @@ def __init__( step: Optional[Any] = None, prev_response: Optional["AgentResponse"] = None, process_manager: Optional["ProcessManager"] = None, + data: Optional[Any] = None, ): """ Create a new agent. """ self.ui_source = AgentSource(self.display_name, self.agent_type) self.ui = ui - self.stream_output = True self.state_manager = state_manager self.process_manager = process_manager self.prev_response = prev_response self.step = step + self.data = data @property def current_state(self) -> ProjectState: @@ -55,11 +56,11 @@ async def send_message(self, message: str): Send a message to the user. Convenience method, uses `UIBase.send_message()` to send the message, - setting the correct source. + setting the correct source and project state ID. :param message: Message to send. """ - await self.ui.send_message(message + "\n", source=self.ui_source) + await self.ui.send_message(message + "\n", source=self.ui_source, project_state_id=str(self.current_state.id)) async def ask_question( self, @@ -76,7 +77,7 @@ async def ask_question( Ask a question to the user and return the response. Convenience method, uses `UIBase.ask_question()` to - ask the question, setting the correct source and + ask the question, setting the correct source and project state ID, and logging the question/response. :param question: Question to ask. @@ -97,6 +98,7 @@ async def ask_question( hint=hint, initial_text=initial_text, source=self.ui_source, + project_state_id=str(self.current_state.id), ) await self.state_manager.log_user_input(question, response) return response @@ -106,16 +108,14 @@ async def stream_handler(self, content: str): Handle streamed response from the LLM. Serves as a callback to `AgentBase.llm()` so it can stream the responses to the UI. - This can be turned on/off on a pe-request basis by setting `BaseAgent.stream_output` - to True or False. :param content: Response content. 
""" - if self.stream_output: - await self.ui.send_stream_chunk(content, source=self.ui_source) + + await self.ui.send_stream_chunk(content, source=self.ui_source, project_state_id=str(self.current_state.id)) if content is None: - await self.ui.send_message("", source=self.ui_source) + await self.ui.send_message("", source=self.ui_source, project_state_id=str(self.current_state.id)) async def error_handler(self, error: LLMError, message: Optional[str] = None) -> bool: """ @@ -150,7 +150,7 @@ async def error_handler(self, error: LLMError, message: Optional[str] = None) -> return False - def get_llm(self, name=None) -> Callable: + def get_llm(self, name=None, stream_output=False) -> Callable: """ Get a new instance of the agent-specific LLM client. @@ -170,7 +170,8 @@ def get_llm(self, name=None) -> Callable: llm_config = config.llm_for_agent(name) client_class = BaseLLMClient.for_provider(llm_config.provider) - llm_client = client_class(llm_config, stream_handler=self.stream_handler, error_handler=self.error_handler) + stream_handler = self.stream_handler if stream_output else None + llm_client = client_class(llm_config, stream_handler=stream_handler, error_handler=self.error_handler) async def client(convo, **kwargs) -> Any: """ diff --git a/core/agents/bug_hunter.py b/core/agents/bug_hunter.py index 30df597..550d12b 100644 --- a/core/agents/bug_hunter.py +++ b/core/agents/bug_hunter.py @@ -9,16 +9,11 @@ from core.db.models.project_state import IterationStatus from core.llm.parser import JSONParser from core.log import get_logger +from core.telemetry import telemetry log = get_logger(__name__) -class StepType(str, Enum): - ADD_LOG = "add_log" - EXPLAIN_PROBLEM = "explain_problem" - GET_ADDITIONAL_FILES = "get_additional_files" - - class HuntConclusionType(str, Enum): ADD_LOGS = magic_words.ADD_LOGS PROBLEM_IDENTIFIED = magic_words.PROBLEM_IDENTIFIED @@ -30,6 +25,21 @@ class HuntConclusionOptions(BaseModel): ) +class ImportantLog(BaseModel): + logCode: str = Field(description="Actual line of code that prints the log.") + shouldBeDifferent: bool = Field( + description="Whether the current output should be different from the expected output." + ) + filePath: str = Field(description="Path to the file in which the log exists.") + currentOutput: str = Field(description="Current output of the log.") + expectedOutput: str = Field(description="Expected output of the log.") + explanation: str = Field(description="A brief explanation of the log.") + + +class ImportantLogsForDebugging(BaseModel): + logs: list[ImportantLog] = Field(description="Important logs that will help the human debug the current bug.") + + class BugHunter(BaseAgent): agent_type = "bug-hunter" display_name = "Bug Hunter" @@ -43,12 +53,17 @@ async def run(self) -> AgentResponse: # TODO determine how to find a bug (eg. check in db, ask user a question, etc.) 
return await self.check_logs() elif current_iteration["status"] == IterationStatus.AWAITING_USER_TEST: + await self.ui.send_bug_hunter_status("close_status", 0) return await self.ask_user_to_test(False, True) elif current_iteration["status"] == IterationStatus.AWAITING_BUG_REPRODUCTION: + await self.ui.send_bug_hunter_status("close_status", 0) return await self.ask_user_to_test(True, False) + elif current_iteration["status"] == IterationStatus.START_PAIR_PROGRAMMING: + await self.ui.send_bug_hunter_status("close_status", 0) + return await self.start_pair_programming() async def get_bug_reproduction_instructions(self): - llm = self.get_llm() + llm = self.get_llm(stream_output=True) convo = AgentConvo(self).template( "get_bug_reproduction_instructions", current_task=self.current_state.current_task, @@ -61,25 +76,8 @@ async def get_bug_reproduction_instructions(self): self.next_state.current_iteration["bug_reproduction_description"] = bug_reproduction_instructions async def check_logs(self, logs_message: str = None): - llm = self.get_llm(CHECK_LOGS_AGENT_NAME) - convo = AgentConvo(self).template( - "iteration", - current_task=self.current_state.current_task, - user_feedback=self.current_state.current_iteration["user_feedback"], - user_feedback_qa=self.current_state.current_iteration["user_feedback_qa"], - docs=self.current_state.docs, - magic_words=magic_words, - next_solution_to_try=None, - ) - - for hunting_cycle in self.current_state.current_iteration.get("bug_hunting_cycles", []): - convo = convo.assistant(hunting_cycle["human_readable_instructions"]).template( - "log_data", - backend_logs=hunting_cycle["backend_logs"], - frontend_logs=hunting_cycle["frontend_logs"], - fix_attempted=hunting_cycle["fix_attempted"], - ) - + llm = self.get_llm(CHECK_LOGS_AGENT_NAME, stream_output=True) + convo = self.generate_iteration_convo_so_far() human_readable_instructions = await llm(convo, temperature=0.5) convo = ( @@ -93,42 +91,37 @@ async def check_logs(self, logs_message: str = None): llm = self.get_llm() hunt_conclusion = await llm(convo, parser=JSONParser(HuntConclusionOptions), temperature=0) - self.next_state.current_iteration["description"] = human_readable_instructions - self.next_state.current_iteration["bug_hunting_cycles"] += [ - { - "human_readable_instructions": human_readable_instructions, - "fix_attempted": any( - c["fix_attempted"] for c in self.current_state.current_iteration["bug_hunting_cycles"] - ), - } - ] - + bug_hunting_cycles = self.current_state.current_iteration.get("bug_hunting_cycles") + num_bug_hunting_cycles = len(bug_hunting_cycles) if bug_hunting_cycles else 0 if hunt_conclusion.conclusion == magic_words.PROBLEM_IDENTIFIED: # if no need for logs, implement iteration same as before - self.next_state.current_iteration["status"] = IterationStatus.AWAITING_BUG_FIX - await self.send_message("The bug is found - I'm attempting to fix it.") + self.set_data_for_next_hunting_cycle(human_readable_instructions, IterationStatus.AWAITING_BUG_FIX) + await self.send_message("Found the bug. 
I'm attempting to fix it ...") + await self.ui.send_bug_hunter_status("fixing_bug", num_bug_hunting_cycles) else: # if logs are needed, add logging steps - self.next_state.current_iteration["status"] = IterationStatus.AWAITING_LOGGING - await self.send_message("Adding more logs to identify the bug.") + self.set_data_for_next_hunting_cycle(human_readable_instructions, IterationStatus.AWAITING_LOGGING) + await self.send_message("Adding more logs to identify the bug ...") + await self.ui.send_bug_hunter_status("adding_logs", num_bug_hunting_cycles) self.next_state.flag_iterations_as_modified() return AgentResponse.done(self) async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiting_user_test: bool = False): - await self.send_message( - "You can reproduce the bug like this:\n\n" - + self.current_state.current_iteration["bug_reproduction_description"] - ) + await self.ui.stop_app() + test_instructions = self.current_state.current_iteration["bug_reproduction_description"] + await self.send_message("You can reproduce the bug like this:\n\n" + test_instructions) + await self.ui.send_test_instructions(test_instructions) if self.current_state.run_command: await self.ui.send_run_command(self.current_state.run_command) if awaiting_user_test: + buttons = {"yes": "Yes, the issue is fixed", "no": "No", "start_pair_programming": "Start Pair Programming"} user_feedback = await self.ask_question( "Is the bug you reported fixed now?", - buttons={"yes": "Yes, the issue is fixed", "no": "No"}, - default="continue", + buttons=buttons, + default="yes", buttons_only=True, hint="Instructions for testing:\n\n" + self.current_state.current_iteration["bug_reproduction_description"], @@ -137,14 +130,23 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti if user_feedback.button == "yes": self.next_state.complete_iteration() + elif user_feedback.button == "start_pair_programming": + self.next_state.current_iteration["status"] = IterationStatus.START_PAIR_PROGRAMMING + self.next_state.flag_iterations_as_modified() else: awaiting_bug_reproduction = True if awaiting_bug_reproduction: # TODO how can we get FE and BE logs automatically? 
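The button the user picks here drives the iteration state machine: "yes" completes the iteration, "start_pair_programming" hands off to the new pair-programming flow, and anything else falls through to collecting reproduction logs. A rough sketch of just that mapping (the enum values are illustrative; the real IterationStatus lives in core.db.models.project_state, and "done" is really a complete_iteration() call):

from enum import Enum

class IterationStatus(str, Enum):
    # Subset of the statuses referenced above; string values are made up for this example.
    AWAITING_BUG_REPRODUCTION = "awaiting_bug_reproduction"
    START_PAIR_PROGRAMMING = "start_pair_programming"
    DONE = "done"

def next_status_after_user_test(button: str) -> IterationStatus:
    if button == "yes":
        return IterationStatus.DONE
    if button == "start_pair_programming":
        return IterationStatus.START_PAIR_PROGRAMMING
    return IterationStatus.AWAITING_BUG_REPRODUCTION

assert next_status_after_user_test("no") == IterationStatus.AWAITING_BUG_REPRODUCTION
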
+ buttons = { + "copy_backend_logs": "Copy Backend Logs", + "continue": "Continue without logs", + "done": "Bug is fixed", + "start_pair_programming": "Start Pair Programming", + } backend_logs = await self.ask_question( - "Please do exactly what you did in the last iteration, paste **BACKEND** logs here and click CONTINUE.", - buttons={"continue": "Continue", "done": "Bug is fixed"}, + "Please share the relevant Backend logs", + buttons=buttons, default="continue", hint="Instructions for testing:\n\n" + self.current_state.current_iteration["bug_reproduction_description"], @@ -152,10 +154,26 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti if backend_logs.button == "done": self.next_state.complete_iteration() + elif backend_logs.button == "start_pair_programming": + self.next_state.current_iteration["status"] = IterationStatus.START_PAIR_PROGRAMMING + self.next_state.flag_iterations_as_modified() else: + buttons = { + "copy_frontend_logs": "Copy Frontend Logs", + "continue": "Continue without logs", + } frontend_logs = await self.ask_question( - "Please paste **frontend** logs here and click CONTINUE.", - buttons={"continue": "Continue", "done": "Bug is fixed"}, + "Please share the relevant Frontend logs", + buttons=buttons, + default="continue", + hint="Instructions for testing:\n\n" + + self.current_state.current_iteration["bug_reproduction_description"], + ) + + buttons = {"continue": "Continue without feedback"} + user_feedback = await self.ask_question( + "Please add any additional feedback that could help Pythagora solve this bug", + buttons=buttons, default="continue", hint="Instructions for testing:\n\n" + self.current_state.current_iteration["bug_reproduction_description"], @@ -164,9 +182,169 @@ async def ask_user_to_test(self, awaiting_bug_reproduction: bool = False, awaiti # TODO select only the logs that are new (with PYTHAGORA_DEBUGGING_LOG) self.next_state.current_iteration["bug_hunting_cycles"][-1]["backend_logs"] = backend_logs.text self.next_state.current_iteration["bug_hunting_cycles"][-1]["frontend_logs"] = frontend_logs.text + self.next_state.current_iteration["bug_hunting_cycles"][-1]["user_feedback"] = user_feedback.text self.next_state.current_iteration["status"] = IterationStatus.HUNTING_FOR_BUG - if frontend_logs.button == "done": - self.next_state.complete_iteration() + return AgentResponse.done(self) + + async def start_pair_programming(self): + llm = self.get_llm(stream_output=True) + convo = self.generate_iteration_convo_so_far(True) + if len(convo.messages) > 1: + convo.remove_last_x_messages(1) + convo = convo.template("problem_explanation") + await self.ui.start_important_stream() + initial_explanation = await llm(convo, temperature=0.5) + + llm = self.get_llm() + convo = convo.template("data_about_logs").require_schema(ImportantLogsForDebugging) + data_about_logs = await llm(convo, parser=JSONParser(ImportantLogsForDebugging), temperature=0.5) + + await self.ui.send_data_about_logs( + { + "logs": [ + { + "currentLog": d.currentOutput, + "expectedLog": d.expectedOutput, + "explanation": d.explanation, + "filePath": d.filePath, + "logCode": d.logCode, + "shouldBeDifferent": d.shouldBeDifferent, + } + for d in data_about_logs.logs + ] + } + ) + + while True: + self.next_state.current_iteration["initial_explanation"] = initial_explanation + next_step = await self.ask_question( + "What do you want to do?", + buttons={ + "question": "I have a question", + "done": "I fixed the bug myself", + "tell_me_more": "Tell me more about 
the bug", + "solution_hint": "I think I know where the problem is", + "other": "Other", + }, + buttons_only=True, + default="continue", + hint="Instructions for testing:\n\n" + + self.current_state.current_iteration["bug_reproduction_description"], + ) + + await telemetry.trace_code_event( + "pair-programming", + { + "button": next_step.button, + "num_tasks": len(self.current_state.tasks), + "num_epics": len(self.current_state.epics), + "num_iterations": len(self.current_state.iterations), + "app_id": str(self.state_manager.project.id), + "app_name": self.state_manager.project.name, + "folder_name": self.state_manager.project.folder_name, + }, + ) + + # TODO: remove when Leon checks + convo.remove_last_x_messages(2) + + if len(convo.messages) > 10: + convo.trim(1, 2) + + # TODO: in the future improve with a separate conversation that parses the user info and goes into an appropriate if statement + if next_step.button == "done": + self.next_state.complete_iteration() + break + elif next_step.button == "question": + user_response = await self.ask_question("Oh, cool, what would you like to know?") + convo = convo.template("ask_a_question", question=user_response.text) + await self.ui.start_important_stream() + llm_answer = await llm(convo, temperature=0.5) + await self.send_message(llm_answer) + elif next_step.button == "tell_me_more": + convo.template("tell_me_more") + await self.ui.start_important_stream() + response = await llm(convo, temperature=0.5) + await self.send_message(response) + elif next_step.button == "other": + # this is the same as "question" - we want to keep an option for users to click to understand if we're missing something with other options + user_response = await self.ask_question("Let me know what you think ...") + convo = convo.template("ask_a_question", question=user_response.text) + await self.ui.start_important_stream() + llm_answer = await llm(convo, temperature=0.5) + await self.send_message(llm_answer) + elif next_step.button == "solution_hint": + human_hint_label = "Amazing! How do you think we can solve this bug?" + while True: + human_hint = await self.ask_question(human_hint_label) + convo = convo.template("instructions_from_human_hint", human_hint=human_hint.text) + await self.ui.start_important_stream() + llm = self.get_llm(CHECK_LOGS_AGENT_NAME, stream_output=True) + human_readable_instructions = await llm(convo, temperature=0.5) + human_approval = await self.ask_question( + "Can I implement this solution?", buttons={"yes": "Yes", "no": "No"}, buttons_only=True + ) + llm = self.get_llm(stream_output=True) + if human_approval.button == "yes": + self.set_data_for_next_hunting_cycle( + human_readable_instructions, IterationStatus.AWAITING_BUG_FIX + ) + self.next_state.flag_iterations_as_modified() + break + else: + human_hint_label = "Oh, my bad, what did I misunderstand?" 
+ break + elif next_step.button == "tell_me_more": + convo.template("tell_me_more") + await self.ui.start_important_stream() + response = await llm(convo, temperature=0.5) + await self.send_message(response) + continue return AgentResponse.done(self) + + def generate_iteration_convo_so_far(self, omit_last_cycle=False): + convo = AgentConvo(self).template( + "iteration", + current_task=self.current_state.current_task, + user_feedback=self.current_state.current_iteration["user_feedback"], + user_feedback_qa=self.current_state.current_iteration["user_feedback_qa"], + docs=self.current_state.docs, + magic_words=magic_words, + next_solution_to_try=None, + ) + + hunting_cycles = self.current_state.current_iteration.get("bug_hunting_cycles", [])[ + 0 : (-1 if omit_last_cycle else None) + ] + + for hunting_cycle in hunting_cycles: + convo = convo.assistant(hunting_cycle["human_readable_instructions"]).template( + "log_data", + backend_logs=hunting_cycle.get("backend_logs"), + frontend_logs=hunting_cycle.get("frontend_logs"), + fix_attempted=hunting_cycle.get("fix_attempted"), + user_feedback=hunting_cycle.get("user_feedback"), + ) + + return convo + + def set_data_for_next_hunting_cycle(self, human_readable_instructions, new_status): + self.next_state.current_iteration["description"] = human_readable_instructions + self.next_state.current_iteration["bug_hunting_cycles"] += [ + { + "human_readable_instructions": human_readable_instructions, + "fix_attempted": any( + c["fix_attempted"] for c in self.current_state.current_iteration["bug_hunting_cycles"] + ), + } + ] + + self.next_state.current_iteration["status"] = new_status + + async def continue_on(self, convo, button_value, user_response): + llm = self.get_llm(stream_output=True) + convo = convo.template("continue_on") + continue_on = await llm(convo, temperature=0.5) + return continue_on diff --git a/core/agents/code_monkey.py b/core/agents/code_monkey.py index 710da8d..3141d9e 100644 --- a/core/agents/code_monkey.py +++ b/core/agents/code_monkey.py @@ -1,17 +1,50 @@ -from os.path import basename +import re +from difflib import unified_diff +from enum import Enum +from typing import Optional, Union from pydantic import BaseModel, Field from core.agents.base import BaseAgent from core.agents.convo import AgentConvo from core.agents.response import AgentResponse, ResponseType -from core.config import CODE_MONKEY_AGENT_NAME, DESCRIBE_FILES_AGENT_NAME +from core.config import CODE_MONKEY_AGENT_NAME, CODE_REVIEW_AGENT_NAME, DESCRIBE_FILES_AGENT_NAME from core.llm.parser import JSONParser, OptionalCodeBlockParser from core.log import get_logger log = get_logger(__name__) +# Constant for indicating missing new line at the end of a file in a unified diff +NO_EOL = "\\ No newline at end of file" + +# Regular expression pattern for matching hunk headers +PATCH_HEADER_PATTERN = re.compile(r"^@@ -(\d+),?(\d+)? \+(\d+),?(\d+)? @@") + +# Maximum number of attempts to ask for review if it can't be parsed +MAX_REVIEW_RETRIES = 2 + +# Maximum number of code implementation attempts after which we accept the changes unconditionaly +MAX_CODING_ATTEMPTS = 3 + + +class Decision(str, Enum): + APPLY = "apply" + IGNORE = "ignore" + REWORK = "rework" + + +class Hunk(BaseModel): + number: int = Field(description="Index of the hunk in the diff. 
Starts from 1.") + reason: str = Field(description="Reason for applying or ignoring this hunk, or for asking for it to be reworked.") + decision: Decision = Field(description="Whether to apply this hunk, rework, or ignore it.") + + +class ReviewChanges(BaseModel): + hunks: list[Hunk] + review_notes: str = Field(description="Additional review notes (optional, can be empty).") + + class FileDescription(BaseModel): summary: str = Field( description="Detailed description summarized what the file is about, and what the major classes, functions, elements or other functionality is implemented." @@ -29,9 +62,15 @@ async def run(self) -> AgentResponse: if self.prev_response and self.prev_response.type == ResponseType.DESCRIBE_FILES: return await self.describe_files() else: - return await self.implement_changes() + data = await self.implement_changes() + code_review_done = False + while not code_review_done: + review_response = await self.run_code_review(data) + if isinstance(review_response, AgentResponse): + return review_response + data = await self.implement_changes(review_response) - async def implement_changes(self) -> AgentResponse: + async def implement_changes(self, data: Optional[dict] = None) -> dict: file_name = self.step["save_file"]["path"] current_file = await self.state_manager.get_file_by_path(file_name) @@ -39,17 +78,18 @@ async def implement_changes(self) -> AgentResponse: task = self.current_state.current_task - if self.prev_response and self.prev_response.type == ResponseType.CODE_REVIEW_FEEDBACK: - attempt = self.prev_response.data["attempt"] + 1 - feedback = self.prev_response.data["feedback"] + if data is not None: + attempt = data["attempt"] + 1 + feedback = data["feedback"] log.debug(f"Fixing file {file_name} after review feedback: {feedback} ({attempt}. 
attempt)") - await self.send_message(f"Reworking changes I made to {file_name} ...") + await self.ui.send_file_status(file_name, "reworking") else: log.debug(f"Implementing file {file_name}") - await self.send_message(f"{'Updating existing' if file_content else 'Creating new'} file {file_name} ...") - self.next_state.action = ( - f'Update file "{basename(file_name)}"' if file_content else f'Create file "{basename(file_name)}"' - ) + if data is None: + await self.ui.send_file_status(file_name, "updating" if file_content else "creating") + else: + await self.ui.send_file_status(file_name, "reworking") + self.next_state.action = "Updating files" attempt = 1 feedback = None @@ -58,9 +98,7 @@ async def implement_changes(self) -> AgentResponse: user_feedback_qa = None llm = self.get_llm(CODE_MONKEY_AGENT_NAME) - if "task_review_feedback" in task and task["task_review_feedback"]: - instructions = task.get("task_review_feedback") - elif iterations: + if iterations: last_iteration = iterations[-1] instructions = last_iteration.get("description") user_feedback = last_iteration.get("user_feedback") @@ -77,16 +115,22 @@ async def implement_changes(self) -> AgentResponse: user_feedback_qa=user_feedback_qa, ) if feedback: - convo.assistant(f"```\n{self.prev_response.data['new_content']}\n```\n").template( + convo.assistant(f"```\n{data['new_content']}\n```\n").template( "review_feedback", - content=self.prev_response.data["approved_content"], + content=data["approved_content"], original_content=file_content, rework_feedback=feedback, ) response: str = await llm(convo, temperature=0, parser=OptionalCodeBlockParser()) # FIXME: provide a counter here so that we don't have an endless loop here - return AgentResponse.code_review(self, file_name, task["instructions"], file_content, response, attempt) + return { + "path": file_name, + "instructions": task["instructions"], + "old_content": file_content, + "new_content": response, + "attempt": attempt, + } async def describe_files(self) -> AgentResponse: llm = self.get_llm(DESCRIBE_FILES_AGENT_NAME) @@ -108,8 +152,6 @@ async def describe_files(self) -> AgentResponse: continue log.debug(f"Describing file {file.path}") - await self.send_message(f"Describing file {file.path} ...") - convo = ( AgentConvo(self) .template( @@ -127,3 +169,315 @@ async def describe_files(self) -> AgentResponse: "references": llm_response.references, } return AgentResponse.done(self) + + # ------------------------------ + # CODE REVIEW + # ------------------------------ + + async def run_code_review(self, data: Optional[dict]) -> Union[AgentResponse, dict]: + await self.ui.send_file_status(data["path"], "reviewing") + if ( + data is not None + and not data["old_content"] + or data["new_content"] == data["old_content"] + or data["attempt"] >= MAX_CODING_ATTEMPTS + ): + # we always auto-accept new files and unchanged files, or if we've tried too many times + return await self.accept_changes(data["path"], data["old_content"], data["new_content"]) + + approved_content, feedback = await self.review_change( + data["path"], + data["instructions"], + data["old_content"], + data["new_content"], + ) + if feedback: + return { + "new_content": data["new_content"], + "approved_content": approved_content, + "feedback": feedback, + "attempt": data["attempt"], + } + else: + return await self.accept_changes(data["path"], data["old_content"], approved_content) + + async def accept_changes(self, file_path: str, old_content: str, new_content: str) -> AgentResponse: + await 
self.ui.send_file_status(file_path, "done") + + n_new_lines, n_del_lines = self.get_line_changes(old_content, new_content) + await self.ui.generate_diff(file_path, old_content, new_content, n_new_lines, n_del_lines) + + await self.state_manager.save_file(file_path, new_content) + self.next_state.complete_step() + + input_required = self.state_manager.get_input_required(new_content) + if input_required: + return AgentResponse.input_required( + self, + [{"file": file_path, "line": line} for line in input_required], + ) + else: + return AgentResponse.done(self) + + def _get_task_convo(self) -> AgentConvo: + # FIXME: Current prompts reuse conversation from the developer so we have to resort to this + task = self.current_state.current_task + current_task_index = self.current_state.tasks.index(task) + + convo = AgentConvo(self).template( + "breakdown", + task=task, + iteration=None, + current_task_index=current_task_index, + ) + # TODO: We currently show last iteration to the code monkey; we might need to show the task + # breakdown and all the iterations instead? To think about when refactoring prompts + if self.current_state.iterations: + convo.assistant(self.current_state.iterations[-1]["description"]) + else: + convo.assistant(self.current_state.current_task["instructions"]) + return convo + + async def review_change( + self, file_name: str, instructions: str, old_content: str, new_content: str + ) -> tuple[str, str]: + """ + Review changes that were applied to the file. + + This asks the LLM to act as a PR reviewer and for each part (hunk) of the + diff, decide if it should be applied (kept) or ignored (removed from the PR). + + :param file_name: name of the file being modified + :param instructions: instructions for the reviewer + :param old_content: old file content + :param new_content: new file content (with proposed changes) + :return: tuple with file content update with approved changes, and review feedback + + Diff hunk explanation: https://www.gnu.org/software/diffutils/manual/html_node/Hunks.html + """ + + hunks = self.get_diff_hunks(file_name, old_content, new_content) + + llm = self.get_llm(CODE_REVIEW_AGENT_NAME) + convo = ( + self._get_task_convo() + .template( + "review_changes", + instructions=instructions, + file_name=file_name, + old_content=old_content, + hunks=hunks, + ) + .require_schema(ReviewChanges) + ) + llm_response: ReviewChanges = await llm(convo, temperature=0, parser=JSONParser(ReviewChanges)) + + for i in range(MAX_REVIEW_RETRIES): + reasons = {} + ids_to_apply = set() + ids_to_ignore = set() + ids_to_rework = set() + for hunk in llm_response.hunks: + reasons[hunk.number - 1] = hunk.reason + if hunk.decision == "apply": + ids_to_apply.add(hunk.number - 1) + elif hunk.decision == "ignore": + ids_to_ignore.add(hunk.number - 1) + elif hunk.decision == "rework": + ids_to_rework.add(hunk.number - 1) + + n_hunks = len(hunks) + n_review_hunks = len(reasons) + if n_review_hunks == n_hunks: + break + elif n_review_hunks < n_hunks: + error = "Not all hunks have been reviewed. Please review all hunks and add 'apply', 'ignore' or 'rework' decision for each." + elif n_review_hunks > n_hunks: + error = f"Your review contains more hunks ({n_review_hunks}) than in the original diff ({n_hunks}). Note that one hunk may have multiple changed lines." 
+ + # Max two retries; if the reviewer still hasn't reviewed all hunks, we'll just use the entire new content + convo.assistant(llm_response.model_dump_json()).user(error) + llm_response = await llm(convo, parser=JSONParser(ReviewChanges)) + else: + return new_content, None + + hunks_to_apply = [h for i, h in enumerate(hunks) if i in ids_to_apply] + diff_log = f"--- {file_name}\n+++ {file_name}\n" + "\n".join(hunks_to_apply) + + hunks_to_rework = [(i, h) for i, h in enumerate(hunks) if i in ids_to_rework] + review_log = ( + "\n\n".join([f"## Change\n```{hunk}```\nReviewer feedback:\n{reasons[i]}" for (i, hunk) in hunks_to_rework]) + + "\n\nReview notes:\n" + + llm_response.review_notes + ) + + if len(hunks_to_apply) == len(hunks): + log.info(f"Applying entire change to {file_name}") + return new_content, None + + elif len(hunks_to_apply) == 0: + if hunks_to_rework: + log.info(f"Requesting rework for {len(hunks_to_rework)} changes to {file_name} (0 hunks to apply)") + return old_content, review_log + else: + # If everything can be safely ignored, it's probably because the files already implement the changes + # from previous tasks (which can happen often). Insisting on a change here is likely to cause problems. + log.info(f"Rejecting entire change to {file_name} with reason: {llm_response.review_notes}") + return old_content, None + + log.debug(f"Applying code change to {file_name}:\n{diff_log}") + new_content = self.apply_diff(file_name, old_content, hunks_to_apply, new_content) + if hunks_to_rework: + log.info(f"Requesting further rework for {len(hunks_to_rework)} changes to {file_name}") + return new_content, review_log + else: + return new_content, None + + @staticmethod + def get_line_changes(old_content: str, new_content: str) -> tuple[int, int]: + """ + Get the number of added and deleted lines between two files. + + This uses Python difflib to produce a unified diff, then counts + the number of added and deleted lines. + + :param old_content: old file content + :param new_content: new file content + :return: a tuple (added_lines, deleted_lines) + """ + + from_lines = old_content.splitlines(keepends=True) + to_lines = new_content.splitlines(keepends=True) + + diff_gen = unified_diff(from_lines, to_lines) + + added_lines = 0 + deleted_lines = 0 + + for line in diff_gen: + if line.startswith("+") and not line.startswith("+++"): # Exclude the file headers + added_lines += 1 + elif line.startswith("-") and not line.startswith("---"): # Exclude the file headers + deleted_lines += 1 + + return added_lines, deleted_lines + + @staticmethod + def get_diff_hunks(file_name: str, old_content: str, new_content: str) -> list[str]: + """ + Get the diff between two files. + + This uses Python difflib to produce an unified diff, then splits + it into hunks that will be separately reviewed by the reviewer. 
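For a concrete sense of what the reviewer sees, here is a tiny, self-contained run of the same difflib-based hunk splitting on a one-line change plus an added line (a simplified version of what get_diff_hunks() above does):

import re
from difflib import unified_diff

old = "a\nb\nc\n".splitlines(keepends=True)
new = "a\nB\nc\nd\n".splitlines(keepends=True)
diff_txt = "".join(unified_diff(old, new, fromfile="old_example.py", tofile="to_example.py"))

# Every "\n@@" starts a new hunk; the first chunk is just the file-name prologue and is skipped.
hunks = ["@@" + h for h in re.split(r"\n@@", diff_txt)[1:]]
print(hunks[0])
# @@ -1,3 +1,4 @@
#  a
# -b
# +B
#  c
# +d
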
+ + :param file_name: name of the file being modified + :param old_content: old file content + :param new_content: new file content + :return: change hunks from the unified diff + """ + from_name = "old_" + file_name + to_name = "to_" + file_name + from_lines = old_content.splitlines(keepends=True) + to_lines = new_content.splitlines(keepends=True) + diff_gen = unified_diff(from_lines, to_lines, fromfile=from_name, tofile=to_name) + diff_txt = "".join(diff_gen) + + hunks = re.split(r"\n@@", diff_txt, re.MULTILINE) + result = [] + for i, h in enumerate(hunks): + # Skip the prologue (file names) + if i == 0: + continue + txt = h.splitlines() + txt[0] = "@@" + txt[0] + result.append("\n".join(txt)) + return result + + def apply_diff(self, file_name: str, old_content: str, hunks: list[str], fallback: str): + """ + Apply the diff to the original file content. + + This uses the internal `_apply_patch` method to apply the + approved diff hunks to the original file content. + + If patch apply fails, the fallback is the full new file content + with all the changes applied (as if the reviewer approved everythng). + + :param file_name: name of the file being modified + :param old_content: old file content + :param hunks: change hunks from the unified diff + :param fallback: proposed new file content (with all the changes applied) + """ + diff = ( + "\n".join( + [ + f"--- {file_name}", + f"+++ {file_name}", + ] + + hunks + ) + + "\n" + ) + try: + fixed_content = self._apply_patch(old_content, diff) + except Exception as e: + # This should never happen but if it does, just use the new version from + # the LLM and hope for the best + print(f"Error applying diff: {e}; hoping all changes are valid") + return fallback + + return fixed_content + + # Adapted from https://gist.github.com/noporpoise/16e731849eb1231e86d78f9dfeca3abc (Public Domain) + @staticmethod + def _apply_patch(original: str, patch: str, revert: bool = False): + """ + Apply a patch to a string to recover a newer version of the string. + + :param original: The original string. + :param patch: The patch to apply. + :param revert: If True, treat the original string as the newer version and recover the older string. + :return: The updated string after applying the patch. 
+ """ + original_lines = original.splitlines(True) + patch_lines = patch.splitlines(True) + + updated_text = "" + index_original = start_line = 0 + + # Choose which group of the regex to use based on the revert flag + match_index, line_sign = (1, "+") if not revert else (3, "-") + + # Skip header lines of the patch + while index_original < len(patch_lines) and patch_lines[index_original].startswith(("---", "+++")): + index_original += 1 + + while index_original < len(patch_lines): + match = PATCH_HEADER_PATTERN.match(patch_lines[index_original]) + if not match: + raise Exception("Bad patch -- regex mismatch [line " + str(index_original) + "]") + + line_number = int(match.group(match_index)) - 1 + (match.group(match_index + 1) == "0") + + if start_line > line_number or line_number > len(original_lines): + raise Exception("Bad patch -- bad line number [line " + str(index_original) + "]") + + updated_text += "".join(original_lines[start_line:line_number]) + start_line = line_number + index_original += 1 + + while index_original < len(patch_lines) and patch_lines[index_original][0] != "@": + if index_original + 1 < len(patch_lines) and patch_lines[index_original + 1][0] == "\\": + line_content = patch_lines[index_original][:-1] + index_original += 2 + else: + line_content = patch_lines[index_original] + index_original += 1 + + if line_content: + if line_content[0] == line_sign or line_content[0] == " ": + updated_text += line_content[1:] + start_line += line_content[0] != line_sign + + updated_text += "".join(original_lines[start_line:]) + return updated_text diff --git a/core/agents/code_reviewer.py b/core/agents/code_reviewer.py deleted file mode 100644 index f3fac62..0000000 --- a/core/agents/code_reviewer.py +++ /dev/null @@ -1,326 +0,0 @@ -import re -from difflib import unified_diff -from enum import Enum - -from pydantic import BaseModel, Field - -from core.agents.base import BaseAgent -from core.agents.convo import AgentConvo -from core.agents.response import AgentResponse -from core.llm.parser import JSONParser -from core.log import get_logger - -log = get_logger(__name__) - - -# Constant for indicating missing new line at the end of a file in a unified diff -NO_EOL = "\\ No newline at end of file" - -# Regular expression pattern for matching hunk headers -PATCH_HEADER_PATTERN = re.compile(r"^@@ -(\d+),?(\d+)? \+(\d+),?(\d+)? @@") - -# Maximum number of attempts to ask for review if it can't be parsed -MAX_REVIEW_RETRIES = 2 - -# Maximum number of code implementation attempts after which we accept the changes unconditionaly -MAX_CODING_ATTEMPTS = 3 - - -class Decision(str, Enum): - APPLY = "apply" - IGNORE = "ignore" - REWORK = "rework" - - -class Hunk(BaseModel): - number: int = Field(description="Index of the hunk in the diff. 
Starts from 1.") - reason: str = Field(description="Reason for applying or ignoring this hunk, or for asking for it to be reworked.") - decision: Decision = Field(description="Whether to apply this hunk, rework, or ignore it.") - - -class ReviewChanges(BaseModel): - hunks: list[Hunk] - review_notes: str = Field(description="Additional review notes (optional, can be empty).") - - -class CodeReviewer(BaseAgent): - agent_type = "code-reviewer" - display_name = "Code Reviewer" - - async def run(self) -> AgentResponse: - if ( - not self.prev_response.data["old_content"] - or self.prev_response.data["new_content"] == self.prev_response.data["old_content"] - or self.prev_response.data["attempt"] >= MAX_CODING_ATTEMPTS - ): - # we always auto-accept new files and unchanged files, or if we've tried too many times - return await self.accept_changes(self.prev_response.data["path"], self.prev_response.data["new_content"]) - - approved_content, feedback = await self.review_change( - self.prev_response.data["path"], - self.prev_response.data["instructions"], - self.prev_response.data["old_content"], - self.prev_response.data["new_content"], - ) - if feedback: - return AgentResponse.code_review_feedback( - self, - new_content=self.prev_response.data["new_content"], - approved_content=approved_content, - feedback=feedback, - attempt=self.prev_response.data["attempt"], - ) - else: - return await self.accept_changes(self.prev_response.data["path"], approved_content) - - async def accept_changes(self, path: str, content: str) -> AgentResponse: - await self.state_manager.save_file(path, content) - self.next_state.complete_step() - - input_required = self.state_manager.get_input_required(content) - if input_required: - return AgentResponse.input_required( - self, - [{"file": path, "line": line} for line in input_required], - ) - else: - return AgentResponse.done(self) - - def _get_task_convo(self) -> AgentConvo: - # FIXME: Current prompts reuse conversation from the developer so we have to resort to this - task = self.current_state.current_task - current_task_index = self.current_state.tasks.index(task) - - convo = AgentConvo(self).template( - "breakdown", - task=task, - iteration=None, - current_task_index=current_task_index, - ) - # TODO: We currently show last iteration to the code monkey; we might need to show the task - # breakdown and all the iterations instead? To think about when refactoring prompts - if self.current_state.iterations: - convo.assistant(self.current_state.iterations[-1]["description"]) - else: - convo.assistant(self.current_state.current_task["instructions"]) - return convo - - async def review_change( - self, file_name: str, instructions: str, old_content: str, new_content: str - ) -> tuple[str, str]: - """ - Review changes that were applied to the file. - - This asks the LLM to act as a PR reviewer and for each part (hunk) of the - diff, decide if it should be applied (kept) or ignored (removed from the PR). 
- - :param file_name: name of the file being modified - :param instructions: instructions for the reviewer - :param old_content: old file content - :param new_content: new file content (with proposed changes) - :return: tuple with file content update with approved changes, and review feedback - - Diff hunk explanation: https://www.gnu.org/software/diffutils/manual/html_node/Hunks.html - """ - - hunks = self.get_diff_hunks(file_name, old_content, new_content) - - llm = self.get_llm() - convo = ( - self._get_task_convo() - .template( - "review_changes", - instructions=instructions, - file_name=file_name, - old_content=old_content, - hunks=hunks, - ) - .require_schema(ReviewChanges) - ) - llm_response: ReviewChanges = await llm(convo, temperature=0, parser=JSONParser(ReviewChanges)) - - for i in range(MAX_REVIEW_RETRIES): - reasons = {} - ids_to_apply = set() - ids_to_ignore = set() - ids_to_rework = set() - for hunk in llm_response.hunks: - reasons[hunk.number - 1] = hunk.reason - if hunk.decision == "apply": - ids_to_apply.add(hunk.number - 1) - elif hunk.decision == "ignore": - ids_to_ignore.add(hunk.number - 1) - elif hunk.decision == "rework": - ids_to_rework.add(hunk.number - 1) - - n_hunks = len(hunks) - n_review_hunks = len(reasons) - if n_review_hunks == n_hunks: - break - elif n_review_hunks < n_hunks: - error = "Not all hunks have been reviewed. Please review all hunks and add 'apply', 'ignore' or 'rework' decision for each." - elif n_review_hunks > n_hunks: - error = f"Your review contains more hunks ({n_review_hunks}) than in the original diff ({n_hunks}). Note that one hunk may have multiple changed lines." - - # Max two retries; if the reviewer still hasn't reviewed all hunks, we'll just use the entire new content - convo.assistant(llm_response.model_dump_json()).user(error) - llm_response = await llm(convo, parser=JSONParser(ReviewChanges)) - else: - return new_content, None - - hunks_to_apply = [h for i, h in enumerate(hunks) if i in ids_to_apply] - diff_log = f"--- {file_name}\n+++ {file_name}\n" + "\n".join(hunks_to_apply) - - hunks_to_rework = [(i, h) for i, h in enumerate(hunks) if i in ids_to_rework] - review_log = ( - "\n\n".join([f"## Change\n```{hunk}```\nReviewer feedback:\n{reasons[i]}" for (i, hunk) in hunks_to_rework]) - + "\n\nReview notes:\n" - + llm_response.review_notes - ) - - if len(hunks_to_apply) == len(hunks): - await self.send_message("Applying entire change") - log.info(f"Applying entire change to {file_name}") - return new_content, None - - elif len(hunks_to_apply) == 0: - if hunks_to_rework: - await self.send_message( - f"Requesting rework for {len(hunks_to_rework)} changes with reason: {llm_response.review_notes}" - ) - log.info(f"Requesting rework for {len(hunks_to_rework)} changes to {file_name} (0 hunks to apply)") - return old_content, review_log - else: - # If everything can be safely ignored, it's probably because the files already implement the changes - # from previous tasks (which can happen often). Insisting on a change here is likely to cause problems. 
- await self.send_message(f"Rejecting entire change with reason: {llm_response.review_notes}") - log.info(f"Rejecting entire change to {file_name} with reason: {llm_response.review_notes}") - return old_content, None - - log.debug(f"Applying code change to {file_name}:\n{diff_log}") - new_content = self.apply_diff(file_name, old_content, hunks_to_apply, new_content) - if hunks_to_rework: - log.info(f"Requesting further rework for {len(hunks_to_rework)} changes to {file_name}") - return new_content, review_log - else: - return new_content, None - - @staticmethod - def get_diff_hunks(file_name: str, old_content: str, new_content: str) -> list[str]: - """ - Get the diff between two files. - - This uses Python difflib to produce an unified diff, then splits - it into hunks that will be separately reviewed by the reviewer. - - :param file_name: name of the file being modified - :param old_content: old file content - :param new_content: new file content - :return: change hunks from the unified diff - """ - from_name = "old_" + file_name - to_name = "to_" + file_name - from_lines = old_content.splitlines(keepends=True) - to_lines = new_content.splitlines(keepends=True) - diff_gen = unified_diff(from_lines, to_lines, fromfile=from_name, tofile=to_name) - diff_txt = "".join(diff_gen) - - hunks = re.split(r"\n@@", diff_txt, re.MULTILINE) - result = [] - for i, h in enumerate(hunks): - # Skip the prologue (file names) - if i == 0: - continue - txt = h.splitlines() - txt[0] = "@@" + txt[0] - result.append("\n".join(txt)) - return result - - def apply_diff(self, file_name: str, old_content: str, hunks: list[str], fallback: str): - """ - Apply the diff to the original file content. - - This uses the internal `_apply_patch` method to apply the - approved diff hunks to the original file content. - - If patch apply fails, the fallback is the full new file content - with all the changes applied (as if the reviewer approved everythng). - - :param file_name: name of the file being modified - :param old_content: old file content - :param hunks: change hunks from the unified diff - :param fallback: proposed new file content (with all the changes applied) - """ - diff = ( - "\n".join( - [ - f"--- {file_name}", - f"+++ {file_name}", - ] - + hunks - ) - + "\n" - ) - try: - fixed_content = self._apply_patch(old_content, diff) - except Exception as e: - # This should never happen but if it does, just use the new version from - # the LLM and hope for the best - print(f"Error applying diff: {e}; hoping all changes are valid") - return fallback - - return fixed_content - - # Adapted from https://gist.github.com/noporpoise/16e731849eb1231e86d78f9dfeca3abc (Public Domain) - @staticmethod - def _apply_patch(original: str, patch: str, revert: bool = False): - """ - Apply a patch to a string to recover a newer version of the string. - - :param original: The original string. - :param patch: The patch to apply. - :param revert: If True, treat the original string as the newer version and recover the older string. - :return: The updated string after applying the patch. 
- """ - original_lines = original.splitlines(True) - patch_lines = patch.splitlines(True) - - updated_text = "" - index_original = start_line = 0 - - # Choose which group of the regex to use based on the revert flag - match_index, line_sign = (1, "+") if not revert else (3, "-") - - # Skip header lines of the patch - while index_original < len(patch_lines) and patch_lines[index_original].startswith(("---", "+++")): - index_original += 1 - - while index_original < len(patch_lines): - match = PATCH_HEADER_PATTERN.match(patch_lines[index_original]) - if not match: - raise Exception("Bad patch -- regex mismatch [line " + str(index_original) + "]") - - line_number = int(match.group(match_index)) - 1 + (match.group(match_index + 1) == "0") - - if start_line > line_number or line_number > len(original_lines): - raise Exception("Bad patch -- bad line number [line " + str(index_original) + "]") - - updated_text += "".join(original_lines[start_line:line_number]) - start_line = line_number - index_original += 1 - - while index_original < len(patch_lines) and patch_lines[index_original][0] != "@": - if index_original + 1 < len(patch_lines) and patch_lines[index_original + 1][0] == "\\": - line_content = patch_lines[index_original][:-1] - index_original += 2 - else: - line_content = patch_lines[index_original] - index_original += 1 - - if line_content: - if line_content[0] == line_sign or line_content[0] == " ": - updated_text += line_content[1:] - start_line += line_content[0] != line_sign - - updated_text += "".join(original_lines[start_line:]) - return updated_text diff --git a/core/agents/convo.py b/core/agents/convo.py index f4f6f20..60c05f2 100644 --- a/core/agents/convo.py +++ b/core/agents/convo.py @@ -88,6 +88,15 @@ def fork(self) -> "AgentConvo": child.prompt_log = deepcopy(self.prompt_log) return child + def trim(self, trim_index: int, trim_count: int) -> "AgentConvo": + """ + Trim the conversation starting from the given index by 1 message. + :param trim_index: + :return: + """ + self.messages = self.messages[:trim_index] + self.messages[trim_index + trim_count :] + return self + def require_schema(self, model: BaseModel) -> "AgentConvo": def remove_defs(d): if isinstance(d, dict): @@ -107,5 +116,8 @@ def remove_defs(d): return self def remove_last_x_messages(self, x: int) -> "AgentConvo": + """ + Remove the last `x` messages from the conversation. 
+ """ self.messages = self.messages[:-x] return self diff --git a/core/agents/developer.py b/core/agents/developer.py index 02f596c..dbc3107 100644 --- a/core/agents/developer.py +++ b/core/agents/developer.py @@ -1,5 +1,6 @@ +import json from enum import Enum -from typing import Annotated, Literal, Optional, Union +from typing import Annotated, Literal, Union from uuid import uuid4 from pydantic import BaseModel, Field @@ -7,8 +8,8 @@ from core.agents.base import BaseAgent from core.agents.convo import AgentConvo from core.agents.mixins import RelevantFilesMixin -from core.agents.response import AgentResponse, ResponseType -from core.config import TASK_BREAKDOWN_AGENT_NAME +from core.agents.response import AgentResponse +from core.config import PARSE_TASK_AGENT_NAME, TASK_BREAKDOWN_AGENT_NAME from core.db.models.project_state import IterationStatus, TaskStatus from core.db.models.specification import Complexity from core.llm.parser import JSONParser @@ -64,9 +65,6 @@ class Developer(RelevantFilesMixin, BaseAgent): display_name = "Developer" async def run(self) -> AgentResponse: - if self.prev_response and self.prev_response.type == ResponseType.TASK_REVIEW_FEEDBACK: - return await self.breakdown_current_iteration(self.prev_response.data["feedback"]) - if not self.current_state.unfinished_tasks: log.warning("No unfinished tasks found, nothing to do (why am I called? is this a bug?)") return AgentResponse.done(self) @@ -89,40 +87,27 @@ async def run(self) -> AgentResponse: return await self.breakdown_current_task() - async def breakdown_current_iteration(self, task_review_feedback: Optional[str] = None) -> AgentResponse: + async def breakdown_current_iteration(self) -> AgentResponse: """ Breaks down current iteration or task review into steps. - :param task_review_feedback: If provided, the task review feedback is broken down instead of the current iteration :return: AgentResponse.done(self) when the breakdown is done """ current_task = self.current_state.current_task - if task_review_feedback is not None: - iteration = None - current_task["task_review_feedback"] = task_review_feedback - description = task_review_feedback - user_feedback = "" - source = "review" - n_tasks = 1 - log.debug(f"Breaking down the task review feedback {task_review_feedback}") - await self.send_message("Breaking down the task review feedback...") - elif self.current_state.current_iteration["status"] in ( + if self.current_state.current_iteration["status"] in ( IterationStatus.AWAITING_BUG_FIX, IterationStatus.AWAITING_LOGGING, ): iteration = self.current_state.current_iteration - current_task["task_review_feedback"] = None description = iteration["bug_hunting_cycles"][-1]["human_readable_instructions"] user_feedback = iteration["user_feedback"] source = "bug_hunt" n_tasks = len(self.next_state.iterations) log.debug(f"Breaking down the logging cycle {description}") - await self.send_message("Breaking down the current iteration logging cycle ...") else: iteration = self.current_state.current_iteration - current_task["task_review_feedback"] = None if iteration is None: log.error("Iteration breakdown called but there's no current iteration or task review, possible bug?") return AgentResponse.done(self) @@ -132,11 +117,11 @@ async def breakdown_current_iteration(self, task_review_feedback: Optional[str] source = "troubleshooting" n_tasks = len(self.next_state.iterations) log.debug(f"Breaking down the iteration {description}") - await self.send_message("Breaking down the current task iteration ...") if 
self.current_state.files and self.current_state.relevant_files is None: return await self.get_relevant_files(user_feedback, description) + await self.send_message("Breaking down the task into steps ...") await self.ui.send_task_progress( n_tasks, # iterations and reviews can be created only one at a time, so we are always on last one n_tasks, @@ -146,7 +131,7 @@ async def breakdown_current_iteration(self, task_review_feedback: Optional[str] self.current_state.get_source_index(source), self.current_state.tasks, ) - llm = self.get_llm() + llm = self.get_llm(PARSE_TASK_AGENT_NAME) # FIXME: In case of iteration, parse_task depends on the context (files, tasks, etc) set there. # Ideally this prompt would be self-contained. convo = ( @@ -195,7 +180,6 @@ async def breakdown_current_iteration(self, task_review_feedback: Optional[str] async def breakdown_current_task(self) -> AgentResponse: current_task = self.current_state.current_task - current_task["task_review_feedback"] = None source = self.current_state.current_epic.get("source", "app") await self.ui.send_task_progress( self.current_state.tasks.index(current_task) + 1, @@ -208,7 +192,6 @@ async def breakdown_current_task(self) -> AgentResponse: ) log.debug(f"Breaking down the current task: {current_task['description']}") - await self.send_message("Thinking about how to implement this task ...") log.debug(f"Current state files: {len(self.current_state.files)}, relevant {self.current_state.relevant_files}") # Check which files are relevant to the current task @@ -217,7 +200,9 @@ async def breakdown_current_task(self) -> AgentResponse: current_task_index = self.current_state.tasks.index(current_task) - llm = self.get_llm(TASK_BREAKDOWN_AGENT_NAME) + await self.send_message("Thinking about how to implement this task ...") + + llm = self.get_llm(TASK_BREAKDOWN_AGENT_NAME, stream_output=True) convo = AgentConvo(self).template( "breakdown", task=current_task, @@ -235,7 +220,7 @@ async def breakdown_current_task(self) -> AgentResponse: } self.next_state.flag_tasks_as_modified() - llm = self.get_llm() + llm = self.get_llm(PARSE_TASK_AGENT_NAME) await self.send_message("Breaking down the task into steps ...") convo.assistant(response).template("parse_task").require_schema(TaskSteps) response: TaskSteps = await llm(convo, parser=JSONParser(TaskSteps), temperature=0) @@ -257,6 +242,7 @@ async def breakdown_current_task(self) -> AgentResponse: def set_next_steps(self, response: TaskSteps, source: str): # For logging/debugging purposes, we don't want to remove the finished steps # until we're done with the task. 
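Task breakdown is now an explicit two-pass pattern: a streamed, free-form call (TASK_BREAKDOWN_AGENT_NAME) produces the plan the user reads along with, and a second, schema-constrained call (PARSE_TASK_AGENT_NAME) turns that prose into TaskSteps. A compressed sketch of the hand-off; the Step model here is a stand-in for the richer step types defined in developer.py:

from pydantic import BaseModel

class Step(BaseModel):
    type: str
    description: str

class TaskSteps(BaseModel):
    steps: list[Step]

async def breakdown_then_parse(ask_breakdown, ask_parse) -> TaskSteps:
    # Pass 1: free-form, streamed plan.
    plan_text = await ask_breakdown("Break the current task into implementation steps.")
    # Pass 2: strict parse of that plan into the schema (the real code does this with
    # convo.assistant(plan).template("parse_task").require_schema(TaskSteps) and JSONParser).
    raw_json = await ask_parse(f"Convert this plan into JSON matching TaskSteps:\n{plan_text}")
    return TaskSteps.model_validate_json(raw_json)
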
+ unique_steps = self.remove_duplicate_steps({**response.model_dump()}) finished_steps = [step for step in self.current_state.steps if step["completed"]] self.next_state.steps = finished_steps + [ { @@ -264,30 +250,31 @@ def set_next_steps(self, response: TaskSteps, source: str): "completed": False, "source": source, "iteration_index": len(self.current_state.iterations), - **step.model_dump(), + **step, } - for step in response.steps + for step in unique_steps["steps"] ] - if ( - len(self.next_state.unfinished_steps) > 0 - and source != "review" - and ( - self.next_state.current_iteration is None - or self.next_state.current_iteration["status"] != IterationStatus.AWAITING_LOGGING - ) - ): - self.next_state.steps += [ - # TODO: add refactor step here once we have the refactor agent - { - "id": uuid4().hex, - "completed": False, - "type": "review_task", - "source": source, - "iteration_index": len(self.current_state.iterations), - }, - ] log.debug(f"Next steps: {self.next_state.unfinished_steps}") + def remove_duplicate_steps(self, data): + unique_steps = [] + + # Process steps attribute + for step in data["steps"]: + if isinstance(step, SaveFileStep) and any( + s["type"] == "save_file" and s["save_file"]["path"] == step["save_file"]["path"] for s in unique_steps + ): + continue + unique_steps.append(step) + + # Update steps attribute + data["steps"] = unique_steps + + # Use the serializable_steps for JSON dumping + data["original_response"] = json.dumps(unique_steps, indent=2) + + return data + async def ask_to_execute_task(self) -> bool: """ Asks the user to approve, skip or edit the current task. @@ -302,8 +289,7 @@ async def ask_to_execute_task(self) -> bool: buttons["skip"] = "Skip Task" description = self.current_state.current_task["description"] - await self.send_message("Starting new task with description:") - await self.send_message(description) + await self.send_message("Starting new task with description:\n\n" + description) user_response = await self.ask_question( "Do you want to execute the above task?", buttons=buttons, @@ -334,8 +320,8 @@ async def ask_to_execute_task(self) -> bool: initial_text=description, ) if user_response.button == "cancel" or user_response.cancelled: - # User hasn't edited the task so we can execute it immediately as is - return True + # User hasn't edited the task, so we can execute it immediately as is + return await self.ask_to_execute_task() self.next_state.current_task["description"] = user_response.text self.next_state.current_task["run_always"] = True diff --git a/core/agents/error_handler.py b/core/agents/error_handler.py index 0150bf3..b48968f 100644 --- a/core/agents/error_handler.py +++ b/core/agents/error_handler.py @@ -85,7 +85,7 @@ async def handle_command_error(self, message: str, details: dict) -> AgentRespon log.info("Skipping command error debug (requested by user)") return AgentResponse.done(self) - llm = self.get_llm() + llm = self.get_llm(stream_output=True) convo = AgentConvo(self).template( "debug", task_steps=self.current_state.steps, diff --git a/core/agents/executor.py b/core/agents/executor.py index db0e0e7..e29c8e9 100644 --- a/core/agents/executor.py +++ b/core/agents/executor.py @@ -54,7 +54,6 @@ def __init__( output_handler=self.output_handler, exit_handler=self.exit_handler, ) - self.stream_output = True def for_step(self, step): # FIXME: not needed, refactor to use self.current_state.current_step @@ -100,6 +99,7 @@ async def run(self) -> AgentResponse: log.info(f"Running command `{cmd}` with timeout {timeout}s") 
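Each command the executor runs is bounded by a timeout before its output is handed to check_command_output(). A minimal asyncio sketch of that run-with-timeout pattern (a stand-in for ProcessManager.run_command(), which also handles output and exit callbacks):

import asyncio

async def run_command(cmd: str, timeout: float) -> tuple[int, str, str]:
    proc = await asyncio.create_subprocess_shell(
        cmd, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE
    )
    try:
        stdout, stderr = await asyncio.wait_for(proc.communicate(), timeout=timeout)
    except asyncio.TimeoutError:
        proc.kill()
        await proc.wait()
        return -1, "", f"Command timed out after {timeout}s"
    return proc.returncode, stdout.decode(), stderr.decode()
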
status_code, stdout, stderr = await self.process_manager.run_command(cmd, timeout=timeout) + llm_response = await self.check_command_output(cmd, timeout, stdout, stderr, status_code) duration = (datetime.now(timezone.utc) - started_at).total_seconds() @@ -122,7 +122,8 @@ async def run(self) -> AgentResponse: ) await self.state_manager.log_command_run(exec_log) - if llm_response.success: + # FIXME: ErrorHandler isn't debugged with BugHunter - we should move all commands to run before testing and debug them with BugHunter + if True or llm_response.success: return AgentResponse.done(self) return AgentResponse.error( diff --git a/core/agents/external_docs.py b/core/agents/external_docs.py index 635f9b5..a5f6c99 100644 --- a/core/agents/external_docs.py +++ b/core/agents/external_docs.py @@ -44,6 +44,9 @@ class ExternalDocumentation(BaseAgent): display_name = "Documentation" async def run(self) -> AgentResponse: + await self._store_docs([], []) + return AgentResponse.done(self) + if self.current_state.specification.example_project: log.debug("Example project detected, no documentation selected.") available_docsets = [] @@ -85,7 +88,7 @@ async def _select_docsets(self, available_docsets: list[tuple]) -> dict[str, str if not available_docsets: return {} - llm = self.get_llm() + llm = self.get_llm(stream_output=True) convo = ( AgentConvo(self) .template( @@ -109,7 +112,7 @@ async def _create_queries(self, docsets: dict[str, str]) -> dict[str, list[str]] queries = {} await self.send_message("Getting relevant documentation for the following topics:") for k, short_desc in docsets.items(): - llm = self.get_llm() + llm = self.get_llm(stream_output=True) convo = ( AgentConvo(self) .template( diff --git a/core/agents/importer.py b/core/agents/importer.py index 00101ba..3704c93 100644 --- a/core/agents/importer.py +++ b/core/agents/importer.py @@ -54,7 +54,7 @@ async def start_import_process(self): await self.state_manager.commit() async def analyze_project(self): - llm = self.get_llm() + llm = self.get_llm(stream_output=True) self.send_message("Inspecting most important project files ...") diff --git a/core/agents/legacy_handler.py b/core/agents/legacy_handler.py new file mode 100644 index 0000000..f675152 --- /dev/null +++ b/core/agents/legacy_handler.py @@ -0,0 +1,14 @@ +from core.agents.base import BaseAgent +from core.agents.response import AgentResponse + + +class LegacyHandler(BaseAgent): + agent_type = "legacy-handler" + display_name = "Legacy Handler" + + async def run(self) -> AgentResponse: + if self.data["type"] == "review_task": + self.next_state.complete_step() + return AgentResponse.done(self) + + raise ValueError(f"Unknown reason for calling Legacy Handler with data: {self.data}") diff --git a/core/agents/mixins.py b/core/agents/mixins.py index cc6ba97..907a5c7 100644 --- a/core/agents/mixins.py +++ b/core/agents/mixins.py @@ -1,21 +1,40 @@ -from typing import Optional +from typing import List, Optional, Union from pydantic import BaseModel, Field from core.agents.convo import AgentConvo from core.agents.response import AgentResponse -from core.config import GET_RELEVANT_FILES_AGENT_NAME +from core.config import GET_RELEVANT_FILES_AGENT_NAME, TROUBLESHOOTER_BUG_REPORT from core.llm.parser import JSONParser from core.log import get_logger log = get_logger(__name__) +class ReadFilesAction(BaseModel): + read_files: Optional[List[str]] = Field( + description="List of files you want to read. All listed files must be in the project." 
+ ) + + +class AddFilesAction(BaseModel): + add_files: Optional[List[str]] = Field( + description="List of files you want to add to the list of relevant files. All listed files must be in the project. You must read files before adding them." + ) + + +class RemoveFilesAction(BaseModel): + remove_files: Optional[List[str]] = Field( + description="List of files you want to remove from the list of relevant files. All listed files must be in the relevant files list." + ) + + +class DoneBooleanAction(BaseModel): + done: Optional[bool] = Field(description="Boolean flag to indicate that you are done creating breakdown.") + + class RelevantFiles(BaseModel): - read_files: list[str] = Field(description="List of files you want to read.") - add_files: list[str] = Field(description="List of files you want to add to the list of relevant files.") - remove_files: list[str] = Field(description="List of files you want to remove from the list of relevant files.") - done: bool = Field(description="Boolean flag to indicate that you are done selecting relevant files.") + action: Union[ReadFilesAction, AddFilesAction, RemoveFilesAction, DoneBooleanAction] class IterationPromptMixin: @@ -42,7 +61,7 @@ async def find_solution( :param bug_hunting_cycles: Data about logs that need to be added to the code (optional). :return: The generated solution to the problem. """ - llm = self.get_llm() + llm = self.get_llm(TROUBLESHOOTER_BUG_REPORT, stream_output=True) convo = AgentConvo(self).template( "iteration", user_feedback=user_feedback, @@ -63,8 +82,6 @@ async def get_relevant_files( self, user_feedback: Optional[str] = None, solution_description: Optional[str] = None ) -> AgentResponse: log.debug("Getting relevant files for the current task") - await self.send_message("Figuring out which project files are relevant for the next task ...") - done = False relevant_files = set() llm = self.get_llm(GET_RELEVANT_FILES_AGENT_NAME) @@ -81,25 +98,26 @@ async def get_relevant_files( while not done and len(convo.messages) < 13: llm_response: RelevantFiles = await llm(convo, parser=JSONParser(RelevantFiles), temperature=0) + action = llm_response.action # Check if there are files to add to the list - if llm_response.add_files: + if getattr(action, "add_files", None): # Add only the files from add_files that are not already in relevant_files - relevant_files.update(file for file in llm_response.add_files if file not in relevant_files) + relevant_files.update(file for file in action.add_files if file not in relevant_files) # Check if there are files to remove from the list - if llm_response.remove_files: + if getattr(action, "remove_files", None): # Remove files from relevant_files that are in remove_files - relevant_files.difference_update(llm_response.remove_files) + relevant_files.difference_update(action.remove_files) - read_files = [file for file in self.current_state.files if file.path in llm_response.read_files] + read_files = [file for file in self.current_state.files if file.path in getattr(action, "read_files", [])] convo.remove_last_x_messages(1) convo.assistant(llm_response.original_response) convo.template("filter_files_loop", read_files=read_files, relevant_files=relevant_files).require_schema( RelevantFiles ) - done = llm_response.done + done = getattr(action, "done", False) existing_files = {file.path for file in self.current_state.files} relevant_files = [path for path in relevant_files if path in existing_files] diff --git a/core/agents/orchestrator.py b/core/agents/orchestrator.py index 2b6bdc4..756cb59 100644 --- 
a/core/agents/orchestrator.py +++ b/core/agents/orchestrator.py @@ -1,21 +1,21 @@ -from typing import Optional +import asyncio +from typing import List, Optional, Union from core.agents.architect import Architect from core.agents.base import BaseAgent from core.agents.bug_hunter import BugHunter from core.agents.code_monkey import CodeMonkey -from core.agents.code_reviewer import CodeReviewer from core.agents.developer import Developer from core.agents.error_handler import ErrorHandler from core.agents.executor import Executor from core.agents.external_docs import ExternalDocumentation from core.agents.human_input import HumanInput from core.agents.importer import Importer +from core.agents.legacy_handler import LegacyHandler from core.agents.problem_solver import ProblemSolver from core.agents.response import AgentResponse, ResponseType from core.agents.spec_writer import SpecWriter from core.agents.task_completer import TaskCompleter -from core.agents.task_reviewer import TaskReviewer from core.agents.tech_lead import TechLead from core.agents.tech_writer import TechnicalWriter from core.agents.troubleshooter import Troubleshooter @@ -63,8 +63,19 @@ async def run(self) -> bool: await self.update_stats() agent = self.create_agent(response) - log.debug(f"Running agent {agent.__class__.__name__} (step {self.current_state.step_index})") - response = await agent.run() + + # In case where agent is a list, run all agents in parallel. + # Only one agent type can be run in parallel at a time (for now). See handle_parallel_responses(). + if isinstance(agent, list): + tasks = [single_agent.run() for single_agent in agent] + log.debug( + f"Running agents {[a.__class__.__name__ for a in agent]} (step {self.current_state.step_index})" + ) + responses = await asyncio.gather(*tasks) + response = self.handle_parallel_responses(agent[0], responses) + else: + log.debug(f"Running agent {agent.__class__.__name__} (step {self.current_state.step_index})") + response = await agent.run() if response.type == ResponseType.EXIT: log.debug(f"Agent {agent.__class__.__name__} requested exit") @@ -77,6 +88,31 @@ async def run(self) -> bool: # TODO: rollback changes to "next" so they aren't accidentally committed? return True + def handle_parallel_responses(self, agent: BaseAgent, responses: List[AgentResponse]) -> AgentResponse: + """ + Handle responses from agents that were run in parallel. + + This method is called when multiple agents are run in parallel, and it + should return a single response that represents the combined responses + of all agents. + + :param agent: The original agent that was run in parallel. + :param responses: List of responses from all agents. + :return: Combined response. + """ + response = AgentResponse.done(agent) + if isinstance(agent, CodeMonkey): + files = [] + for single_response in responses: + if single_response.type == ResponseType.INPUT_REQUIRED: + files += single_response.data.get("files", []) + break + if files: + response = AgentResponse.input_required(agent, files) + return response + else: + raise ValueError(f"Unhandled parallel agent type: {agent.__class__.__name__}") + async def offline_changes_check(self): """ Check for changes outside Pythagora. 
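When `create_agent()` returns a list, the orchestrator now fans the agents out with `asyncio.gather` and merges their results through `handle_parallel_responses()`. A toy, runnable sketch of that fan-out pattern, with stand-in coroutines instead of real CodeMonkey agents:

```python
import asyncio


# Stand-in coroutines instead of real CodeMonkey agents; the point is only the
# fan-out: run every save_file agent concurrently, then merge their outputs.
async def fake_code_monkey(path: str) -> dict:
    await asyncio.sleep(0.1)  # pretend this is an LLM call
    return {"files": [{"file": path, "line": 1}]}


async def main() -> None:
    agents = [fake_code_monkey("app.js"), fake_code_monkey("routes/users.js")]
    responses = await asyncio.gather(*agents)
    files = [f for response in responses for f in response["files"]]
    print(files)  # merged "input required" entries from both agents


asyncio.run(main())
```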
@@ -86,7 +122,7 @@ async def offline_changes_check(self): """ log.info("Checking for offline changes.") - modified_files = await self.state_manager.get_modified_files() + modified_files = await self.state_manager.get_modified_files_with_content() if self.state_manager.workspace_is_empty(): # NOTE: this will currently get triggered on a new project, but will do @@ -95,7 +131,7 @@ async def offline_changes_check(self): await self.state_manager.restore_files() elif modified_files: await self.send_message(f"We found {len(modified_files)} new and/or modified files.") - + await self.ui.send_modified_files(modified_files) hint = "".join( [ "If you would like Pythagora to import those changes, click 'Yes'.\n", @@ -161,23 +197,17 @@ async def handle_done(self, agent: BaseAgent, response: AgentResponse) -> AgentR return import_files_response - def create_agent(self, prev_response: Optional[AgentResponse]) -> BaseAgent: + def create_agent(self, prev_response: Optional[AgentResponse]) -> Union[List[BaseAgent], BaseAgent]: state = self.current_state if prev_response: if prev_response.type in [ResponseType.CANCEL, ResponseType.ERROR]: return ErrorHandler(self.state_manager, self.ui, prev_response=prev_response) - if prev_response.type == ResponseType.CODE_REVIEW: - return CodeReviewer(self.state_manager, self.ui, prev_response=prev_response) - if prev_response.type == ResponseType.CODE_REVIEW_FEEDBACK: - return CodeMonkey(self.state_manager, self.ui, prev_response=prev_response, step=state.current_step) if prev_response.type == ResponseType.DESCRIBE_FILES: return CodeMonkey(self.state_manager, self.ui, prev_response=prev_response) if prev_response.type == ResponseType.INPUT_REQUIRED: # FIXME: HumanInput should be on the whole time and intercept chat/interrupt return HumanInput(self.state_manager, self.ui, prev_response=prev_response) - if prev_response.type == ResponseType.TASK_REVIEW_FEEDBACK: - return Developer(self.state_manager, self.ui, prev_response=prev_response) if prev_response.type == ResponseType.IMPORT_PROJECT: return Importer(self.state_manager, self.ui, prev_response=prev_response) if prev_response.type == ResponseType.EXTERNAL_DOCS_REQUIRED: @@ -212,10 +242,7 @@ def create_agent(self, prev_response: Optional[AgentResponse]) -> BaseAgent: if current_task_status == TaskStatus.REVIEWED: # User reviewed the task, call TechnicalWriter to see if documentation needs to be updated return TechnicalWriter(self.state_manager, self.ui) - elif current_task_status == TaskStatus.DOCUMENTED: - # After documentation is done, call TechLead update the development plan (remaining tasks) - return TechLead(self.state_manager, self.ui) - elif current_task_status in [TaskStatus.EPIC_UPDATED, TaskStatus.SKIPPED]: + elif current_task_status in [TaskStatus.DOCUMENTED, TaskStatus.SKIPPED]: # Task is fully done or skipped, call TaskCompleter to mark it as completed return TaskCompleter(self.state_manager, self.ui) @@ -233,6 +260,9 @@ def create_agent(self, prev_response: Optional[AgentResponse]) -> BaseAgent: if current_iteration_status == IterationStatus.HUNTING_FOR_BUG: # Triggering the bug hunter to start the hunt return BugHunter(self.state_manager, self.ui) + elif current_iteration_status == IterationStatus.START_PAIR_PROGRAMMING: + # Pythagora cannot solve the issue so we're starting pair programming + return BugHunter(self.state_manager, self.ui) elif current_iteration_status == IterationStatus.AWAITING_LOGGING: # Get the developer to implement logs needed for debugging return Developer(self.state_manager, 
self.ui) @@ -261,16 +291,20 @@ def create_agent(self, prev_response: Optional[AgentResponse]) -> BaseAgent: # We have just finished the task, call Troubleshooter to ask the user to review return Troubleshooter(self.state_manager, self.ui) - def create_agent_for_step(self, step: dict) -> BaseAgent: + def create_agent_for_step(self, step: dict) -> Union[List[BaseAgent], BaseAgent]: step_type = step.get("type") if step_type == "save_file": - return CodeMonkey(self.state_manager, self.ui, step=step) + steps = self.current_state.get_steps_of_type("save_file") + parallel = [] + for step in steps: + parallel.append(CodeMonkey(self.state_manager, self.ui, step=step)) + return parallel elif step_type == "command": return self.executor.for_step(step) elif step_type == "human_intervention": return HumanInput(self.state_manager, self.ui, step=step) elif step_type == "review_task": - return TaskReviewer(self.state_manager, self.ui) + return LegacyHandler(self.state_manager, self.ui, data={"type": "review_task"}) elif step_type == "create_readme": return TechnicalWriter(self.state_manager, self.ui) else: diff --git a/core/agents/problem_solver.py b/core/agents/problem_solver.py index 08de13a..e7b8de0 100644 --- a/core/agents/problem_solver.py +++ b/core/agents/problem_solver.py @@ -46,7 +46,7 @@ async def run(self) -> AgentResponse: return await self.try_alternative_solutions() async def generate_alternative_solutions(self): - llm = self.get_llm() + llm = self.get_llm(stream_output=True) convo = ( AgentConvo(self) .template( diff --git a/core/agents/response.py b/core/agents/response.py index 3fa0d61..7a40a8b 100644 --- a/core/agents/response.py +++ b/core/agents/response.py @@ -24,21 +24,12 @@ class ResponseType(str, Enum): EXIT = "exit" """Pythagora should exit.""" - CODE_REVIEW = "code-review" - """Agent is requesting a review of the created code.""" - - CODE_REVIEW_FEEDBACK = "code-review-feedback" - """Agent is providing feedback on the code review.""" - DESCRIBE_FILES = "describe-files" """Analysis of the files in the project is requested.""" INPUT_REQUIRED = "input-required" """User needs to modify a line in the generated code.""" - TASK_REVIEW_FEEDBACK = "task-review-feedback" - """Agent is providing feedback on the entire task.""" - IMPORT_PROJECT = "import-project" """User wants to import an existing project.""" @@ -82,46 +73,6 @@ def cancel(agent: "BaseAgent") -> "AgentResponse": def exit(agent: "ErrorHandler") -> "AgentResponse": return AgentResponse(type=ResponseType.EXIT, agent=agent) - @staticmethod - def code_review( - agent: "BaseAgent", - path: str, - instructions: str, - old_content: str, - new_content: str, - attempt: int, - ) -> "AgentResponse": - return AgentResponse( - type=ResponseType.CODE_REVIEW, - agent=agent, - data={ - "path": path, - "instructions": instructions, - "old_content": old_content, - "new_content": new_content, - "attempt": attempt, - }, - ) - - @staticmethod - def code_review_feedback( - agent: "BaseAgent", - new_content: str, - approved_content: str, - feedback: str, - attempt: int, - ) -> "AgentResponse": - return AgentResponse( - type=ResponseType.CODE_REVIEW_FEEDBACK, - agent=agent, - data={ - "new_content": new_content, - "approved_content": approved_content, - "feedback": feedback, - "attempt": attempt, - }, - ) - @staticmethod def describe_files(agent: "BaseAgent") -> "AgentResponse": return AgentResponse(type=ResponseType.DESCRIBE_FILES, agent=agent) @@ -130,16 +81,6 @@ def describe_files(agent: "BaseAgent") -> "AgentResponse": def input_required(agent: 
"BaseAgent", files: list[dict[str, int]]) -> "AgentResponse": return AgentResponse(type=ResponseType.INPUT_REQUIRED, agent=agent, data={"files": files}) - @staticmethod - def task_review_feedback(agent: "BaseAgent", feedback: str) -> "AgentResponse": - return AgentResponse( - type=ResponseType.TASK_REVIEW_FEEDBACK, - agent=agent, - data={ - "feedback": feedback, - }, - ) - @staticmethod def import_project(agent: "BaseAgent") -> "AgentResponse": return AgentResponse(type=ResponseType.IMPORT_PROJECT, agent=agent) diff --git a/core/agents/spec_writer.py b/core/agents/spec_writer.py index 9699fa2..d1cdb98 100644 --- a/core/agents/spec_writer.py +++ b/core/agents/spec_writer.py @@ -41,8 +41,6 @@ async def initialize_spec(self) -> AgentResponse: "Describe your app in as much detail as possible", allow_empty=False, buttons={ - # FIXME: must be lowercase becase VSCode doesn't recognize it otherwise. Needs a fix in the extension - "continue": "continue", "example": "Start an example project", "import": "Import an existing project", }, @@ -57,11 +55,6 @@ async def initialize_spec(self) -> AgentResponse: await self.prepare_example_project(DEFAULT_EXAMPLE_PROJECT) return AgentResponse.done(self) - elif response.button == "continue": - # FIXME: Workaround for the fact that VSCode "continue" button does - # nothing but repeat the question. We reproduce this bug for bug here. - return AgentResponse.done(self) - user_description = response.text.strip() complexity = await self.check_prompt_complexity(user_description) @@ -98,11 +91,11 @@ async def update_spec(self, iteration_mode) -> AgentResponse: await self.send_message( f"Making the following changes to project specification:\n\n{feature_description}\n\nUpdated project specification:" ) - llm = self.get_llm(SPEC_WRITER_AGENT_NAME) + llm = self.get_llm(SPEC_WRITER_AGENT_NAME, stream_output=True) convo = AgentConvo(self).template("add_new_feature", feature_description=feature_description) llm_response: str = await llm(convo, temperature=0, parser=StringParser()) updated_spec = llm_response.strip() - await self.ui.generate_diff(self.current_state.specification.description, updated_spec) + await self.ui.generate_diff("project_specification", self.current_state.specification.description, updated_spec) user_response = await self.ask_question( "Do you accept these changes to the project specification?", buttons={"yes": "Yes", "no": "No"}, @@ -157,7 +150,7 @@ async def analyze_spec(self, spec: str) -> str: ) await self.send_message(msg) - llm = self.get_llm(SPEC_WRITER_AGENT_NAME) + llm = self.get_llm(SPEC_WRITER_AGENT_NAME, stream_output=True) convo = AgentConvo(self).template("ask_questions").user(spec) n_questions = 0 n_answers = 0 @@ -165,25 +158,26 @@ async def analyze_spec(self, spec: str) -> str: while True: response: str = await llm(convo) if len(response) > 500: - # The response is too long for it to be a question, assume it's the spec + # The response is too long for it to be a question, assume it's the updated spec confirm = await self.ask_question( ( - "Can we proceed with this project description? If so, just press ENTER. " + "Can we proceed with this project description? If so, just press Continue. " "Otherwise, please tell me what's missing or what you'd like to add." 
), allow_empty=True, - buttons={"continue": "continue"}, + buttons={"continue": "Continue"}, ) if confirm.cancelled or confirm.button == "continue" or confirm.text == "": + updated_spec = response.strip() await telemetry.trace_code_event( "spec-writer-questions", { "num_questions": n_questions, "num_answers": n_answers, - "new_spec": spec, + "new_spec": updated_spec, }, ) - return spec + return updated_spec convo.user(confirm.text) else: @@ -200,7 +194,7 @@ async def analyze_spec(self, spec: str) -> str: "Please output the spec now, without additional comments or questions." ) response: str = await llm(convo) - return response + return response.strip() n_answers += 1 convo.user(user_response.text) @@ -212,4 +206,6 @@ async def review_spec(self, desc: str, spec: str) -> str: additional_info = llm_response.strip() if additional_info and len(additional_info) > 6: spec += "\n\nAdditional info/examples:\n\n" + additional_info + await self.send_message(f"\n\nAdditional info/examples:\n\n {additional_info}") + return spec diff --git a/core/agents/task_reviewer.py b/core/agents/task_reviewer.py deleted file mode 100644 index 5cdd4f3..0000000 --- a/core/agents/task_reviewer.py +++ /dev/null @@ -1,58 +0,0 @@ -from core.agents.base import BaseAgent -from core.agents.convo import AgentConvo -from core.agents.response import AgentResponse -from core.log import get_logger - -log = get_logger(__name__) - - -class TaskReviewer(BaseAgent): - agent_type = "task-reviewer" - display_name = "Task Reviewer" - - async def run(self) -> AgentResponse: - response = await self.review_code_changes() - self.next_state.complete_step() - return response - - async def review_code_changes(self) -> AgentResponse: - """ - Review all the code changes during current task. - """ - - log.debug(f"Reviewing code changes for task {self.current_state.current_task['description']}") - await self.send_message("Reviewing the task implementation ...") - all_feedbacks = [ - iteration["user_feedback"].replace("```", "").strip() - for iteration in self.current_state.iterations - # Some iterations are created by the task reviewer and have no user feedback - if iteration["user_feedback"] - ] - bug_hunter_instructions = [ - iteration["bug_hunting_cycles"][-1]["human_readable_instructions"].replace("```", "").strip() - for iteration in self.current_state.iterations - if iteration["bug_hunting_cycles"] - ] - - files_before_modification = self.current_state.modified_files - files_after_modification = [ - (file.path, file.content.content) - for file in self.current_state.files - if (file.path in files_before_modification) - ] - - llm = self.get_llm() - # TODO instead of sending files before and after maybe add nice way to show diff for multiple files - convo = AgentConvo(self).template( - "review_task", - all_feedbacks=all_feedbacks, - files_before_modification=files_before_modification, - files_after_modification=files_after_modification, - bug_hunter_instructions=bug_hunter_instructions, - ) - llm_response: str = await llm(convo, temperature=0.7) - - if "done" in llm_response.strip().lower()[-20:]: - return AgentResponse.done(self) - else: - return AgentResponse.task_review_feedback(self, llm_response) diff --git a/core/agents/tech_lead.py b/core/agents/tech_lead.py index c217328..3484582 100644 --- a/core/agents/tech_lead.py +++ b/core/agents/tech_lead.py @@ -5,6 +5,7 @@ from core.agents.base import BaseAgent from core.agents.convo import AgentConvo from core.agents.response import AgentResponse +from core.config import TECH_LEAD_PLANNING from 
core.db.models.project_state import TaskStatus from core.llm.parser import JSONParser from core.log import get_logger @@ -20,15 +21,17 @@ class Epic(BaseModel): description: str = Field(description=("Description of an epic.")) +class Task(BaseModel): + description: str = Field(description="Description of a task.") + testing_instructions: str = Field(description="Instructions for testing the task.") + + class DevelopmentPlan(BaseModel): plan: list[Epic] = Field(description="List of epics that need to be done to implement the entire plan.") -class UpdatedDevelopmentPlan(BaseModel): - updated_current_epic: Epic = Field( - description="Updated description of what was implemented while working on the current epic." - ) - plan: list[Epic] = Field(description="List of unfinished epics.") +class EpicPlan(BaseModel): + plan: list[Task] = Field(description="List of tasks that need to be done to implement the entire epic.") class TechLead(BaseAgent): @@ -36,10 +39,6 @@ class TechLead(BaseAgent): display_name = "Tech Lead" async def run(self) -> AgentResponse: - current_task_status = self.current_state.current_task.get("status") if self.current_state.current_task else None - if current_task_status and current_task_status == TaskStatus.DOCUMENTED: - return await self.update_epic() - if len(self.current_state.epics) == 0: if self.current_state.specification.example_project: self.plan_example_project() @@ -52,7 +51,21 @@ async def run(self) -> AgentResponse: if self.current_state.specification.templates and not self.current_state.files: await self.apply_project_templates() self.next_state.action = "Apply project templates" - return AgentResponse.done(self) + await self.ui.send_epics_and_tasks( + self.next_state.current_epic["sub_epics"], + self.next_state.tasks, + ) + + inputs = [] + for file in self.next_state.files: + input_required = self.state_manager.get_input_required(file.content.content) + if input_required: + inputs += [{"file": file.path, "line": line} for line in input_required] + + if inputs: + return AgentResponse.input_required(self, inputs) + else: + return AgentResponse.done(self) if self.current_state.current_epic: self.next_state.action = "Create a development plan" @@ -61,7 +74,7 @@ async def run(self) -> AgentResponse: return await self.ask_for_new_feature() def create_initial_project_epic(self): - log.debug("Creating initial project epic") + log.debug("Creating initial project Epic") self.next_state.epics = [ { "id": uuid4().hex, @@ -72,6 +85,7 @@ def create_initial_project_epic(self): "summary": None, "completed": False, "complexity": self.current_state.specification.complexity, + "sub_epics": [], } ] @@ -114,16 +128,20 @@ async def ask_for_new_feature(self) -> AgentResponse: if len(self.current_state.epics) > 2: await self.ui.send_message("Your new feature is complete!", source=success_source) else: - await self.ui.send_message("Your app is DONE!!! You can start using it right now!", source=success_source) + await self.ui.send_message("Your app is DONE! You can start using it right now!", source=success_source) + + if self.current_state.run_command: + await self.ui.send_run_command(self.current_state.run_command) log.debug("Asking for new feature") response = await self.ask_question( - "Do you have a new feature to add to the project? Just write it here", + "Do you have a new feature to add to the project? 
Just write it here:", buttons={"continue": "continue", "end": "No, I'm done"}, - allow_empty=True, + allow_empty=False, ) - if response.cancelled or not response.text: + if response.button == "end" or response.cancelled or not response.text: + await self.ui.send_message("Thanks for using Pythagora!") return AgentResponse.exit(self) self.next_state.epics = self.current_state.epics + [ @@ -136,6 +154,7 @@ async def ask_for_new_feature(self) -> AgentResponse: "summary": None, "completed": False, "complexity": None, # Determined and defined in SpecWriter + "sub_epics": [], } ] # Orchestrator will rerun us to break down the new feature epic @@ -144,9 +163,9 @@ async def ask_for_new_feature(self) -> AgentResponse: async def plan_epic(self, epic) -> AgentResponse: log.debug(f"Planning tasks for the epic: {epic['name']}") - await self.send_message("Starting to create the action plan for development ...") + await self.send_message("Creating the development plan ...") - llm = self.get_llm() + llm = self.get_llm(TECH_LEAD_PLANNING) convo = ( AgentConvo(self) .template( @@ -160,15 +179,70 @@ async def plan_epic(self, epic) -> AgentResponse: ) response: DevelopmentPlan = await llm(convo, parser=JSONParser(DevelopmentPlan)) - self.next_state.tasks = self.current_state.tasks + [ - { - "id": uuid4().hex, - "description": task.description, - "instructions": None, - "status": TaskStatus.TODO, - } - for task in response.plan - ] + + convo.remove_last_x_messages(1) + formatted_tasks = [f"Epic #{index}: {task.description}" for index, task in enumerate(response.plan, start=1)] + tasks_string = "\n\n".join(formatted_tasks) + convo = convo.assistant(tasks_string) + llm = self.get_llm(TECH_LEAD_PLANNING) + + if epic.get("source") == "feature" or epic.get("complexity") == "simple": + await self.send_message(f"Epic 1: {epic['name']}") + self.next_state.current_epic["sub_epics"] = [ + { + "id": 1, + "description": epic["name"], + } + ] + await self.send_message("Creating tasks for this epic ...") + self.next_state.tasks = self.next_state.tasks + [ + { + "id": uuid4().hex, + "description": task.description, + "instructions": None, + "pre_breakdown_testing_instructions": None, + "status": TaskStatus.TODO, + "sub_epic_id": 1, + } + for task in response.plan + ] + await self.ui.send_epics_and_tasks( + self.next_state.current_epic["sub_epics"], + self.next_state.tasks, + ) + else: + self.next_state.current_epic["sub_epics"] = self.next_state.current_epic["sub_epics"] + [ + { + "id": sub_epic_number, + "description": sub_epic.description, + } + for sub_epic_number, sub_epic in enumerate(response.plan, start=1) + ] + for sub_epic_number, sub_epic in enumerate(response.plan, start=1): + await self.send_message(f"Epic {sub_epic_number}: {sub_epic.description}") + convo = convo.template( + "epic_breakdown", epic_number=sub_epic_number, epic_description=sub_epic.description + ).require_schema(EpicPlan) + await self.send_message("Creating tasks for this epic ...") + epic_plan: EpicPlan = await llm(convo, parser=JSONParser(EpicPlan)) + self.next_state.tasks = self.next_state.tasks + [ + { + "id": uuid4().hex, + "description": task.description, + "instructions": None, + "pre_breakdown_testing_instructions": task.testing_instructions, + "status": TaskStatus.TODO, + "sub_epic_id": sub_epic_number, + } + for task in epic_plan.plan + ] + convo.remove_last_x_messages(2) + + await self.ui.send_epics_and_tasks( + self.next_state.current_epic["sub_epics"], + self.next_state.tasks, + ) + await telemetry.trace_code_event( 
"development-plan", { @@ -178,55 +252,6 @@ async def plan_epic(self, epic) -> AgentResponse: ) return AgentResponse.done(self) - async def update_epic(self) -> AgentResponse: - """ - Update the development plan for the current epic. - """ - epic = self.current_state.current_epic - self.next_state.set_current_task_status(TaskStatus.EPIC_UPDATED) - - if len(self.next_state.unfinished_tasks) == 1 or not self.current_state.iterations: - # Current task is still "unfinished" at this point, so if it's last task, there's nothing to update - return AgentResponse.done(self) - - finished_tasks = [task for task in self.next_state.tasks if task["status"] == TaskStatus.DONE] - finished_tasks.append(self.next_state.current_task) - - log.debug(f"Updating development plan for {epic['name']}") - await self.send_message("Updating development plan ...") - - llm = self.get_llm() - convo = ( - AgentConvo(self) - .template( - "update_plan", - finished_tasks=finished_tasks, - task_type=self.current_state.current_epic.get("source", "app"), - modified_files=[f for f in self.current_state.files if f.path in self.current_state.modified_files], - ) - .require_schema(UpdatedDevelopmentPlan) - ) - - response: UpdatedDevelopmentPlan = await llm( - convo, - parser=JSONParser(UpdatedDevelopmentPlan), - temperature=0, - ) - log.debug(f"Reworded last task as: {response.updated_current_epic.description}") - finished_tasks[-1]["description"] = response.updated_current_epic.description - - self.next_state.tasks = finished_tasks + [ - { - "id": uuid4().hex, - "description": task.description, - "instructions": None, - "status": TaskStatus.TODO, - } - for task in response.plan - ] - log.debug(f"Updated development plan for {epic['name']}, {len(response.plan)} tasks remaining") - return AgentResponse.done(self) - def plan_example_project(self): example_name = self.current_state.specification.example_project log.debug(f"Planning example project: {example_name}") @@ -238,6 +263,12 @@ def plan_example_project(self): "description": example["description"], "completed": False, "complexity": example["complexity"], + "sub_epics": [ + { + "id": 1, + "description": "Single Epic Example", + } + ], } ] self.next_state.tasks = example["plan"] diff --git a/core/agents/tech_writer.py b/core/agents/tech_writer.py index 3ab625e..90d6f5e 100644 --- a/core/agents/tech_writer.py +++ b/core/agents/tech_writer.py @@ -52,7 +52,7 @@ async def send_congratulations(self): async def create_readme(self): await self.send_message("Creating README ...") - llm = self.get_llm() + llm = self.get_llm(stream_output=True) convo = AgentConvo(self).template("create_readme") llm_response: str = await llm(convo) await self.state_manager.save_file("README.md", llm_response) diff --git a/core/agents/troubleshooter.py b/core/agents/troubleshooter.py index 497eb78..25fd575 100644 --- a/core/agents/troubleshooter.py +++ b/core/agents/troubleshooter.py @@ -7,6 +7,7 @@ from core.agents.convo import AgentConvo from core.agents.mixins import IterationPromptMixin, RelevantFilesMixin from core.agents.response import AgentResponse +from core.config import TROUBLESHOOTER_GET_RUN_COMMAND from core.db.models.file import File from core.db.models.project_state import IterationStatus, TaskStatus from core.llm.parser import JSONParser, OptionalCodeBlockParser @@ -71,7 +72,10 @@ async def create_iteration(self) -> AgentResponse: self.next_state.flag_tasks_as_modified() return AgentResponse.done(self) else: - await self.send_message("Here are instruction on how to test the app:\n\n" + 
user_instructions) + await self.send_message("Here are instructions on how to test the app:\n\n" + user_instructions) + + await self.ui.stop_app() + await self.ui.send_test_instructions(user_instructions) # Developer sets iteration as "completed" when it generates the step breakdown, so we can't # use "current_iteration" here @@ -90,7 +94,7 @@ async def create_iteration(self) -> AgentResponse: user_feedback_qa = None # await self.generate_bug_report(run_command, user_instructions, user_feedback) if is_loop: - if last_iteration["alternative_solutions"]: + if last_iteration is not None and last_iteration.get("alternative_solutions"): # If we already have alternative solutions, it means we were already in a loop. return self.try_next_alternative_solution(user_feedback, user_feedback_qa) else: @@ -161,11 +165,13 @@ async def get_run_command(self) -> Optional[str]: await self.send_message("Figuring out how to run the app ...") - llm = self.get_llm() + llm = self.get_llm(TROUBLESHOOTER_GET_RUN_COMMAND) convo = self._get_task_convo().template("get_run_command") # Although the prompt is explicit about not using "```", LLM may still return it llm_response: str = await llm(convo, temperature=0, parser=OptionalCodeBlockParser()) + if len(llm_response) < 5: + llm_response = "" self.next_state.run_command = llm_response return llm_response @@ -216,11 +222,13 @@ async def get_user_feedback( If "is_loop" is True, Pythagora is stuck in a loop and needs to consider alternative solutions. The last element in the tuple is the user feedback, which may be empty if the user provided no - feedback (eg. if they just clicked on "Continue" or "I'm stuck in a loop"). + feedback (eg. if they just clicked on "Continue" or "Start Pair Programming"). """ bug_report = None change_description = None + hint = None + is_loop = False should_iterate = True @@ -231,9 +239,11 @@ async def get_user_feedback( if run_command: await self.ui.send_run_command(run_command) - buttons = {"continue": "Everything works", "change": "I want to make a change", "bug": "There is an issue"} - if last_iteration: - buttons["loop"] = "I'm stuck in a loop" + buttons = { + "continue": "Everything works", + "change": "I want to make a change", + "bug": "There is an issue", + } user_response = await self.ask_question( test_message, buttons=buttons, default="continue", buttons_only=True, hint=hint @@ -241,30 +251,17 @@ async def get_user_feedback( if user_response.button == "continue" or user_response.cancelled: should_iterate = False - elif user_response.button == "loop": - await telemetry.trace_code_event( - "stuck-in-loop", - { - "clicked": True, - "task_index": self.current_state.tasks.index(self.current_state.current_task) + 1, - "num_tasks": len(self.current_state.tasks), - "num_epics": len(self.current_state.epics), - "num_iterations": len(self.current_state.iterations), - "num_steps": len(self.current_state.steps), - "architecture": { - "system_dependencies": self.current_state.specification.system_dependencies, - "app_dependencies": self.current_state.specification.package_dependencies, - }, - }, - ) - is_loop = True - elif user_response.button == "change": - user_description = await self.ask_question("Please describe the change you want to make (one at a time)") + user_description = await self.ask_question( + "Please describe the change you want to make to the project specification (one at a time)" + ) change_description = user_description.text elif user_response.button == "bug": - user_description = await self.ask_question("Please describe 
the issue you found (one at a time)") + user_description = await self.ask_question( + "Please describe the issue you found (one at a time) and share any relevant server logs", + buttons={"copy_server_logs": "Copy Server Logs"}, + ) bug_report = user_description.text return should_iterate, is_loop, bug_report, change_description @@ -304,7 +301,7 @@ async def generate_bug_report( :return: Additional questions and answers to generate a better bug report. """ additional_qa = [] - llm = self.get_llm() + llm = self.get_llm(stream_output=True) convo = ( AgentConvo(self) .template( diff --git a/core/cli/main.py b/core/cli/main.py index 204e790..39c74f1 100644 --- a/core/cli/main.py +++ b/core/cli/main.py @@ -1,3 +1,4 @@ +import asyncio import sys from argparse import Namespace from asyncio import run @@ -62,7 +63,7 @@ async def run_project(sm: StateManager, ui: UIBase) -> bool: async def llm_api_check(ui: UIBase) -> bool: """ - Check whether the configured LLMs are reachable. + Check whether the configured LLMs are reachable in parallel. :param ui: UI we'll use to report any issues :return: True if all the LLMs are reachable. @@ -73,28 +74,42 @@ async def llm_api_check(ui: UIBase) -> bool: async def handler(*args, **kwargs): pass - success = True checked_llms: set[LLMProvider] = set() - for llm_config in config.all_llms(): - if llm_config.provider in checked_llms: - continue + tasks = [] + + async def check_llm(llm_config): + if llm_config.provider + llm_config.model in checked_llms: + return True + checked_llms.add(llm_config.provider + llm_config.model) client_class = BaseLLMClient.for_provider(llm_config.provider) llm_client = client_class(llm_config, stream_handler=handler, error_handler=handler) try: resp = await llm_client.api_check() if not resp: - success = False - log.warning(f"API check for {llm_config.provider.value} failed.") + await ui.send_message( + f"API check for {llm_config.provider.value} {llm_config.model} failed.", + source=pythagora_source, + ) + log.warning(f"API check for {llm_config.provider.value} {llm_config.model} failed.") + return False else: - log.info(f"API check for {llm_config.provider.value} succeeded.") + log.info(f"API check for {llm_config.provider.value} {llm_config.model} succeeded.") + return True except APIError as err: await ui.send_message( - f"API check for {llm_config.provider.value} failed with: {err}", + f"API check for {llm_config.provider.value} {llm_config.model} failed with: {err}", source=pythagora_source, ) log.warning(f"API check for {llm_config.provider.value} failed with: {err}") - success = False + return False + + for llm_config in config.all_llms(): + tasks.append(check_llm(llm_config)) + + results = await asyncio.gather(*tasks) + + success = all(results) if not success: telemetry.set("end_result", "failure:api-error") diff --git a/core/config/__init__.py b/core/config/__init__.py index ea8b56d..90ac3c6 100644 --- a/core/config/__init__.py +++ b/core/config/__init__.py @@ -35,9 +35,14 @@ # Agents with sane setup in the default configuration DEFAULT_AGENT_NAME = "default" CODE_MONKEY_AGENT_NAME = "CodeMonkey" +CODE_REVIEW_AGENT_NAME = "CodeMonkey.code_review" DESCRIBE_FILES_AGENT_NAME = "CodeMonkey.describe_files" CHECK_LOGS_AGENT_NAME = "BugHunter.check_logs" +PARSE_TASK_AGENT_NAME = "Developer.parse_task" TASK_BREAKDOWN_AGENT_NAME = "Developer.breakdown_current_task" +TROUBLESHOOTER_BUG_REPORT = "Troubleshooter.generate_bug_report" +TROUBLESHOOTER_GET_RUN_COMMAND = "Troubleshooter.get_run_command" +TECH_LEAD_PLANNING = 
"TechLead.plan_epic" SPEC_WRITER_AGENT_NAME = "SpecWriter" GET_RELEVANT_FILES_AGENT_NAME = "get_relevant_files" @@ -96,7 +101,7 @@ class ProviderConfig(_StrictModel): ge=0.0, ) read_timeout: float = Field( - default=10.0, + default=20.0, description="Timeout (in seconds) for receiving a new chunk of data from the response stream", ge=0.0, ) @@ -151,7 +156,7 @@ class LLMConfig(_StrictModel): ge=0.0, ) read_timeout: float = Field( - default=10.0, + default=20.0, description="Timeout (in seconds) for receiving a new chunk of data from the response stream", ge=0.0, ) @@ -318,20 +323,56 @@ class Config(_StrictModel): agent: dict[str, AgentLLMConfig] = Field( default={ DEFAULT_AGENT_NAME: AgentLLMConfig(), - CODE_MONKEY_AGENT_NAME: AgentLLMConfig(model="gpt-4-0125-preview", temperature=0.0), - DESCRIBE_FILES_AGENT_NAME: AgentLLMConfig(model="gpt-3.5-turbo", temperature=0.0), CHECK_LOGS_AGENT_NAME: AgentLLMConfig( provider=LLMProvider.ANTHROPIC, model="claude-3-5-sonnet-20240620", temperature=0.5, ), + CODE_MONKEY_AGENT_NAME: AgentLLMConfig( + provider=LLMProvider.OPENAI, + model="gpt-4-0125-preview", + temperature=0.0, + ), + CODE_REVIEW_AGENT_NAME: AgentLLMConfig( + provider=LLMProvider.ANTHROPIC, + model="claude-3-5-sonnet-20240620", + temperature=0.0, + ), + DESCRIBE_FILES_AGENT_NAME: AgentLLMConfig( + provider=LLMProvider.OPENAI, + model="gpt-4o-mini-2024-07-18", + temperature=0.0, + ), + PARSE_TASK_AGENT_NAME: AgentLLMConfig( + provider=LLMProvider.OPENAI, + model="gpt-4-0125-preview", + temperature=0.0, + ), + SPEC_WRITER_AGENT_NAME: AgentLLMConfig( + provider=LLMProvider.OPENAI, + model="gpt-4-0125-preview", + temperature=0.0, + ), TASK_BREAKDOWN_AGENT_NAME: AgentLLMConfig( provider=LLMProvider.ANTHROPIC, model="claude-3-5-sonnet-20240620", temperature=0.5, ), - SPEC_WRITER_AGENT_NAME: AgentLLMConfig(model="gpt-4-0125-preview", temperature=0.0), - GET_RELEVANT_FILES_AGENT_NAME: AgentLLMConfig(model="claude-3-5-sonnet-20240620", temperature=0.0), + TECH_LEAD_PLANNING: AgentLLMConfig( + provider=LLMProvider.ANTHROPIC, + model="claude-3-5-sonnet-20240620", + temperature=0.5, + ), + TROUBLESHOOTER_BUG_REPORT: AgentLLMConfig( + provider=LLMProvider.ANTHROPIC, + model="claude-3-5-sonnet-20240620", + temperature=0.5, + ), + TROUBLESHOOTER_GET_RUN_COMMAND: AgentLLMConfig( + provider=LLMProvider.ANTHROPIC, + model="claude-3-5-sonnet-20240620", + temperature=0.0, + ), } ) prompt: PromptConfig = PromptConfig() @@ -424,13 +465,40 @@ def load(self, path: str) -> Config: loader = ConfigLoader() +def adapt_for_bedrock(config: Config) -> Config: + """ + Adapt the configuration for use with Bedrock. + + :param config: Configuration to adapt. + :return: Adapted configuration. + """ + if "anthropic" not in config.llm: + return config + + if config.llm["anthropic"].base_url is None or "bedrock/anthropic" not in config.llm["anthropic"].base_url: + return config + + replacement_map = { + "claude-3-5-sonnet-20240620": "us.anthropic.claude-3-5-sonnet-20240620-v1:0", + "claude-3-sonnet-20240229": "us.anthropic.claude-3-sonnet-20240229-v1:0", + "claude-3-haiku-20240307": "us.anthropic.claude-3-haiku-20240307-v1:0", + "claude-3-opus-20240229": "us.anthropic.claude-3-opus-20240229-v1:0", + } + + for agent in config.agent: + if config.agent[agent].model in replacement_map: + config.agent[agent].model = replacement_map[config.agent[agent].model] + + return config + + def get_config() -> Config: """ Return current configuration. :return: Current configuration object. 
""" - return loader.config + return adapt_for_bedrock(loader.config) __all__ = ["loader", "get_config"] diff --git a/core/config/magic_words.py b/core/config/magic_words.py index 8b91db8..089dff3 100644 --- a/core/config/magic_words.py +++ b/core/config/magic_words.py @@ -1,2 +1,27 @@ PROBLEM_IDENTIFIED = "PROBLEM_IDENTIFIED" ADD_LOGS = "ADD_LOGS" +THINKING_LOGS = [ + "Pythagora is crunching the numbers...", + "Pythagora is deep in thought...", + "Pythagora is analyzing your request...", + "Pythagora is brewing up a solution...", + "Pythagora is putting the pieces together...", + "Pythagora is working its magic...", + "Pythagora is crafting the perfect response...", + "Pythagora is decoding your query...", + "Pythagora is on the case...", + "Pythagora is computing an answer...", + "Pythagora is sorting through the data...", + "Pythagora is gathering insights...", + "Pythagora is making connections...", + "Pythagora is tuning the algorithms...", + "Pythagora is piecing together the puzzle...", + "Pythagora is scanning the possibilities...", + "Pythagora is engineering a response...", + "Pythagora is building the answer...", + "Pythagora is mapping out a solution...", + "Pythagora is figuring this out for you...", + "Pythagora is thinking hard right now...", + "Pythagora is working for you, so relax!", + "Pythagora might take some time to figure this out...", +] diff --git a/core/db/models/llm_request.py b/core/db/models/llm_request.py index 0a0a1cf..9738a17 100644 --- a/core/db/models/llm_request.py +++ b/core/db/models/llm_request.py @@ -3,6 +3,7 @@ from uuid import UUID from sqlalchemy import ForeignKey, inspect +from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import Mapped, mapped_column, relationship from sqlalchemy.sql import func @@ -59,7 +60,7 @@ def from_request_log( :param request_log: Request log. :return: Newly created LLM request log in the database. """ - session = inspect(project_state).async_session + session: AsyncSession = inspect(project_state).async_session obj = cls( project_state=project_state, diff --git a/core/db/models/project_state.py b/core/db/models/project_state.py index 3d520cc..3e55264 100644 --- a/core/db/models/project_state.py +++ b/core/db/models/project_state.py @@ -42,6 +42,7 @@ class IterationStatus: FIND_SOLUTION = "find_solution" PROBLEM_SOLVER = "problem_solver" NEW_FEATURE_REQUESTED = "new_feature_requested" + START_PAIR_PROGRAMMING = "start_pair_programming" DONE = "done" @@ -433,3 +434,12 @@ def get_source_index(self, source: str) -> int: return len([step for step in steps if step.get("type") == "review_task"]) return 1 + + def get_steps_of_type(self, step_type: str) -> [dict]: + """ + Get list of unfinished steps with specific type. + + :return: List of steps, or empty list if there are no unfinished steps of that type. 
+ """ + li = self.unfinished_steps + return [step for step in li if step.get("type") == step_type] if li else [] diff --git a/core/db/session.py b/core/db/session.py index f55460d..3f2b684 100644 --- a/core/db/session.py +++ b/core/db/session.py @@ -1,3 +1,5 @@ +import time + from sqlalchemy import event from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine @@ -33,6 +35,16 @@ def __init__(self, config: DBConfig): self.recursion_depth = 0 event.listen(self.engine.sync_engine, "connect", self._on_connect) + event.listen(self.engine.sync_engine, "before_cursor_execute", self.before_cursor_execute) + event.listen(self.engine.sync_engine, "after_cursor_execute", self.after_cursor_execute) + + def before_cursor_execute(self, conn, cursor, statement, parameters, context, executemany): + conn.info.setdefault("query_start_time", []).append(time.time()) + log.debug(f"Executing SQL: {statement}") + + def after_cursor_execute(self, conn, cursor, statement, parameters, context, executemany): + total = time.time() - conn.info["query_start_time"].pop(-1) + log.debug(f"SQL execution time: {total:.3f} seconds") def _on_connect(self, dbapi_connection, _): """Connection event handler""" @@ -44,6 +56,7 @@ def _on_connect(self, dbapi_connection, _): # it's a local file. PostgreSQL or other database use a real connection pool # by default. dbapi_connection.execute("pragma foreign_keys=on") + dbapi_connection.execute("PRAGMA journal_mode=WAL;") async def start(self) -> AsyncSession: if self.session is not None: diff --git a/core/llm/anthropic_client.py b/core/llm/anthropic_client.py index 1fc7d1d..5011966 100644 --- a/core/llm/anthropic_client.py +++ b/core/llm/anthropic_client.py @@ -74,8 +74,14 @@ async def _make_request( "temperature": self.config.temperature if temperature is None else temperature, } + if "bedrock/anthropic" in self.config.base_url: + completion_kwargs["extra_headers"] = {"anthropic-version": "bedrock-2023-05-31"} + if "sonnet" in self.config.model: - completion_kwargs["extra_headers"] = {"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"} + if "extra_headers" in completion_kwargs: + completion_kwargs["extra_headers"]["anthropic-beta"] = "max-tokens-3-5-sonnet-2024-07-15" + else: + completion_kwargs["extra_headers"] = {"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"} completion_kwargs["max_tokens"] = MAX_TOKENS_SONNET if json_mode: diff --git a/core/llm/base.py b/core/llm/base.py index bd99eb6..1c1143f 100644 --- a/core/llm/base.py +++ b/core/llm/base.py @@ -244,7 +244,18 @@ async def __call__( # so we can't be certain that's the problem in Anthropic case. # Here we try to detect that and tell the user what happened. 
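The `SessionManager` hunk above times every SQL statement through `before_cursor_execute`/`after_cursor_execute` listeners and switches SQLite to WAL journal mode on connect. A standalone sketch of the same instrumentation against a throwaway SQLite engine (the file name is illustrative):

```python
import time

from sqlalchemy import create_engine, event, text

# Throwaway on-disk SQLite engine, used only to demonstrate the listeners.
engine = create_engine("sqlite:///example.db")


@event.listens_for(engine, "connect")
def on_connect(dbapi_connection, _connection_record):
    # Same PRAGMA the session manager now issues on every new connection.
    dbapi_connection.execute("PRAGMA journal_mode=WAL;")


@event.listens_for(engine, "before_cursor_execute")
def before_cursor_execute(conn, cursor, statement, parameters, context, executemany):
    conn.info.setdefault("query_start_time", []).append(time.time())


@event.listens_for(engine, "after_cursor_execute")
def after_cursor_execute(conn, cursor, statement, parameters, context, executemany):
    total = time.time() - conn.info["query_start_time"].pop(-1)
    print(f"SQL took {total:.3f}s: {statement}")


with engine.connect() as conn:
    conn.execute(text("SELECT 1"))
```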
log.info(f"API status error: {err}") - err_code = err.response.json().get("error", {}).get("code", "") + try: + if hasattr(err, "response"): + if err.response.headers.get("Content-Type", "").startswith("application/json"): + err_code = err.response.json().get("error", {}).get("code", "") + else: + err_code = str(err.response.text) + elif isinstance(err, str): + err_code = err + else: + err_code = json.dumps(err) + except Exception as e: + err_code = f"Error parsing response: {str(e)}" if err_code in ("request_too_large", "context_length_exceeded", "string_above_max_length"): # Handle OpenAI and Groq token limit exceeded # OpenAI will return `string_above_max_length` for prompts more than 1M characters diff --git a/core/prompts/architect/technologies.prompt b/core/prompts/architect/technologies.prompt index 23d30ea..73cbe28 100644 --- a/core/prompts/architect/technologies.prompt +++ b/core/prompts/architect/technologies.prompt @@ -1,6 +1,6 @@ You're designing the architecture and technical specifications for a new project. -If the project requirements call out for specific technology, use that. Otherwise, if working on a web app, prefer Node.js for the backend (with Express if a web server is needed, and MongoDB if a database is needed), and Bootstrap for the front-end. You MUST NOT use Docker, Kubernetes, microservices and single-page app frameworks like Next.js, Angular, Vue or Svelte unless the project details explicitly require it. +If the project requirements call out for specific technology, use that. Otherwise, if working on a web app, prefer Node.js for the backend (with Express if a web server is needed, and MongoDB if a database is needed), and Bootstrap for the front-end. You MUST NOT use Docker, Kubernetes, microservices and single-page app frameworks like React, Next.js, Angular, Vue or Svelte unless the project details explicitly require it. Here is a high level description of "{{ state.branch.project.name }}": ``` diff --git a/core/prompts/bug-hunter/ask_a_question.prompt b/core/prompts/bug-hunter/ask_a_question.prompt new file mode 100644 index 0000000..348ee3e --- /dev/null +++ b/core/prompts/bug-hunter/ask_a_question.prompt @@ -0,0 +1,4 @@ +The developer wants to ask you a question. Here is the question: +{{question}} + +Please answer and refer to all the files in the repository and everything we've talked about so far but do not form your answer in any way that was asked for before, just answer the question as if you're talking to a colleague. diff --git a/core/prompts/bug-hunter/data_about_logs.prompt b/core/prompts/bug-hunter/data_about_logs.prompt new file mode 100644 index 0000000..630b2fc --- /dev/null +++ b/core/prompts/bug-hunter/data_about_logs.prompt @@ -0,0 +1,7 @@ +Tell me the most important logs that are relevant for this issue. For each log, tell me the the following: + 1. line in the code (eg. `print(...)`, `console.log(...)`, etc.) that generated the log + 2. what file is the log in (eg. `index.js`, `routes/users.js`, etc. - make sure to put the entire path like listed above) + 2. the current output of that log (make sure not to put the entire log output but maximum 5-10 lines of the output) + 3. the expected output of that log (also make sure to put maximum of 5-10 lines of the output) + 4. should the log be different from the current output or are the current and expected output the same + 5. 
a brief explanation of why the output is incorrect and what should be different here (use maximum 2-3 sentences) diff --git a/core/prompts/bug-hunter/instructions_from_human_hint.prompt b/core/prompts/bug-hunter/instructions_from_human_hint.prompt new file mode 100644 index 0000000..a2d95ca --- /dev/null +++ b/core/prompts/bug-hunter/instructions_from_human_hint.prompt @@ -0,0 +1,6 @@ +The human is sending you a hint about how to solve this bug. Here is what human said: +``` +{{ human_hint }} +``` + +Now, based on this hint, break down exactly what the problem is, what is the solution to this problem and how can we implement this solution so that the bug is fixed. diff --git a/core/prompts/bug-hunter/iteration.prompt b/core/prompts/bug-hunter/iteration.prompt index a0cd528..91b19da 100644 --- a/core/prompts/bug-hunter/iteration.prompt +++ b/core/prompts/bug-hunter/iteration.prompt @@ -20,6 +20,13 @@ A part of the app is already finished. {% include "partials/user_feedback.prompt" %} +{% if current_task.test_instructions is defined %} +Here are the test instructions the user was following when the issue occurred: +``` +{{ current_task.test_instructions }} +``` +{% endif %} + {% if next_solution_to_try is not none %} Focus on solving this issue in the following way: ``` diff --git a/core/prompts/bug-hunter/log_data.prompt b/core/prompts/bug-hunter/log_data.prompt index af76a1a..511081f 100644 --- a/core/prompts/bug-hunter/log_data.prompt +++ b/core/prompts/bug-hunter/log_data.prompt @@ -7,7 +7,13 @@ Here are the logs we added to the frontend: ``` {{ frontend_logs }} ``` +{% endif %}{% if user_feedback is not none %} +Finally, here is a hint from a human who tested the app: +``` +{{ user_feedback }} +``` +When you're thinking about what to do next, take into the account human's feedback. {% endif %}{% if fix_attempted %} The problem wasn't solved with the last changes. You have 2 options - to tell me exactly where is the problem happening or to add more logs to better determine where is the problem. If you think we should add more logs around the code to better understand the problem, tell me code snippets in which we should add the logs. If you think you know where the issue is, don't add any new logs but explain what log print tell point you to the problem, what the problem is, what is the solution to this problem and how the solution will fix the problem. What is your answer? Make sure not to repeat mistakes from before that didn't work. {% endif %} -{% if backend_logs is none and frontend_logs is none and fix_attempted == false %}Human didn't supply any data{% endif %} +{% if backend_logs is none and frontend_logs is none and user_feedback is none and fix_attempted == false %}Human didn't supply any data{% endif %} diff --git a/core/prompts/bug-hunter/problem_explanation.prompt b/core/prompts/bug-hunter/problem_explanation.prompt new file mode 100644 index 0000000..3064d1d --- /dev/null +++ b/core/prompts/bug-hunter/problem_explanation.prompt @@ -0,0 +1,4 @@ +This also didn't help to solve the issue so we can conclude that you are unable to solve this problem yourself so I got a human here who will help you out. + +First, focus on the problem you're facing and explain it to the human. Explain what is the issue that you're working in and what should the human try to do to solve this problem. Is there anything the human can look at that you don't have access to - a database, API response, etc.? 
If there is something for the human to look at, specify exactly how can the human obtain this information. Keep the answer short and to the point. + diff --git a/core/prompts/bug-hunter/tell_me_more.prompt b/core/prompts/bug-hunter/tell_me_more.prompt new file mode 100644 index 0000000..5a25ff0 --- /dev/null +++ b/core/prompts/bug-hunter/tell_me_more.prompt @@ -0,0 +1 @@ +Please tell me more about the problem we're working on and don't repeat things you said before but tell me something I don't know. diff --git a/core/prompts/code-reviewer/review_changes.prompt b/core/prompts/code-monkey/review_changes.prompt similarity index 100% rename from core/prompts/code-reviewer/review_changes.prompt rename to core/prompts/code-monkey/review_changes.prompt diff --git a/core/prompts/code-reviewer/breakdown.prompt b/core/prompts/code-reviewer/breakdown.prompt deleted file mode 100644 index f575d5d..0000000 --- a/core/prompts/code-reviewer/breakdown.prompt +++ /dev/null @@ -1,2 +0,0 @@ -{# This is the same template as for Developer's breakdown because Code Reviewer is reusing it in a conversation #} -{% extends "developer/breakdown.prompt" %} diff --git a/core/prompts/code-reviewer/system.prompt b/core/prompts/code-reviewer/system.prompt deleted file mode 100644 index 17d4635..0000000 --- a/core/prompts/code-reviewer/system.prompt +++ /dev/null @@ -1,2 +0,0 @@ -You are a world class full stack software developer. You write modular, clean, maintainable, production-ready code. -Your job is to review changes implemented by your junior team members. diff --git a/core/prompts/developer/breakdown.prompt b/core/prompts/developer/breakdown.prompt index 51715cb..86ae779 100644 --- a/core/prompts/developer/breakdown.prompt +++ b/core/prompts/developer/breakdown.prompt @@ -18,6 +18,7 @@ DO NOT specify commands to create any folders or files, they will be created aut {% include "partials/execution_order.prompt" %} {% include "partials/human_intervention_explanation.prompt" %} {% include "partials/file_size_limit.prompt" %} +{% include "partials/breakdown_code_instructions.prompt" %} Never use the port 5000 to run the app, it's reserved. @@ -33,6 +34,12 @@ You are currently working on task #{{ current_task_index + 1 }} with the followi ``` {{ task.description }} ``` +{% if task.get('pre_breakdown_testing_instructions') is not none %} +Here is how this task should be tested: +``` +{{ task.pre_breakdown_testing_instructions }} +```{% endif %} + {% if current_task_index != 0 %}All previous tasks are finished and you don't have to work on them.{% endif %} -Now, tell me all the code that needs to be written to implement ONLY this task and have it fully working and all commands that need to be run to implement this task. +Now, start by writing up what needs to be implemented to get this task working. Think about how routes are set up, how are variables called, and other important things, and mention files by name and where should all new functionality be called from. Then, tell me all the code that needs to be written to implement ONLY this task and have it fully working and all commands that need to be run to implement this task. 
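A quick illustration of the `pre_breakdown_testing_instructions` block added to `developer/breakdown.prompt` above: a minimal sketch using a plain Jinja2 `Template` and made-up task dicts (the real prompts are rendered through the project's own template loader, not like this):

```python
# Minimal sketch (not part of the diff): how the `pre_breakdown_testing_instructions`
# conditional added to developer/breakdown.prompt behaves when rendered. The inline
# template only mirrors the relevant fragment; the task dicts are made up.
from jinja2 import Template

fragment = Template(
    "{% if task.get('pre_breakdown_testing_instructions') is not none %}"
    "Here is how this task should be tested:\n"
    "```\n{{ task.pre_breakdown_testing_instructions }}\n```"
    "{% endif %}"
)

task_with_instructions = {
    "description": "Add a login page",
    "pre_breakdown_testing_instructions": "Open /login and sign in with a test user.",
}
task_without_instructions = {"description": "Add a login page"}

print(fragment.render(task=task_with_instructions))     # block is rendered
print(fragment.render(task=task_without_instructions))  # .get() returns None, block is skipped
```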
diff --git a/core/prompts/error-handler/debug.prompt b/core/prompts/error-handler/debug.prompt
index dcdd3da..e9462a2 100644
--- a/core/prompts/error-handler/debug.prompt
+++ b/core/prompts/error-handler/debug.prompt
@@ -16,7 +16,7 @@ Here are the detailed instructions for the current task:
 ```
 {{ current_task.instructions }}
 ```
-{# FIXME: the above stands in place of a previous (task breakdown) convo, and is duplicated in define_user_review_goal, review_task and debug prompts #}
+{# FIXME: the above stands in place of a previous (task breakdown) convo, and is duplicated in define_user_review_goal and debug prompts #}
 
 {% if task_steps and step_index is not none -%}
 The current task has been split into multiple steps, and each step is one of the following:
diff --git a/core/prompts/partials/breakdown_code_instructions.prompt b/core/prompts/partials/breakdown_code_instructions.prompt
new file mode 100644
index 0000000..ae503b1
--- /dev/null
+++ b/core/prompts/partials/breakdown_code_instructions.prompt
@@ -0,0 +1 @@
+Make sure that the user doesn't have to test anything with commands, that all features are reflected in the frontend, and that all information the user sees in the browser is shown on a stylized page and not as plain text or JSON.
diff --git a/core/prompts/partials/project_details.prompt b/core/prompts/partials/project_details.prompt
index bea8edf..5f27d74 100644
--- a/core/prompts/partials/project_details.prompt
+++ b/core/prompts/partials/project_details.prompt
@@ -3,10 +3,6 @@ Here is a high level description of "{{ state.branch.project.name }}":
 ```
 {{ state.specification.description }}
 ```
 
-{% if state.specification.architecture %}
-Here is a short description of the project architecture:
-{{ state.specification.architecture }}
-{% endif %}
 
 {% if state.specification.system_dependencies %}
 Here are the technologies that should be used for this project:
diff --git a/core/prompts/partials/project_tasks.prompt b/core/prompts/partials/project_tasks.prompt
index 58b0524..a08a232 100644
--- a/core/prompts/partials/project_tasks.prompt
+++ b/core/prompts/partials/project_tasks.prompt
@@ -4,6 +4,7 @@
 
 ## Rule #1
 Every epic must have only coding involved. There should never be a epic that is only testing or ensuring something works. There shouldn't be a epic for researching, deployment, writing documentation, testing or anything that is not writing the actual code. Testing if app works will be done as part of each epic.
+Do not leave anything open to interpretation, e.g. if something can be done in multiple ways, specify which way should be used and be as clear as possible.
 
 ## Rule #2
 This rule applies to epic scope.
@@ -12,13 +13,11 @@ Each epic must be deliverable that can be verified by non technical user. Each e
 
 ## Rule #3
 This rule applies to the number of epics you will create.
 Every app should have different number of epics depending on complexity. Think epic by epic and create the minimum number of epics that are needed to develop this app.
-Simple apps should have only 1 epic.
-Medium complexity apps should have 2-5 epics.
-Very complex apps should have 4-8 epics.
+Simple apps should have only 1 epic. More complex apps should have more epics. Do not create more epics than needed.
 
 ## Rule #4
 This rule applies to writing epic 'description'.
-Every epic must have a clear, high level, and short 1 sentence 'description'. It must be very clear so that even non technical users who are reviewing it and just moved to this project can understand what is goal for the epic.
+Every epic must have a clear, high level, and short 1-2 sentence 'description'. It must be very clear so that even non technical users who are reviewing it and just moved to this project can understand what is goal for the epic. ## Rule #5 This rule applies to order of epics. diff --git a/core/prompts/spec-writer/ask_questions.prompt b/core/prompts/spec-writer/ask_questions.prompt index 30fa853..d7bc962 100644 --- a/core/prompts/spec-writer/ask_questions.prompt +++ b/core/prompts/spec-writer/ask_questions.prompt @@ -40,7 +40,7 @@ Important note: don't ask trivial questions for obvious or unimportant parts of * Should the "Hello World" message be static text served directly from the server, or would you like it implemented via JavaScript on the client side? * Explanation: There's no need to micromanage the developer(s) and designer(s), the client would've specified these details if they were important. -If you ask such trivial questions, the client will think you're stupid and will leave. DOn'T DO THAT +If you ask such trivial questions, the client will think you're stupid and will leave. DON'T DO THAT Think carefully about what a developer must know to be able to build the app. The specification must address all of this information, otherwise the AI software developer will not be able to build the app. diff --git a/core/prompts/task-reviewer/review_task.prompt b/core/prompts/task-reviewer/review_task.prompt deleted file mode 100644 index f19e6b9..0000000 --- a/core/prompts/task-reviewer/review_task.prompt +++ /dev/null @@ -1,70 +0,0 @@ -You are working on a App called "{{ state.branch.project.name }}" and your job is to review changes made. - -{% include "partials/project_details.prompt" %} -{% include "partials/features_list.prompt" %} - -Development process of this app was split into smaller tasks. Here is the list of all tasks: -``` -{% for task in state.tasks %} -{{ loop.index }}. {{ task.description }} -{% endfor %} -``` - -You are currently working on, and have to focus only on, this task: -``` -{{ state.current_task.description }} -``` - -A part of the app is already finished. -{% include "partials/files_list.prompt" %} - -{% if all_feedbacks -%} -While working on this task, your colleague who is testing the app "{{ state.branch.project.name }}" sent you some additional information on what doesn't work as intended or what should be added. Here are all the inputs he sent you: -``` -{% for feedback in all_feedbacks %} -{{ loop.index }}. {{ feedback }} -{% endfor %} -``` - -After you got each of these additional inputs, you tried to fix it as part of this task. {% endif %} -{% if bug_hunter_instructions -%}Here are the last implementation instructions that were given while fixing a bug: -{% for instructions in bug_hunter_instructions %} -Instructions #{{ loop.index }} -``` -{{ instructions }} -``` -{% endfor %} -{% endif %} - -Files that were modified during implementation of the task are: -{% for path, content in files_after_modification %} -* `{{ path }}` -{% endfor %} - -Now I will show you how those files looked before this task implementation started. If a file is listed as the file that changed but is not in this list that means it was created during this task. 
Here are files before implementation of this task:
-
----start_of_files_at_start_of_task---
-{% for path, content in files_before_modification.items() %}{% if content %}
-* File `{{ path }}`:
-```
-{{ content }}```
-
-{% endif %}{% endfor %}
----end_of_files_at_start_of_task---
-
-**IMPORTANT**
-You have to review this task implementation. You are known to be very strict with your reviews and very good at noticing bugs but you don't mind minor changes like refactoring, adding or removing logs and so on. You think twice through all information given before giving any conclusions.
-
-Each task goes through multiple reviews and you have to focus only on your part of review.
-In this review, your goal is to check:
-1. If there are some functionalities that were removed but are still needed.
-2. If new files or functions are created but never called or used.
-3. If there is some "dead code" that should be removed.
-4. If there is some duplicate code resulting from refactoring or moving code into separate classes or files.
-
-If everything is ok respond only with "DONE" and nothing else. Do NOT respond with thoughts, reasoning, explanations or anything similar if everything is ok, respond just with "DONE".
-
-If you find any of these 4 mistakes, describe in detail what has to be changed.
-
-{% include "partials/relative_paths.prompt" %}
-{% include "partials/execution_order.prompt" %}
diff --git a/core/prompts/task-reviewer/system.prompt b/core/prompts/task-reviewer/system.prompt
deleted file mode 100644
index ab21517..0000000
--- a/core/prompts/task-reviewer/system.prompt
+++ /dev/null
@@ -1,7 +0,0 @@
-You are a world class full stack software developer working in a team.
-
-You write modular, well-organized code split across files that are not too big, so that the codebase is maintainable. You include proper error handling and logging for your clean, readable, production-level quality code.
-
-When reviewing other people's code, you are strict with your reviews and very good at noticing bugs but you don't mind minor changes like refactoring, adding or removing logs and so on. You think twice through all information given before giving any conclusions.
-
-Your job is to review tasks implemented by your team, following the task implementation instructions.
diff --git a/core/prompts/tech-lead/epic_breakdown.prompt b/core/prompts/tech-lead/epic_breakdown.prompt
new file mode 100644
index 0000000..9477f78
--- /dev/null
+++ b/core/prompts/tech-lead/epic_breakdown.prompt
@@ -0,0 +1 @@
+Ok, great. Now, you need to take the epic #{{ epic_number }} "{{ epic_description }}" and break it down into smaller tasks. Each task is one testable whole that the user can test and commit. Each task will be one commit that has to be testable by a human. Return the list of tasks for the Epic #{{ epic_number }}. For each task, write the task description and a description of how a human should test whether the task is successfully implemented or not. Keep in mind that there can be 1 task or multiple, depending on the complexity of the epic. The epics will be implemented one by one so make sure that the user is able to test each task you write - for example, if something will be implemented in the epics after the epic #{{ epic_number }}, then you cannot write it here because the user won't be able to test it.
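The new `tech-lead/epic_breakdown.prompt` above takes two template variables, `epic_number` and `epic_description`. As a rough sketch (assuming the prompts directory can be used directly as a Jinja2 template root; the agents actually render prompts through their own conversation helpers), it could be exercised like this:

```python
# Rough sketch (assumptions: prompts live under core/prompts and render as plain
# Jinja2 templates; the epic number and description below are made up).
# It only shows which variables the new tech-lead/epic_breakdown.prompt expects.
from jinja2 import Environment, FileSystemLoader

env = Environment(loader=FileSystemLoader("core/prompts"))
prompt = env.get_template("tech-lead/epic_breakdown.prompt").render(
    epic_number=2,
    epic_description="User authentication",
)
print(prompt)
```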
diff --git a/core/prompts/troubleshooter/iteration.prompt b/core/prompts/troubleshooter/iteration.prompt
index d0560dd..f653c9a 100644
--- a/core/prompts/troubleshooter/iteration.prompt
+++ b/core/prompts/troubleshooter/iteration.prompt
@@ -21,6 +21,14 @@ A part of the app is already finished.
 {% include "partials/user_feedback.prompt" %}
 
+{% if state.current_task.test_instructions is defined %}
+The user was testing the current implementation of the app when they requested some changes. These are the testing instructions:
+```
+{{ state.current_task.test_instructions }}
+```
+{% endif %}
+
+
 {% if next_solution_to_try is not none %}
 Focus on solving this issue in the following way:
 ```
@@ -28,12 +36,10 @@ Focus on solving this issue in the following way:
 ```
 {% endif %}
 {% include "partials/doc_snippets.prompt" %}
-Now, you have to debug this issue and comply with the additional user feedback.
+Now, tell me how we can implement the changes that the user requested. Think step by step, explain each change you want to make, and write the code snippets that you want to change.
 
 **IMPORTANT**
 Think about all information provided. Your job is to look at big picture by analysing all files to find where the issue is.
-Don't reply with any code, your thoughts or breakdown of the issue. Respond only with description of solution, explaining what should be steps in solving the issue.
-Create as little steps as possible to fix the issue. Each step should describe, using sentences and not code, what changes are needed in specific file or describe command that needs to be executed to continue working on the issue. When there are multiple things that have to be done in one file write everything as one step and don't split it in multiple steps.
 You can count that the environment is set up previously and packages listed in files are installed so tell me only commands needed for installation of new dependencies, if there are any.
diff --git a/core/state/state_manager.py b/core/state/state_manager.py index a7eac3a..5f1abf7 100644 --- a/core/state/state_manager.py +++ b/core/state/state_manager.py @@ -1,7 +1,12 @@ +import asyncio import os.path +import traceback +from contextlib import asynccontextmanager from typing import TYPE_CHECKING, Optional from uuid import UUID, uuid4 +from tenacity import retry, stop_after_attempt, wait_fixed + from core.config import FileSystemType, get_config from core.db.models import Branch, ExecLog, File, FileContent, LLMRequest, Project, ProjectState, UserInput from core.db.models.specification import Specification @@ -43,6 +48,18 @@ def __init__(self, session_manager: SessionManager, ui: Optional[UIBase] = None) self.current_state = None self.next_state = None self.current_session = None + self.blockDb = False + + @asynccontextmanager + async def db_blocker(self): + while self.blockDb: + await asyncio.sleep(0.1) # Wait if blocked + + try: + self.blockDb = True # Set the block + yield + finally: + self.blockDb = False # Unset the block async def list_projects(self) -> list[Project]: """ @@ -179,6 +196,10 @@ async def load_project( ) if self.current_state.current_epic and self.current_state.current_task and self.ui: + await self.ui.send_epics_and_tasks( + self.current_state.current_epic.get("sub_epics"), + self.current_state.tasks, + ) source = self.current_state.current_epic.get("source", "app") await self.ui.send_task_progress( self.current_state.tasks.index(self.current_state.current_task) + 1, @@ -192,6 +213,10 @@ async def load_project( return self.current_state + @retry(stop=stop_after_attempt(3), wait=wait_fixed(1)) + async def commit_with_retry(self): + await self.current_session.commit() + async def commit(self) -> ProjectState: """ Commit the new project state to the database. @@ -201,35 +226,43 @@ async def commit(self) -> ProjectState: :return: The committed state. """ - if self.next_state is None: - raise ValueError("No state to commit.") - if self.current_session is None: - raise ValueError("No database session open.") - - await self.current_session.commit() + try: + if self.next_state is None: + raise ValueError("No state to commit.") + if self.current_session is None: + raise ValueError("No database session open.") + + log.debug("Committing session") + await self.commit_with_retry() + log.debug("Session committed successfully") + + # Having a shorter-lived sessions is considered a good practice in SQLAlchemy, + # so we close and recreate the session for each state. This uses db + # connection from a connection pool, so it is fast. Note that SQLite uses + # no connection pool by default because it's all in-process so it's fast anyway. + self.current_session.expunge_all() + await self.session_manager.close() + self.current_session = await self.session_manager.start() - # Having a shorter-lived sessions is considered a good practice in SQLAlchemy, - # so we close and recreate the session for each state. This uses db - # connection from a connection pool, so it is fast. Note that SQLite uses - # no connection pool by default because it's all in-process so it's fast anyway. 
- self.current_session.expunge_all() - await self.session_manager.close() - self.current_session = await self.session_manager.start() + self.current_state = self.next_state + self.current_session.add(self.next_state) + self.next_state = await self.current_state.create_next_state() - self.current_state = self.next_state - self.current_session.add(self.next_state) - self.next_state = await self.current_state.create_next_state() + # After the next_state becomes the current_state, we need to load + # the FileContent model, which was previously loaded by the load_project(), + # but is not populated by the `create_next_state()` + for f in self.current_state.files: + await f.awaitable_attrs.content - # After the next_state becomes the current_state, we need to load - # the FileContent model, which was previously loaded by the load_project(), - # but is not populated by the `create_next_state()` - for f in self.current_state.files: - await f.awaitable_attrs.content + telemetry.inc("num_steps") - telemetry.inc("num_steps") + # FIXME: write a test to verify files (and file content) are preloaded + return self.current_state - # FIXME: write a test to verify files (and file content) are preloaded - return self.current_state + except Exception as e: + log.error(f"Error during commit: {str(e)}") + log.error(traceback.format_exc()) + raise async def rollback(self): """ @@ -253,12 +286,18 @@ async def log_llm_request(self, request_log: LLMRequestLog, agent: Optional["Bas :param request_log: The request log to log. """ - telemetry.record_llm_request( - request_log.prompt_tokens + request_log.completion_tokens, - request_log.duration, - request_log.status != LLMRequestStatus.SUCCESS, - ) - LLMRequest.from_request_log(self.current_state, agent, request_log) + async with self.db_blocker(): + try: + telemetry.record_llm_request( + request_log.prompt_tokens + request_log.completion_tokens, + request_log.duration, + request_log.status != LLMRequestStatus.SUCCESS, + ) + LLMRequest.from_request_log(self.current_state, agent, request_log) + + except Exception as e: + if self.ui: + await self.ui.send_message(f"An error occurred: {e}") async def log_user_input(self, question: str, response: UserInputData): """ @@ -350,7 +389,8 @@ async def save_file( self.file_system.save(path, content) hash = self.file_system.hash_string(content) - file_content = await FileContent.store(self.current_session, hash, content) + async with self.db_blocker(): + file_content = await FileContent.store(self.current_session, hash, content) file = self.next_state.save_file(path, file_content) if self.ui and not from_template: @@ -496,6 +536,50 @@ async def get_modified_files(self) -> list[str]: return modified_files + async def get_modified_files_with_content(self) -> list[dict]: + """ + Return a list of new or modified files from the file system, + including their paths, old content, and new content. + + :return: List of dictionaries containing paths, old content, + and new content for new or modified files. 
+ """ + + modified_files = [] + files_in_workspace = self.file_system.list() + + for path in files_in_workspace: + content = self.file_system.read(path) + saved_file = self.current_state.get_file_by_path(path) + + # If there's a saved file, serialize its content; otherwise, set it to None + saved_file_content = saved_file.content.content if saved_file else None + + if saved_file_content == content: + continue + + modified_files.append( + { + "path": path, + "file_old": saved_file_content, # Serialized content + "file_new": content, + } + ) + + # Handle files removed from disk + await self.current_state.awaitable_attrs.files + for db_file in self.current_state.files: + if db_file.path not in files_in_workspace: + modified_files.append( + { + "path": db_file.path, + "file_old": db_file.content.content, # Serialized content + "file_new": "", # Empty string as the file is removed + } + ) + + return modified_files + def workspace_is_empty(self) -> bool: """ Returns whether the workspace has any files in them or is empty. diff --git a/core/templates/example_project.py b/core/templates/example_project.py index efb6a63..2ef8a47 100644 --- a/core/templates/example_project.py +++ b/core/templates/example_project.py @@ -66,6 +66,7 @@ "Integrate Boostrap 5 for styling - add CSS/JS to index.html, style App.jsx and other files as appropriate." ), "status": "todo", + "sub_epic_id": 1, } ] diff --git a/core/templates/tree/node_express_mongoose/package.json b/core/templates/tree/node_express_mongoose/package.json index 9b2ed93..a382164 100644 --- a/core/templates/tree/node_express_mongoose/package.json +++ b/core/templates/tree/node_express_mongoose/package.json @@ -22,6 +22,9 @@ "express-session": "^1.18.0", "connect-mongo": "^5.1.0", "moment": "^2.30.1", - "mongoose": "^8.1.1" + "mongoose": "^8.1.1", + "axios": "^1.7.7", + "openai": "^4.63.0", + "@anthropic-ai/sdk": "^0.27.3" } } diff --git a/core/templates/tree/node_express_mongoose/public/css/style.css b/core/templates/tree/node_express_mongoose/public/css/style.css index 97c2840..9b3e7fc 100644 --- a/core/templates/tree/node_express_mongoose/public/css/style.css +++ b/core/templates/tree/node_express_mongoose/public/css/style.css @@ -1 +1,17 @@ /* Placeholder for custom styles */ +body { + padding-bottom: 60px; +} + +footer { + height: 40px; +} + +nav.navbar { + padding: 10px 20px; +} + +.pythagora-logo { + height: 20px; + margin-left: 5px; +} diff --git a/core/templates/tree/node_express_mongoose/routes/middleware/authMiddleware.js b/core/templates/tree/node_express_mongoose/routes/middleware/authMiddleware.js index 2b8d97a..8f884a8 100644 --- a/core/templates/tree/node_express_mongoose/routes/middleware/authMiddleware.js +++ b/core/templates/tree/node_express_mongoose/routes/middleware/authMiddleware.js @@ -1,11 +1,20 @@ -const isAuthenticated = (req, res, next) => { +const User = require('../../models/User'); +const isAuthenticated = async (req, res, next) => { if (req.session && req.session.userId) { - return next(); // User is authenticated, proceed to the next middleware/route handler - } else { - return res.status(401).send('You are not authenticated'); // User is not authenticated + try { + const user = await User.findById(req.session.userId); + if (user) { + req.user = user; + return next(); + } + } catch (error) { + console.error('Error in authentication middleware:', error); + res.status(500).send('Error during authentication process'); + } } + return res.status(401).send('You are not authenticated'); }; module.exports = { isAuthenticated 
-}; \ No newline at end of file +}; diff --git a/core/templates/tree/node_express_mongoose/services/llm.js b/core/templates/tree/node_express_mongoose/services/llm.js new file mode 100644 index 0000000..9b8fe4c --- /dev/null +++ b/core/templates/tree/node_express_mongoose/services/llm.js @@ -0,0 +1,72 @@ +const axios = require('axios'); +const OpenAI = require('openai'); +const Anthropic = require('@anthropic-ai/sdk'); +const dotenv = require('dotenv'); + +dotenv.config(); + +const openai = new OpenAI({ + apiKey: process.env.OPENAI_API_KEY, +}); + +const anthropic = new Anthropic({ + apiKey: process.env.ANTHROPIC_API_KEY, +}); + +const MAX_RETRIES = 3; +const RETRY_DELAY = 1000; + +async function sleep(ms) { + return new Promise(resolve => setTimeout(resolve, ms)); +} + +async function sendRequestToOpenAI(model, message) { + for (let i = 0; i < MAX_RETRIES; i++) { + try { + const response = await openai.chat.completions.create({ + model: model, + messages: [{ role: 'user', content: message }], + max_tokens: 1024, + }); + return response.choices[0].message.content; + } catch (error) { + console.error(`Error sending request to OpenAI (attempt ${i + 1}):`, error.message, error.stack); + if (i === MAX_RETRIES - 1) throw error; + await sleep(RETRY_DELAY); + } + } +} + +async function sendRequestToAnthropic(model, message) { + for (let i = 0; i < MAX_RETRIES; i++) { + try { + console.log(`Sending request to Anthropic with model: ${model} and message: ${message}`); + const response = await anthropic.messages.create({ + model: model, + messages: [{ role: 'user', content: message }], + max_tokens: 1024, + }); + console.log(`Received response from Anthropic: ${JSON.stringify(response.content)}`); + return response.content[0].text; + } catch (error) { + console.error(`Error sending request to Anthropic (attempt ${i + 1}):`, error.message, error.stack); + if (i === MAX_RETRIES - 1) throw error; + await sleep(RETRY_DELAY); + } + } +} + +async function sendLLMRequest(provider, model, message) { + switch (provider.toLowerCase()) { + case 'openai': + return sendRequestToOpenAI(model, message); + case 'anthropic': + return sendRequestToAnthropic(model, message); + default: + throw new Error(`Unsupported LLM provider: ${provider}`); + } +} + +module.exports = { + sendLLMRequest +}; diff --git a/core/templates/tree/node_express_mongoose/views/partials/_footer.ejs b/core/templates/tree/node_express_mongoose/views/partials/_footer.ejs index 7c136a6..0e1a76a 100644 --- a/core/templates/tree/node_express_mongoose/views/partials/_footer.ejs +++ b/core/templates/tree/node_express_mongoose/views/partials/_footer.ejs @@ -1,7 +1,8 @@ - + diff --git a/core/ui/base.py b/core/ui/base.py index 3f92c95..d2c2981 100644 --- a/core/ui/base.py +++ b/core/ui/base.py @@ -97,7 +97,9 @@ async def stop(self): """ raise NotImplementedError() - async def send_stream_chunk(self, chunk: str, *, source: Optional[UISource] = None): + async def send_stream_chunk( + self, chunk: str, *, source: Optional[UISource] = None, project_state_id: Optional[str] = None + ): """ Send a chunk of the stream to the UI. @@ -106,7 +108,9 @@ async def send_stream_chunk(self, chunk: str, *, source: Optional[UISource] = No """ raise NotImplementedError() - async def send_message(self, message: str, *, source: Optional[UISource] = None): + async def send_message( + self, message: str, *, source: Optional[UISource] = None, project_state_id: Optional[str] = None + ): """ Send a complete message to the UI. 
@@ -162,6 +166,7 @@ async def ask_question( hint: Optional[str] = None, initial_text: Optional[str] = None, source: Optional[UISource] = None, + project_state_id: Optional[str] = None, ) -> UserInput: """ Ask the user a question. @@ -191,6 +196,19 @@ async def send_project_stage(self, stage: ProjectStage): """ raise NotImplementedError() + async def send_epics_and_tasks( + self, + epics: list[dict] = None, + tasks: list[dict] = None, + ): + """ + Send epics and tasks info to the UI. + + :param epics: List of all epics. + :param tasks: List of all tasks. + """ + raise NotImplementedError() + async def send_task_progress( self, index: int, @@ -231,6 +249,28 @@ async def send_step_progress( """ raise NotImplementedError() + async def send_modified_files( + self, + modified_files: dict[str, str, str], + ): + """ + Send a list of modified files to the UI. + + :param modified_files: List of modified files. + """ + raise NotImplementedError() + + async def send_data_about_logs( + self, + data_about_logs: dict, + ): + """ + Send the data about debugging logs. + + :param data_about_logs: Data about logs. + """ + raise NotImplementedError() + async def send_run_command(self, run_command: str): """ Send a run command to the UI. @@ -256,6 +296,13 @@ async def send_project_root(self, path: str): """ raise NotImplementedError() + async def start_important_stream(self, path: str): + """ + Tell the extension that next stream should be visible and rendered as markdown + + """ + raise NotImplementedError() + async def send_project_stats(self, stats: dict): """ Send project statistics to the UI. @@ -269,12 +316,55 @@ async def send_project_stats(self, stats: dict): """ raise NotImplementedError() - async def generate_diff(self, file_old: str, file_new: str): + async def send_test_instructions(self, test_instructions: str): + """ + Send test instructions. + + :param test_instructions: Test instructions. + """ + raise NotImplementedError() + + async def send_file_status(self, file_path: str, file_status: str): + """ + Send file status. + + :param file_path: File path. + :param file_status: File status. + """ + raise NotImplementedError() + + async def send_bug_hunter_status(self, status: str, num_cycles: int): + """ + Send bug hunter status. + + :param status: Bug hunter status. + :param num_cycles: Number of Bug hunter cycles. + """ + raise NotImplementedError() + + async def generate_diff( + self, file_path: str, file_old: str, file_new: str, n_new_lines: int = 0, n_del_lines: int = 0 + ): """ Generate a diff between two files. + :param file_path File path. :param file_old: Old file content. :param file_new: New file content. + :param n_new_lines: Number of new lines. + :param n_del_lines: Number of deleted lines. + """ + raise NotImplementedError() + + async def stop_app(self): + """ + Stop the App. + """ + raise NotImplementedError() + + async def close_diff(self): + """ + Close all diff views. 
""" raise NotImplementedError() diff --git a/core/ui/console.py b/core/ui/console.py index 0716fc7..eea0590 100644 --- a/core/ui/console.py +++ b/core/ui/console.py @@ -20,14 +20,18 @@ async def start(self) -> bool: async def stop(self): log.debug("Stopping console UI") - async def send_stream_chunk(self, chunk: Optional[str], *, source: Optional[UISource] = None): + async def send_stream_chunk( + self, chunk: Optional[str], *, source: Optional[UISource] = None, project_state_id: Optional[str] = None + ): if chunk is None: # end of stream print("", flush=True) else: print(chunk, end="", flush=True) - async def send_message(self, message: str, *, source: Optional[UISource] = None): + async def send_message( + self, message: str, *, source: Optional[UISource] = None, project_state_id: Optional[str] = None + ): if source: print(f"[{source}] {message}") else: @@ -64,6 +68,7 @@ async def ask_question( hint: Optional[str] = None, initial_text: Optional[str] = None, source: Optional[UISource] = None, + project_state_id: Optional[str] = None, ) -> UserInput: if source: print(f"[{source}] {question}") @@ -97,6 +102,13 @@ async def ask_question( async def send_project_stage(self, stage: ProjectStage): pass + async def send_epics_and_tasks( + self, + epics: list[dict], + tasks: list[dict], + ): + pass + async def send_task_progress( self, index: int, @@ -118,6 +130,18 @@ async def send_step_progress( ): pass + async def send_modified_files( + self, + modified_files: dict[str, str, str], + ): + pass + + async def send_data_about_logs( + self, + data_about_logs: dict, + ): + pass + async def send_run_command(self, run_command: str): pass @@ -130,7 +154,24 @@ async def send_project_root(self, path: str): async def send_project_stats(self, stats: dict): pass - async def generate_diff(self, file_old: str, file_new: str): + async def send_test_instructions(self, test_instructions: str): + pass + + async def send_file_status(self, file_path: str, file_status: str): + pass + + async def send_bug_hunter_status(self, status: str, num_cycles: int): + pass + + async def generate_diff( + self, file_path: str, file_old: str, file_new: str, n_new_lines: int = 0, n_del_lines: int = 0 + ): + pass + + async def stop_app(self): + pass + + async def close_diff(self): pass async def loading_finished(self): @@ -145,5 +186,8 @@ async def send_features_list(self, features: list[str]): async def import_project(self, project_dir: str): pass + async def start_important_stream(self): + pass + __all__ = ["PlainConsoleUI"] diff --git a/core/ui/ipc_client.py b/core/ui/ipc_client.py index a3d09b9..7e6cd5d 100644 --- a/core/ui/ipc_client.py +++ b/core/ui/ipc_client.py @@ -29,6 +29,7 @@ class MessageType(str, Enum): USER_INPUT_REQUEST = "user_input_request" INFO = "info" PROGRESS = "progress" + DEBUGGING_LOGS = "debugging_logs" RUN_COMMAND = "run_command" OPEN_FILE = "openFile" PROJECT_FOLDER_NAME = "project_folder_name" @@ -44,6 +45,13 @@ class MessageType(str, Enum): FEATURE_FINISHED = "featureFinished" GENERATE_DIFF = "generateDiff" CLOSE_DIFF = "closeDiff" + FILE_STATUS = "fileStatus" + BUG_HUNTER_STATUS = "bugHunterStatus" + EPICS_AND_TASKS = "epicsAndTasks" + MODIFIED_FILES = "modifiedFiles" + IMPORTANT_STREAM = "importantStream" + TEST_INSTRUCTIONS = "testInstructions" + STOP_APP = "stopApp" class Message(BaseModel): @@ -58,6 +66,7 @@ class Message(BaseModel): type: MessageType category: Optional[str] = None + project_state_id: Optional[str] = None content: Union[str, dict, None] = None def to_bytes(self) -> bytes: @@ -174,7 
+183,9 @@ async def stop(self): self.writer = None self.reader = None - async def send_stream_chunk(self, chunk: Optional[str], *, source: Optional[UISource] = None): + async def send_stream_chunk( + self, chunk: Optional[str], *, source: Optional[UISource] = None, project_state_id: Optional[str] = None + ): if not self.writer: return @@ -185,9 +196,12 @@ async def send_stream_chunk(self, chunk: Optional[str], *, source: Optional[UISo MessageType.STREAM, content=chunk, category=source.type_name if source else None, + project_state_id=project_state_id, ) - async def send_message(self, message: str, *, source: Optional[UISource] = None): + async def send_message( + self, message: str, *, source: Optional[UISource] = None, project_state_id: Optional[str] = None + ): if not self.writer: return @@ -196,6 +210,7 @@ async def send_message(self, message: str, *, source: Optional[UISource] = None) MessageType.VERBOSE, content=message, category=source.type_name if source else None, + project_state_id=project_state_id, ) async def send_key_expired(self, message: Optional[str] = None): @@ -242,6 +257,7 @@ async def ask_question( hint: Optional[str] = None, initial_text: Optional[str] = None, source: Optional[UISource] = None, + project_state_id: Optional[str] = None, ) -> UserInput: if not self.writer: raise UIClosedError() @@ -249,20 +265,30 @@ async def ask_question( category = source.type_name if source else None if hint: - await self._send(MessageType.HINT, content=hint, category=category) + await self._send(MessageType.HINT, content=hint, category=category, project_state_id=project_state_id) else: - await self._send(MessageType.VERBOSE, content=question, category=category) + await self._send( + MessageType.VERBOSE, content=question, category=category, project_state_id=project_state_id + ) - await self._send(MessageType.USER_INPUT_REQUEST, content=question, category=category) + await self._send( + MessageType.USER_INPUT_REQUEST, content=question, category=category, project_state_id=project_state_id + ) if buttons: buttons_str = "/".join(buttons.values()) if buttons_only: - await self._send(MessageType.BUTTONS_ONLY, content=buttons_str, category=category) + await self._send( + MessageType.BUTTONS_ONLY, content=buttons_str, category=category, project_state_id=project_state_id + ) else: - await self._send(MessageType.BUTTONS, content=buttons_str, category=category) + await self._send( + MessageType.BUTTONS, content=buttons_str, category=category, project_state_id=project_state_id + ) if initial_text: # FIXME: add this to base and console and document it after merging with hint PR - await self._send(MessageType.INPUT_PREFILL, content=initial_text, category=category) + await self._send( + MessageType.INPUT_PREFILL, content=initial_text, category=category, project_state_id=project_state_id + ) response = await self._receive() answer = response.content.strip() @@ -287,6 +313,19 @@ async def ask_question( async def send_project_stage(self, stage: ProjectStage): await self._send(MessageType.INFO, content=json.dumps({"project_stage": stage.value})) + async def send_epics_and_tasks( + self, + epics: list[dict], + tasks: list[dict], + ): + await self._send( + MessageType.EPICS_AND_TASKS, + content={ + "epics": epics, + "tasks": tasks, + }, + ) + async def send_task_progress( self, index: int, @@ -312,6 +351,15 @@ async def send_task_progress( }, ) + async def send_modified_files( + self, + modified_files: dict[str, str, str], + ): + await self._send( + MessageType.MODIFIED_FILES, + content={"files": 
modified_files}, + ) + async def send_step_progress( self, index: int, @@ -331,6 +379,15 @@ async def send_step_progress( }, ) + async def send_data_about_logs( + self, + data_about_logs: dict, + ): + await self._send( + MessageType.DEBUGGING_LOGS, + content=data_about_logs, + ) + async def send_run_command(self, run_command: str): await self._send( MessageType.RUN_COMMAND, @@ -352,21 +409,62 @@ async def send_project_root(self, path: str): content=basename(path), ) + async def start_important_stream(self): + await self._send( + MessageType.IMPORTANT_STREAM, + content={}, + ) + async def send_project_stats(self, stats: dict): await self._send( MessageType.PROJECT_STATS, content=stats, ) - async def generate_diff(self, file_old: str, file_new: str): + async def send_test_instructions(self, test_instructions: str): + await self._send( + MessageType.TEST_INSTRUCTIONS, + content={ + "test_instructions": test_instructions, + }, + ) + + async def send_file_status(self, file_path: str, file_status: str): + await self._send( + MessageType.FILE_STATUS, + content={ + "file_path": file_path, + "file_status": file_status, + }, + ) + + async def send_bug_hunter_status(self, status: str, num_cycles: int): + await self._send( + MessageType.BUG_HUNTER_STATUS, + content={ + "status": status, + "num_cycles": num_cycles, + }, + ) + + async def generate_diff( + self, file_path: str, file_old: str, file_new: str, n_new_lines: int = 0, n_del_lines: int = 0 + ): await self._send( MessageType.GENERATE_DIFF, content={ + "file_path": file_path, "file_old": file_old, "file_new": file_new, + "n_new_lines": n_new_lines, + "n_del_lines": n_del_lines, }, ) + async def stop_app(self): + log.debug("Sending signal to stop the App") + await self._send(MessageType.STOP_APP) + async def close_diff(self): log.debug("Sending signal to close the generated diff file") await self._send(MessageType.CLOSE_DIFF) diff --git a/core/ui/virtual.py b/core/ui/virtual.py index 146ca44..d7a6b68 100644 --- a/core/ui/virtual.py +++ b/core/ui/virtual.py @@ -21,14 +21,18 @@ async def start(self) -> bool: async def stop(self): log.debug("Stopping test UI") - async def send_stream_chunk(self, chunk: Optional[str], *, source: Optional[UISource] = None): + async def send_stream_chunk( + self, chunk: Optional[str], *, source: Optional[UISource] = None, project_state_id: Optional[str] = None + ): if chunk is None: # end of stream print("", flush=True) else: print(chunk, end="", flush=True) - async def send_message(self, message: str, *, source: Optional[UISource] = None): + async def send_message( + self, message: str, *, source: Optional[UISource] = None, project_state_id: Optional[str] = None + ): if source: print(f"[{source}] {message}") else: @@ -64,6 +68,7 @@ async def ask_question( hint: Optional[str] = None, initial_text: Optional[str] = None, source: Optional[UISource] = None, + project_state_id: Optional[str] = None, ) -> UserInput: if source: print(f"[{source}] {question}") @@ -90,6 +95,13 @@ async def ask_question( async def send_project_stage(self, stage: ProjectStage): pass + async def send_epics_and_tasks( + self, + epics: list[dict], + tasks: list[dict], + ): + pass + async def send_task_progress( self, index: int, @@ -111,6 +123,18 @@ async def send_step_progress( ): pass + async def send_data_about_logs( + self, + data_about_logs: dict, + ): + pass + + async def send_modified_files( + self, + modified_files: dict[str, str, str], + ): + pass + async def send_run_command(self, run_command: str): pass @@ -120,10 +144,30 @@ async def 
open_editor(self, file: str, line: Optional[int] = None): async def send_project_root(self, path: str): pass + async def start_important_stream(self): + pass + async def send_project_stats(self, stats: dict): pass - async def generate_diff(self, file_old: str, file_new: str): + async def send_test_instructions(self, test_instructions: str): + pass + + async def send_file_status(self, file_path: str, file_status: str): + pass + + async def send_bug_hunter_status(self, status: str, num_cycles: int): + pass + + async def generate_diff( + self, file_path: str, file_old: str, file_new: str, n_new_lines: int = 0, n_del_lines: int = 0 + ): + pass + + async def stop_app(self): + pass + + async def close_diff(self): pass async def loading_finished(self): diff --git a/example-config.json b/example-config.json index 24b2fdc..b6c0072 100644 --- a/example-config.json +++ b/example-config.json @@ -8,21 +8,21 @@ "base_url": null, "api_key": null, "connect_timeout": 60.0, - "read_timeout": 10.0 + "read_timeout": 20.0 }, // Example config for Anthropic (see https://docs.anthropic.com/docs/api-reference) "anthropic": { "base_url": "https://api.anthropic.com", "api_key": "your-api-key", "connect_timeout": 60.0, - "read_timeout": 10.0 + "read_timeout": 20.0 }, // Example config for Azure OpenAI (see https://learn.microsoft.com/en-us/azure/ai-services/openai/reference#chat-completions) "azure": { "base_url": "https://your-resource-name.openai.azure.com/", "api_key": "your-api-key", "connect_timeout": 60.0, - "read_timeout": 10.0, + "read_timeout": 20.0, "extra": { "azure_deployment": "your-azure-deployment-id", "api_version": "2024-02-01" diff --git a/pilot/.gitkeep b/pilot/.gitkeep deleted file mode 100644 index e69de29..0000000 diff --git a/pyproject.toml b/pyproject.toml index 6a94018..998c608 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "gpt-pilot" -version = "0.2.13" +version = "1.0.0" description = "Build complete apps using AI agents" authors = ["Senko Rasic "] license = "FSL-1.1-MIT" @@ -39,6 +39,7 @@ alembic = "^1.13.1" python-dotenv = "^1.0.1" prompt-toolkit = "^3.0.45" jsonref = "^1.1.0" +tenacity = "9.0.0" [tool.poetry.group.dev.dependencies] pytest = "^8.1.1" diff --git a/requirements.txt b/requirements.txt index 2ef591a..e643001 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,42 +1,44 @@ aiosqlite==0.20.0 -alembic==1.13.1 +alembic==1.13.2 annotated-types==0.7.0 anthropic==0.25.9 anyio==4.4.0 -certifi==2024.6.2 +certifi==2024.7.4 charset-normalizer==3.3.2 colorama==0.4.6 distro==1.9.0 -exceptiongroup==1.2.1 -filelock==3.14.0 -fsspec==2024.6.0 +exceptiongroup==1.2.2 +filelock==3.15.4 +fsspec==2024.6.1 greenlet==3.0.3 groq==0.6.0 h11==0.14.0 httpcore==1.0.5 httpx==0.27.0 -huggingface-hub==0.23.2 +huggingface-hub==0.24.5 idna==3.7 jinja2==3.1.4 +jiter==0.5.0 jsonref==1.1.0 mako==1.3.5 markupsafe==2.1.5 -openai==1.31.0 -packaging==24.0 -prompt-toolkit==3.0.46 +openai==1.40.6 +packaging==24.1 +prompt-toolkit==3.0.47 psutil==5.9.8 -pydantic-core==2.18.4 -pydantic==2.7.3 +pydantic-core==2.20.1 +pydantic==2.8.2 python-dotenv==1.0.1 -pyyaml==6.0.1 -regex==2024.5.15 +pyyaml==6.0.2 +regex==2024.7.24 requests==2.32.3 sniffio==1.3.1 -sqlalchemy==2.0.30 -sqlalchemy[asyncio]==2.0.30 +sqlalchemy==2.0.32 +sqlalchemy[asyncio]==2.0.32 +tenacity==9.0.0 tiktoken==0.6.0 -tokenizers==0.19.1 -tqdm==4.66.4 -typing-extensions==4.12.1 -urllib3==2.2.1 +tokenizers==0.20.0 +tqdm==4.66.5 +typing-extensions==4.12.2 +urllib3==2.2.2 wcwidth==0.2.13 diff --git 
a/tests/agents/test_base.py b/tests/agents/test_base.py index e6b5835..06fa8b9 100644 --- a/tests/agents/test_base.py +++ b/tests/agents/test_base.py @@ -14,19 +14,25 @@ class AgentUnderTest(BaseAgent): @pytest.mark.asyncio async def test_send_message(): ui = MagicMock(spec=UIBase) - agent = AgentUnderTest(None, ui) + sm = AsyncMock() + agent = AgentUnderTest(sm, ui) await agent.send_message("Hello, world!") - ui.send_message.assert_called_once_with("Hello, world!\n", source=agent.ui_source) + ui.send_message.assert_called_once_with( + "Hello, world!\n", source=agent.ui_source, project_state_id=str(agent.current_state.id) + ) @pytest.mark.asyncio async def test_stream_handler(): ui = MagicMock(spec=UIBase) - agent = AgentUnderTest(None, ui) + sm = AsyncMock() + agent = AgentUnderTest(sm, ui) await agent.stream_handler("chunk") - ui.send_stream_chunk.assert_called_once_with("chunk", source=agent.ui_source) + ui.send_stream_chunk.assert_called_once_with( + "chunk", source=agent.ui_source, project_state_id=str(agent.current_state.id) + ) @pytest.mark.asyncio @@ -46,6 +52,7 @@ async def test_ask_question(): hint=None, initial_text=None, source=agent.ui_source, + project_state_id=str(agent.current_state.id), ) state_manager.log_user_input.assert_awaited_once() @@ -63,7 +70,7 @@ async def test_get_llm(mock_BaseLLMClient): mock_client = AsyncMock(return_value=("response", "log")) mock_OpenAIClient.return_value = mock_client - llm = agent.get_llm() + llm = agent.get_llm(stream_output=True) mock_BaseLLMClient.for_provider.assert_called_once_with("openai") diff --git a/tests/agents/test_external_docs.py b/tests/agents/test_external_docs.py index a3eed86..a6e1eb5 100644 --- a/tests/agents/test_external_docs.py +++ b/tests/agents/test_external_docs.py @@ -6,7 +6,7 @@ from core.agents.external_docs import DocQueries, ExternalDocumentation, SelectedDocsets -@pytest.mark.asyncio +@pytest.mark.skip(reason="Temporary") async def test_stores_documentation_snippets_for_task(agentcontext): sm, _, ui, mock_llm = agentcontext diff --git a/tests/agents/test_orchestrator.py b/tests/agents/test_orchestrator.py index ae60504..58280bb 100644 --- a/tests/agents/test_orchestrator.py +++ b/tests/agents/test_orchestrator.py @@ -3,12 +3,11 @@ import pytest from core.agents.orchestrator import Orchestrator -from core.state.state_manager import StateManager @pytest.mark.asyncio async def test_offline_changes_check_restores_if_workspace_empty(): - sm = AsyncMock(spec=StateManager) + sm = AsyncMock() sm.workspace_is_empty = Mock(return_value=False) ui = AsyncMock() orca = Orchestrator(state_manager=sm, ui=ui) diff --git a/tests/agents/test_tech_lead.py b/tests/agents/test_tech_lead.py index c6a4a54..639f248 100644 --- a/tests/agents/test_tech_lead.py +++ b/tests/agents/test_tech_lead.py @@ -1,9 +1,8 @@ import pytest from core.agents.response import ResponseType -from core.agents.tech_lead import DevelopmentPlan, Epic, TechLead, UpdatedDevelopmentPlan +from core.agents.tech_lead import DevelopmentPlan, Epic, TechLead from core.db.models import Complexity -from core.db.models.project_state import TaskStatus from core.ui.base import UserInput @@ -27,12 +26,12 @@ async def test_create_initial_epic(agentcontext): assert sm.current_state.epics[0]["completed"] is False -@pytest.mark.asyncio +@pytest.mark.skip(reason="Temporary") async def test_apply_project_template(agentcontext): sm, _, ui, _ = agentcontext sm.current_state.specification.templates = {"node_express_mongoose": {}} - sm.current_state.epics = [{"name": "Initial 
Project"}] + sm.current_state.epics = [{"name": "Initial Project", "sub_epics": []}] await sm.commit() @@ -65,7 +64,7 @@ async def test_ask_for_feature(agentcontext): assert sm.current_state.epics[1]["completed"] is False -@pytest.mark.asyncio +@pytest.mark.skip(reason="Temporary") async def test_plan_epic(agentcontext): """ If called and there's an incomplete epic, the TechLead agent should plan the epic. @@ -100,39 +99,3 @@ async def test_plan_epic(agentcontext): assert len(sm.current_state.tasks) == 2 assert sm.current_state.tasks[0]["description"] == "Task 1" assert sm.current_state.tasks[1]["description"] == "Task 2" - - -@pytest.mark.asyncio -async def test_update_epic(agentcontext): - """ - Updating the current epic's dev plan according to the current task iterations. - """ - sm, _, ui, mock_get_llm = agentcontext - - sm.current_state.epics = [{"id": "abc", "name": "Initial Project"}] - sm.current_state.tasks = [ - {"description": "Just Finished", "status": "reviewed"}, - {"description": "Future Task", "status": "todo"}, - ] - sm.current_state.iterations = [ - {"user_feedback": "Doesn't work", "description": "There, I fixed it"}, - ] - await sm.commit() - - tl = TechLead(sm, ui) - tl.get_llm = mock_get_llm( - return_value=UpdatedDevelopmentPlan( - updated_current_epic=Epic(description="Updated Just Finished"), - plan=[Epic(description="Alternative Future Task")], - ) - ) - - response = await tl.update_epic() - assert response.type == ResponseType.DONE - - await sm.commit() - - assert sm.current_state.tasks[0]["description"] == "Updated Just Finished" - assert sm.current_state.tasks[0]["status"] == TaskStatus.EPIC_UPDATED - assert sm.current_state.tasks[1]["description"] == "Alternative Future Task" - assert sm.current_state.tasks[1]["status"] == TaskStatus.TODO diff --git a/tests/ui/test_ipc_client.py b/tests/ui/test_ipc_client.py index 18a79b3..ab52af7 100644 --- a/tests/ui/test_ipc_client.py +++ b/tests/ui/test_ipc_client.py @@ -92,7 +92,7 @@ async def test_send_message(): connected = await ui.start() assert connected is True - await ui.send_message("Hello from the other side ♫", source=src) + await ui.send_message("Hello from the other side ♫", source=src, project_state_id="123") await ui.stop() assert messages == [ @@ -100,11 +100,13 @@ async def test_send_message(): "type": "verbose", "content": "Hello from the other side ♫", "category": "agent:product-owner", + "project_state_id": "123", }, { "type": "exit", "content": None, "category": None, + "project_state_id": None, }, ] @@ -121,7 +123,7 @@ async def test_stream(): assert connected is True for word in ["Hello", "world"]: - await ui.send_stream_chunk(word, source=src) + await ui.send_stream_chunk(word, source=src, project_state_id="123") await asyncio.sleep(0.01) await ui.stop() @@ -130,16 +132,19 @@ async def test_stream(): "type": "stream", "content": "Hello", "category": "agent:product-owner", + "project_state_id": "123", }, { "type": "stream", "content": "world", "category": "agent:product-owner", + "project_state_id": "123", }, { "type": "exit", "content": None, "category": None, + "project_state_id": None, }, ]