From 9ea513b4a116353c6ca28af4118030230d0a0533 Mon Sep 17 00:00:00 2001 From: Michael Dyer Date: Thu, 8 Aug 2024 19:28:07 -0500 Subject: [PATCH 01/11] Competency extraction --- app/domain/__init__.py | 3 + .../chat/chat_pipeline_execution_dto.py | 7 +- ...tency_extraction_pipeline_execution_dto.py | 19 ++++ app/domain/data/competency_dto.py | 27 ++++++ .../ingestion_pipeline_execution_dto.py | 9 +- app/domain/pipeline_execution_dto.py | 12 ++- ...competency_extraction_status_update_dto.py | 6 ++ app/pipeline/__init__.py | 2 +- .../competency_extraction_pipeline.py | 94 +++++++++++++++++++ app/pipeline/prompts/competency_extraction.py | 42 +++++++++ app/web/routers/pipelines.py | 42 +++++++++ app/web/status/status_update.py | 27 ++++++ 12 files changed, 274 insertions(+), 16 deletions(-) create mode 100644 app/domain/competency_extraction_pipeline_execution_dto.py create mode 100644 app/domain/status/competency_extraction_status_update_dto.py create mode 100644 app/pipeline/competency_extraction_pipeline.py create mode 100644 app/pipeline/prompts/competency_extraction.py diff --git a/app/domain/__init__.py b/app/domain/__init__.py index 2f56f3f3..b32ca726 100644 --- a/app/domain/__init__.py +++ b/app/domain/__init__.py @@ -3,6 +3,9 @@ from .pipeline_execution_settings_dto import PipelineExecutionSettingsDTO from .chat.chat_pipeline_execution_dto import ChatPipelineExecutionDTO from .chat.chat_pipeline_execution_base_data_dto import ChatPipelineExecutionBaseDataDTO +from .competency_extraction_pipeline_execution_dto import ( + CompetencyExtractionPipelineExecutionDTO, +) from app.domain.chat.exercise_chat.exercise_chat_pipeline_execution_dto import ( ExerciseChatPipelineExecutionDTO, ) diff --git a/app/domain/chat/chat_pipeline_execution_dto.py b/app/domain/chat/chat_pipeline_execution_dto.py index 31fa7593..e3e63284 100644 --- a/app/domain/chat/chat_pipeline_execution_dto.py +++ b/app/domain/chat/chat_pipeline_execution_dto.py @@ -2,16 +2,11 @@ from pydantic import Field -from app.domain import PipelineExecutionDTO, PipelineExecutionSettingsDTO +from app.domain import PipelineExecutionDTO from app.domain.pyris_message import PyrisMessage from app.domain.data.user_dto import UserDTO -from app.domain.status.stage_dto import StageDTO class ChatPipelineExecutionDTO(PipelineExecutionDTO): chat_history: List[PyrisMessage] = Field(alias="chatHistory", default=[]) user: Optional[UserDTO] - settings: Optional[PipelineExecutionSettingsDTO] - initial_stages: Optional[List[StageDTO]] = Field( - default=None, alias="initialStages" - ) diff --git a/app/domain/competency_extraction_pipeline_execution_dto.py b/app/domain/competency_extraction_pipeline_execution_dto.py new file mode 100644 index 00000000..497fa828 --- /dev/null +++ b/app/domain/competency_extraction_pipeline_execution_dto.py @@ -0,0 +1,19 @@ +from typing import List, Optional + +from pydantic import Field, BaseModel + +from . import PipelineExecutionDTO +from .data.competency_dto import CompetencyTaxonomy + + +class CompetencyExtractionPipelineExecutionDTO(BaseModel): + execution: PipelineExecutionDTO + course_description: Optional[str] = Field(alias="courseDescription") + taxonomy_options: List[CompetencyTaxonomy] = Field( + alias="taxonomyOptions", default=[] + ) + max_n: int = Field( + alias="maxN", + description="Maximum number of competencies to extract from the course description", + default=10, + ) diff --git a/app/domain/data/competency_dto.py b/app/domain/data/competency_dto.py index 0e2c697c..3e1f2be4 100644 --- a/app/domain/data/competency_dto.py +++ b/app/domain/data/competency_dto.py @@ -3,6 +3,7 @@ from typing import Optional from pydantic import BaseModel, Field +from pydantic.v1 import validator class CompetencyTaxonomy(str, Enum): @@ -21,3 +22,29 @@ class CompetencyDTO(BaseModel): taxonomy: Optional[CompetencyTaxonomy] = None soft_due_date: Optional[datetime] = Field(default=None, alias="softDueDate") optional: Optional[bool] = None + + +class Competency(BaseModel): + title: str = Field( + description="Title of the competency that contains no more than 4 words", + ) + description: str = Field( + description="Description of the competency as plain string. DO NOT RETURN A LIST OF STRINGS." + ) + taxonomy: CompetencyTaxonomy = Field( + description="Selected taxonomy based on bloom's taxonomy" + ) + + @validator("subject") + def validate_subject(cls, field): + """Validate the subject of the competency.""" + if len(field.split()) > 4: + raise ValueError("Subject must contain no more than 4 words") + return field + + @validator("taxonomy") + def validate_selected_taxonomy(cls, field): + """Validate the selected taxonomy.""" + if field not in CompetencyTaxonomy.__members__.keys(): + raise ValueError(f"Invalid taxonomy: {field}") + return field diff --git a/app/domain/ingestion/ingestion_pipeline_execution_dto.py b/app/domain/ingestion/ingestion_pipeline_execution_dto.py index 393767e8..e8a9882f 100644 --- a/app/domain/ingestion/ingestion_pipeline_execution_dto.py +++ b/app/domain/ingestion/ingestion_pipeline_execution_dto.py @@ -1,17 +1,12 @@ -from typing import List, Optional +from typing import List from pydantic import Field -from app.domain import PipelineExecutionDTO, PipelineExecutionSettingsDTO +from app.domain import PipelineExecutionDTO from app.domain.data.lecture_unit_dto import LectureUnitDTO -from app.domain.status.stage_dto import StageDTO class IngestionPipelineExecutionDto(PipelineExecutionDTO): lecture_units: List[LectureUnitDTO] = Field( ..., alias="pyrisLectureUnitWebhookDTOS" ) - settings: Optional[PipelineExecutionSettingsDTO] - initial_stages: Optional[List[StageDTO]] = Field( - default=None, alias="initialStages" - ) diff --git a/app/domain/pipeline_execution_dto.py b/app/domain/pipeline_execution_dto.py index 86299d40..fb447369 100644 --- a/app/domain/pipeline_execution_dto.py +++ b/app/domain/pipeline_execution_dto.py @@ -1,8 +1,16 @@ -from pydantic import BaseModel +from typing import Optional + +from pydantic import BaseModel, Field + +from app.domain.pipeline_execution_settings_dto import PipelineExecutionSettingsDTO +from app.domain.status.stage_dto import StageDTO class PipelineExecutionDTO(BaseModel): - pass + settings: Optional[PipelineExecutionSettingsDTO] + initial_stages: Optional[list[StageDTO]] = Field( + default=None, alias="initialStages" + ) class Config: populate_by_name = True diff --git a/app/domain/status/competency_extraction_status_update_dto.py b/app/domain/status/competency_extraction_status_update_dto.py new file mode 100644 index 00000000..e71f2bdf --- /dev/null +++ b/app/domain/status/competency_extraction_status_update_dto.py @@ -0,0 +1,6 @@ +from app.domain.data.competency_dto import Competency +from app.domain.status.status_update_dto import StatusUpdateDTO + + +class CompetencyExtractionStatusUpdateDTO(StatusUpdateDTO): + result: list[Competency] = [] diff --git a/app/pipeline/__init__.py b/app/pipeline/__init__.py index 13980f8d..c9faeebb 100644 --- a/app/pipeline/__init__.py +++ b/app/pipeline/__init__.py @@ -1 +1 @@ -from ..pipeline.pipeline import Pipeline +from app.pipeline.pipeline import Pipeline diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py new file mode 100644 index 00000000..f4c434c6 --- /dev/null +++ b/app/pipeline/competency_extraction_pipeline.py @@ -0,0 +1,94 @@ +import logging +from typing import Optional + +from langchain.output_parsers import PydanticOutputParser +from langchain_core.prompts import ( + ChatPromptTemplate, +) + +from app.domain import ( + CompetencyExtractionPipelineExecutionDTO, + PyrisMessage, + IrisMessageRole, +) +from app.domain.data.text_message_content_dto import TextMessageContentDTO +from app.domain.data.competency_dto import Competency +from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments +from app.pipeline import Pipeline +from app.web.status.status_update import CompetencyExtractionCallback +from app.pipeline.prompts.competency_extraction import system_prompt + +logger = logging.getLogger(__name__) + + +class CompetencyExtractionPipeline(Pipeline): + callback: CompetencyExtractionCallback + request_handler: CapabilityRequestHandler + output_parser: PydanticOutputParser + + def __init__(self, callback: Optional[CompetencyExtractionCallback] = None): + super().__init__( + implementation_id="competency_extraction_pipeline_reference_impl" + ) + self.callback = callback + self.request_handler = CapabilityRequestHandler(requirements=RequirementList()) + self.output_parser = PydanticOutputParser(pydantic_object=Competency) + + def __call__( + self, + dto: CompetencyExtractionPipelineExecutionDTO, + prompt: Optional[ChatPromptTemplate] = None, + **kwargs, + ): + if not dto.course_description: + self.callback.error("Course description is required") + if not dto.taxonomy_options: + self.callback.error("Taxonomy options are required") + if not dto.max_n: + self.callback.error("Non-zero max_n is required") + + taxonomy_options = ", ".join(dto.taxonomy_options) + + prompt = system_prompt.format( + taxonomy_list=taxonomy_options, + course_description=dto.course_description, + n=dto.max_n, + ) + prompt = PyrisMessage( + sender=IrisMessageRole.SYSTEM, + contents=[TextMessageContentDTO(text_content=prompt)], + ) + + self.callback.in_progress("Starting competency extraction") + + response = self.request_handler.chat( + [prompt], CompletionArguments(temperature=0.4) + ) + response = response.contents[0].text_content + + print(f"Received response from OpenAI: {response}") + + generated_competencies: list[Competency] = [] + + # Find all competencies in the response + competencies = response.split("\n\n") + for i, competency in enumerate(competencies): + print(f"Processing competency {i + 1}: {competency}") + if "{" not in competency or "}" not in competency: + print("Skipping competency without JSON") + continue + # Get the competency JSON object + start = competency.index("{") + end = competency.index("}") + 1 + competency = competency[start:end] + try: + competency = self.output_parser.parse(competency) + print(f"Generated competency: {competency}") + generated_competencies.append(competency) + self.callback.done(final_result=generated_competencies) + except Exception as e: + print(f"Error generating competency: {e}") + self.callback.error(f"Error generating competency: {e}") + # Mark all remaining competencies as skipped + for i in range(len(generated_competencies), len(competencies)): + self.callback.skip(f"Skipping competency {i + 1}") diff --git a/app/pipeline/prompts/competency_extraction.py b/app/pipeline/prompts/competency_extraction.py new file mode 100644 index 00000000..0b6ce289 --- /dev/null +++ b/app/pipeline/prompts/competency_extraction.py @@ -0,0 +1,42 @@ +system_prompt = """ +You are an expert in all topics of computer science and its practical applications. +Your task consists of three parts: +1. Read the provided curriculum description a university course. +2. Extract all learning goals ("competencies") from the course description. + +Each competency must contain the following fields: + +- title: +The title of the competency, which is a specific topic or skill. This should be a short phrase of at most 4 words. + +- description: +A detailed description of the competency in 3 to 6 bullet points. +Each bullet point is a short sentence, at most 15 words. +Each bullet point illustrates a specific skill or concept of the competency. + +- taxonomy: +The classification of the competency within Bloom's taxonomy. +You must choose from these options in Bloom's taxonomy: {taxonomy_list} + +All competencies must meet the following requirements: + +- is mentioned in the course description. +- corresponds to exactly one subject or skill covered in the course description. +- is assigned to exactly one level of Bloom's taxonomy. +- is small and fine-grained. Large topics should be broken down into smaller competencies. +- does not overlap with other competencies: each competency is unique. Expanding on a previous competency is allowed. + +Here is an example competency whose structure you should follow: + +{{ + "title": "Recursion", + "description": "- You understand the concept of recursion. + - You are able to understand complex recursive implementations. + - You are able to implement recursive solutions of medium difficulty independently.", + "taxonomy": "ANALYZE" +}} + +Here is the provided course description: {course_description} + +Respond with up to {n} competencies extracted from the course description, each in JSON format, split by two newlines. +""" diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py index 7ac9d3da..0373a3e7 100644 --- a/app/web/routers/pipelines.py +++ b/app/web/routers/pipelines.py @@ -9,14 +9,17 @@ from app.domain import ( ExerciseChatPipelineExecutionDTO, CourseChatPipelineExecutionDTO, + CompetencyExtractionPipelineExecutionDTO, ) from app.web.status.status_update import ( ExerciseChatStatusCallback, CourseChatStatusCallback, + CompetencyExtractionCallback, ) from app.pipeline.chat.course_chat_pipeline import CourseChatPipeline from app.pipeline.chat.exercise_chat_pipeline import ExerciseChatPipeline from app.dependencies import TokenValidator +from app.pipeline.competency_extraction_pipeline import CompetencyExtractionPipeline router = APIRouter(prefix="/api/v1/pipelines", tags=["pipelines"]) logger = logging.getLogger(__name__) @@ -86,6 +89,45 @@ def run_course_chat_pipeline(variant: str, dto: CourseChatPipelineExecutionDTO): thread.start() +def run_competency_extraction_pipeline_worker( + dto: CompetencyExtractionPipelineExecutionDTO, _variant: str +): + try: + callback = CompetencyExtractionCallback( + run_id=dto.execution.settings.authentication_token, + base_url=dto.execution.settings.artemis_base_url, + initial_stages=dto.execution.initial_stages, + num_iterations=dto.max_n, + ) + pipeline = CompetencyExtractionPipeline(callback=callback) + except Exception as e: + logger.error(f"Error preparing exercise chat pipeline: {e}") + logger.error(traceback.format_exc()) + capture_exception(e) + return + + try: + pipeline(dto=dto) + except Exception as e: + logger.error(f"Error running exercise chat pipeline: {e}") + logger.error(traceback.format_exc()) + callback.error("Fatal error.", exception=e) + + +@router.post( + "/competency-extraction/{variant}/run", + status_code=status.HTTP_202_ACCEPTED, + dependencies=[Depends(TokenValidator())], +) +def run_competency_extraction_pipeline( + variant: str, dto: CompetencyExtractionPipelineExecutionDTO +): + thread = Thread( + target=run_competency_extraction_pipeline_worker, args=(dto, variant) + ) + thread.start() + + @router.get("/{feature}") def get_pipeline(feature: str): """ diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py index 533047ca..6f64530c 100644 --- a/app/web/status/status_update.py +++ b/app/web/status/status_update.py @@ -5,6 +5,9 @@ import requests from abc import ABC +from domain.status.competency_extraction_status_update_dto import ( + CompetencyExtractionStatusUpdateDTO, +) from ...domain.chat.course_chat.course_chat_status_update_dto import ( CourseChatStatusUpdateDTO, ) @@ -219,3 +222,27 @@ def __init__( status = ExerciseChatStatusUpdateDTO(stages=stages) stage = stages[current_stage_index] super().__init__(url, run_id, status, stage, current_stage_index) + + +class CompetencyExtractionCallback(StatusCallback): + def __init__( + self, + run_id: str, + base_url: str, + initial_stages: List[StageDTO] = None, + num_iterations=10, + ): + url = f"{base_url}/api/public/pyris/pipelines/competency-extraction/runs/{run_id}/status" + current_stage_index = 1 if initial_stages else 0 + stages = initial_stages or [] + stages += [ + StageDTO( + weight=10, + state=StageStateEnum.NOT_STARTED, + name=f"Competency {i + 1}", + ) + for i in range(num_iterations) + ] + status = CompetencyExtractionStatusUpdateDTO(stages=stages) + stage = stages[current_stage_index] + super().__init__(url, run_id, status, stage, current_stage_index) From 97d4c2483c2ce921ffb72a68171b4e643715ec53 Mon Sep 17 00:00:00 2001 From: Michael Dyer Date: Thu, 8 Aug 2024 19:52:08 -0500 Subject: [PATCH 02/11] Fix typos --- app/domain/data/competency_dto.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/domain/data/competency_dto.py b/app/domain/data/competency_dto.py index 3e1f2be4..fb2bca7f 100644 --- a/app/domain/data/competency_dto.py +++ b/app/domain/data/competency_dto.py @@ -35,11 +35,11 @@ class Competency(BaseModel): description="Selected taxonomy based on bloom's taxonomy" ) - @validator("subject") - def validate_subject(cls, field): + @validator("title") + def validate_title(cls, field): """Validate the subject of the competency.""" if len(field.split()) > 4: - raise ValueError("Subject must contain no more than 4 words") + raise ValueError("Title must contain no more than 4 words") return field @validator("taxonomy") From 15c5a51b851b6f251d5d82bc96912f71b3065d35 Mon Sep 17 00:00:00 2001 From: Michael Dyer Date: Thu, 8 Aug 2024 19:54:18 -0500 Subject: [PATCH 03/11] Remove debug print statement --- app/pipeline/competency_extraction_pipeline.py | 6 +----- app/pipeline/prompts/competency_extraction.py | 3 ++- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py index f4c434c6..94aba8d5 100644 --- a/app/pipeline/competency_extraction_pipeline.py +++ b/app/pipeline/competency_extraction_pipeline.py @@ -52,22 +52,18 @@ def __call__( prompt = system_prompt.format( taxonomy_list=taxonomy_options, course_description=dto.course_description, - n=dto.max_n, + max_n=dto.max_n, ) prompt = PyrisMessage( sender=IrisMessageRole.SYSTEM, contents=[TextMessageContentDTO(text_content=prompt)], ) - self.callback.in_progress("Starting competency extraction") - response = self.request_handler.chat( [prompt], CompletionArguments(temperature=0.4) ) response = response.contents[0].text_content - print(f"Received response from OpenAI: {response}") - generated_competencies: list[Competency] = [] # Find all competencies in the response diff --git a/app/pipeline/prompts/competency_extraction.py b/app/pipeline/prompts/competency_extraction.py index 0b6ce289..7caf10d2 100644 --- a/app/pipeline/prompts/competency_extraction.py +++ b/app/pipeline/prompts/competency_extraction.py @@ -38,5 +38,6 @@ Here is the provided course description: {course_description} -Respond with up to {n} competencies extracted from the course description, each in JSON format, split by two newlines. +Respond with up to {max_n} competencies extracted from the course description, +each in JSON format, split by two newlines. """ From dcf3510a95561ad3d44ea9468749ded59ed3bfa9 Mon Sep 17 00:00:00 2001 From: Michael Dyer Date: Fri, 9 Aug 2024 07:51:57 -0500 Subject: [PATCH 04/11] Apply coderabbit suggestions --- app/domain/data/competency_dto.py | 2 +- app/pipeline/competency_extraction_pipeline.py | 14 +++++++------- app/web/routers/pipelines.py | 4 ++-- app/web/status/status_update.py | 4 ++-- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/app/domain/data/competency_dto.py b/app/domain/data/competency_dto.py index fb2bca7f..9561d0c1 100644 --- a/app/domain/data/competency_dto.py +++ b/app/domain/data/competency_dto.py @@ -45,6 +45,6 @@ def validate_title(cls, field): @validator("taxonomy") def validate_selected_taxonomy(cls, field): """Validate the selected taxonomy.""" - if field not in CompetencyTaxonomy.__members__.keys(): + if field not in CompetencyTaxonomy.__members__: raise ValueError(f"Invalid taxonomy: {field}") return field diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py index 94aba8d5..7716aff4 100644 --- a/app/pipeline/competency_extraction_pipeline.py +++ b/app/pipeline/competency_extraction_pipeline.py @@ -41,11 +41,11 @@ def __call__( **kwargs, ): if not dto.course_description: - self.callback.error("Course description is required") + raise ValueError("Course description is required") if not dto.taxonomy_options: - self.callback.error("Taxonomy options are required") + raise ValueError("Taxonomy options are required") if not dto.max_n: - self.callback.error("Non-zero max_n is required") + raise ValueError("Non-zero max_n is required") taxonomy_options = ", ".join(dto.taxonomy_options) @@ -69,9 +69,9 @@ def __call__( # Find all competencies in the response competencies = response.split("\n\n") for i, competency in enumerate(competencies): - print(f"Processing competency {i + 1}: {competency}") + logger.debug(f"Processing competency {i + 1}: {competency}") if "{" not in competency or "}" not in competency: - print("Skipping competency without JSON") + logger.debug("Skipping competency without JSON") continue # Get the competency JSON object start = competency.index("{") @@ -79,11 +79,11 @@ def __call__( competency = competency[start:end] try: competency = self.output_parser.parse(competency) - print(f"Generated competency: {competency}") + logger.debug(f"Generated competency: {competency}") generated_competencies.append(competency) self.callback.done(final_result=generated_competencies) except Exception as e: - print(f"Error generating competency: {e}") + logger.debug(f"Error generating competency: {e}") self.callback.error(f"Error generating competency: {e}") # Mark all remaining competencies as skipped for i in range(len(generated_competencies), len(competencies)): diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py index 0373a3e7..5be6b07b 100644 --- a/app/web/routers/pipelines.py +++ b/app/web/routers/pipelines.py @@ -101,7 +101,7 @@ def run_competency_extraction_pipeline_worker( ) pipeline = CompetencyExtractionPipeline(callback=callback) except Exception as e: - logger.error(f"Error preparing exercise chat pipeline: {e}") + logger.error(f"Error preparing competency extraction pipeline: {e}") logger.error(traceback.format_exc()) capture_exception(e) return @@ -109,7 +109,7 @@ def run_competency_extraction_pipeline_worker( try: pipeline(dto=dto) except Exception as e: - logger.error(f"Error running exercise chat pipeline: {e}") + logger.error(f"Error running competency extraction pipeline: {e}") logger.error(traceback.format_exc()) callback.error("Fatal error.", exception=e) diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py index 6f64530c..abc2b1a5 100644 --- a/app/web/status/status_update.py +++ b/app/web/status/status_update.py @@ -229,8 +229,8 @@ def __init__( self, run_id: str, base_url: str, - initial_stages: List[StageDTO] = None, - num_iterations=10, + initial_stages: List[StageDTO], + num_iterations, ): url = f"{base_url}/api/public/pyris/pipelines/competency-extraction/runs/{run_id}/status" current_stage_index = 1 if initial_stages else 0 From 41df05028909e9ee6e9d869dd6a56598839dc893 Mon Sep 17 00:00:00 2001 From: Michael Dyer Date: Fri, 9 Aug 2024 07:54:51 -0500 Subject: [PATCH 05/11] Format --- app/pipeline/chat/course_chat_pipeline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/app/pipeline/chat/course_chat_pipeline.py b/app/pipeline/chat/course_chat_pipeline.py index 42a046b0..17aca74a 100644 --- a/app/pipeline/chat/course_chat_pipeline.py +++ b/app/pipeline/chat/course_chat_pipeline.py @@ -266,7 +266,8 @@ def get_competency_list() -> list: def lecture_content_retrieval() -> str: """ Retrieve content from indexed lecture slides. - This will run a RAG retrieval based on the chat history on the indexed lecture slides and return the most relevant paragraphs. + This will run a RAG retrieval based on the chat history on the indexed lecture slides and return the most + relevant paragraphs. Use this if you think it can be useful to answer the student's question, or if the student explicitly asks a question about the lecture content or slides. Only use this once. From 117a6e2e31c4f053baeef49afeaef93c683619fd Mon Sep 17 00:00:00 2001 From: Michael Dyer Date: Fri, 9 Aug 2024 08:24:46 -0500 Subject: [PATCH 06/11] Add hard limit on generated competencies to max_n --- app/pipeline/competency_extraction_pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py index 7716aff4..99ac0a28 100644 --- a/app/pipeline/competency_extraction_pipeline.py +++ b/app/pipeline/competency_extraction_pipeline.py @@ -66,8 +66,8 @@ def __call__( generated_competencies: list[Competency] = [] - # Find all competencies in the response - competencies = response.split("\n\n") + # Find all competencies in the response up to the max_n + competencies = response.split("\n\n")[: dto.max_n] for i, competency in enumerate(competencies): logger.debug(f"Processing competency {i + 1}: {competency}") if "{" not in competency or "}" not in competency: From a202b2cc57ca56001bf1115deb2fee09baeb567f Mon Sep 17 00:00:00 2001 From: Michael Dyer Date: Fri, 9 Aug 2024 14:19:55 -0500 Subject: [PATCH 07/11] Do not throw ValueError when calling callback.done() --- .../competency_extraction_pipeline.py | 10 +++--- app/web/status/status_update.py | 32 ++++++++----------- 2 files changed, 18 insertions(+), 24 deletions(-) diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py index 99ac0a28..c569f704 100644 --- a/app/pipeline/competency_extraction_pipeline.py +++ b/app/pipeline/competency_extraction_pipeline.py @@ -79,12 +79,12 @@ def __call__( competency = competency[start:end] try: competency = self.output_parser.parse(competency) - logger.debug(f"Generated competency: {competency}") - generated_competencies.append(competency) - self.callback.done(final_result=generated_competencies) except Exception as e: - logger.debug(f"Error generating competency: {e}") - self.callback.error(f"Error generating competency: {e}") + logger.debug(f"Error parsing competency: {e}") + continue + logger.debug(f"Generated competency: {competency}") + generated_competencies.append(competency) + self.callback.done(final_result=generated_competencies) # Mark all remaining competencies as skipped for i in range(len(generated_competencies), len(competencies)): self.callback.skip(f"Skipping competency {i + 1}") diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py index abc2b1a5..979d0956 100644 --- a/app/web/status/status_update.py +++ b/app/web/status/status_update.py @@ -104,24 +104,19 @@ def done( If there is a next stage, set the current stage to the next stage. """ - if self.stage.state == StageStateEnum.IN_PROGRESS: - self.stage.state = StageStateEnum.DONE - self.stage.message = message - self.status.result = final_result - if hasattr(self.status, "suggestions"): - self.status.suggestions = suggestions - next_stage = self.get_next_stage() - if next_stage is not None: - self.stage = next_stage - if next_stage_message: - self.stage.message = next_stage_message - if start_next_stage: - self.stage.state = StageStateEnum.IN_PROGRESS - self.on_status_update() - else: - raise ValueError( - "Invalid state transition to done. current state is ", self.stage.state - ) + self.stage.state = StageStateEnum.DONE + self.stage.message = message + self.status.result = final_result + if hasattr(self.status, "suggestions"): + self.status.suggestions = suggestions + next_stage = self.get_next_stage() + if next_stage is not None: + self.stage = next_stage + if next_stage_message: + self.stage.message = next_stage_message + if start_next_stage: + self.stage.state = StageStateEnum.IN_PROGRESS + self.on_status_update() def error(self, message: str, exception=None): """ @@ -131,7 +126,6 @@ def error(self, message: str, exception=None): self.stage.state = StageStateEnum.ERROR self.stage.message = message self.status.result = None - self.stage.suggestions = None # Set all subsequent stages to SKIPPED if an error occurs rest_of_index = ( self.current_stage_index + 1 From 6b81d36f7a7c3e19326a47d289fa7446f1f27ac9 Mon Sep 17 00:00:00 2001 From: Michael Dyer Date: Fri, 9 Aug 2024 15:44:59 -0500 Subject: [PATCH 08/11] Tweak prompt --- app/pipeline/competency_extraction_pipeline.py | 5 +---- app/pipeline/prompts/competency_extraction.py | 13 ++++++------- app/web/routers/pipelines.py | 1 - app/web/status/status_update.py | 13 +++++-------- 4 files changed, 12 insertions(+), 20 deletions(-) diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py index c569f704..396820f2 100644 --- a/app/pipeline/competency_extraction_pipeline.py +++ b/app/pipeline/competency_extraction_pipeline.py @@ -84,7 +84,4 @@ def __call__( continue logger.debug(f"Generated competency: {competency}") generated_competencies.append(competency) - self.callback.done(final_result=generated_competencies) - # Mark all remaining competencies as skipped - for i in range(len(generated_competencies), len(competencies)): - self.callback.skip(f"Skipping competency {i + 1}") + self.callback.done(final_result=generated_competencies) diff --git a/app/pipeline/prompts/competency_extraction.py b/app/pipeline/prompts/competency_extraction.py index 7caf10d2..74cedfa0 100644 --- a/app/pipeline/prompts/competency_extraction.py +++ b/app/pipeline/prompts/competency_extraction.py @@ -10,9 +10,10 @@ The title of the competency, which is a specific topic or skill. This should be a short phrase of at most 4 words. - description: -A detailed description of the competency in 3 to 6 bullet points. -Each bullet point is a short sentence, at most 15 words. +A detailed description of the competency in 3 to 5 bullet points. Each bullet point illustrates a specific skill or concept of the competency. +Each bullet point is a complete sentence starting with "You" and containing at most 15 words. +Each bullet point is on a new line and starts with "- ". - taxonomy: The classification of the competency within Bloom's taxonomy. @@ -26,13 +27,11 @@ - is small and fine-grained. Large topics should be broken down into smaller competencies. - does not overlap with other competencies: each competency is unique. Expanding on a previous competency is allowed. -Here is an example competency whose structure you should follow: +Here is a template competency in JSON format: {{ - "title": "Recursion", - "description": "- You understand the concept of recursion. - - You are able to understand complex recursive implementations. - - You are able to implement recursive solutions of medium difficulty independently.", + "title": "Competency Title", + "description": "- You understand this.\n- You are proficient in doing that.\n- You know how to do this.", "taxonomy": "ANALYZE" }} diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py index 5be6b07b..f92f0d68 100644 --- a/app/web/routers/pipelines.py +++ b/app/web/routers/pipelines.py @@ -97,7 +97,6 @@ def run_competency_extraction_pipeline_worker( run_id=dto.execution.settings.authentication_token, base_url=dto.execution.settings.artemis_base_url, initial_stages=dto.execution.initial_stages, - num_iterations=dto.max_n, ) pipeline = CompetencyExtractionPipeline(callback=callback) except Exception as e: diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py index 979d0956..5867d70f 100644 --- a/app/web/status/status_update.py +++ b/app/web/status/status_update.py @@ -224,19 +224,16 @@ def __init__( run_id: str, base_url: str, initial_stages: List[StageDTO], - num_iterations, ): url = f"{base_url}/api/public/pyris/pipelines/competency-extraction/runs/{run_id}/status" - current_stage_index = 1 if initial_stages else 0 stages = initial_stages or [] - stages += [ + stages.append( StageDTO( weight=10, state=StageStateEnum.NOT_STARTED, - name=f"Competency {i + 1}", + name="Generating Competencies", ) - for i in range(num_iterations) - ] + ) status = CompetencyExtractionStatusUpdateDTO(stages=stages) - stage = stages[current_stage_index] - super().__init__(url, run_id, status, stage, current_stage_index) + stage = stages[-1] + super().__init__(url, run_id, status, stage, len(stages) - 1) From 0d590b743308163a971b38604ea963f3835969c4 Mon Sep 17 00:00:00 2001 From: Michael Dyer Date: Tue, 20 Aug 2024 19:06:48 -0500 Subject: [PATCH 09/11] Avoid generating duplicate competencies --- .../competency_extraction_pipeline_execution_dto.py | 9 ++++++--- app/pipeline/competency_extraction_pipeline.py | 9 +++++++++ app/pipeline/prompts/competency_extraction.py | 10 ++++++---- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/app/domain/competency_extraction_pipeline_execution_dto.py b/app/domain/competency_extraction_pipeline_execution_dto.py index 497fa828..05a88167 100644 --- a/app/domain/competency_extraction_pipeline_execution_dto.py +++ b/app/domain/competency_extraction_pipeline_execution_dto.py @@ -1,14 +1,17 @@ -from typing import List, Optional +from typing import List from pydantic import Field, BaseModel from . import PipelineExecutionDTO -from .data.competency_dto import CompetencyTaxonomy +from .data.competency_dto import CompetencyTaxonomy, Competency class CompetencyExtractionPipelineExecutionDTO(BaseModel): execution: PipelineExecutionDTO - course_description: Optional[str] = Field(alias="courseDescription") + course_description: str = Field(alias="courseDescription") + current_competencies: list[Competency] = Field( + alias="currentCompetencies", default=[] + ) taxonomy_options: List[CompetencyTaxonomy] = Field( alias="taxonomyOptions", default=[] ) diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py index 396820f2..da224ffe 100644 --- a/app/pipeline/competency_extraction_pipeline.py +++ b/app/pipeline/competency_extraction_pipeline.py @@ -48,11 +48,20 @@ def __call__( raise ValueError("Non-zero max_n is required") taxonomy_options = ", ".join(dto.taxonomy_options) + current_competencies = "\n\n".join( + [c.model_dump_json(indent=4) for c in dto.current_competencies] + ) + if current_competencies: + current_competencies = ( + f"\nHere are the current competencies in the course:\n{current_competencies}\n" + f"Do not repeat these competencies.\n" + ) prompt = system_prompt.format( taxonomy_list=taxonomy_options, course_description=dto.course_description, max_n=dto.max_n, + current_competencies=current_competencies, ) prompt = PyrisMessage( sender=IrisMessageRole.SYSTEM, diff --git a/app/pipeline/prompts/competency_extraction.py b/app/pipeline/prompts/competency_extraction.py index 74cedfa0..4d87b6d4 100644 --- a/app/pipeline/prompts/competency_extraction.py +++ b/app/pipeline/prompts/competency_extraction.py @@ -10,9 +10,9 @@ The title of the competency, which is a specific topic or skill. This should be a short phrase of at most 4 words. - description: -A detailed description of the competency in 3 to 5 bullet points. +A detailed description of the competency in 2 to 5 bullet points. Each bullet point illustrates a specific skill or concept of the competency. -Each bullet point is a complete sentence starting with "You" and containing at most 15 words. +Each bullet point is a complete sentence containing at most 15 words. Each bullet point is on a new line and starts with "- ". - taxonomy: @@ -27,6 +27,8 @@ - is small and fine-grained. Large topics should be broken down into smaller competencies. - does not overlap with other competencies: each competency is unique. Expanding on a previous competency is allowed. +Here is the provided course description: {course_description} + Here is a template competency in JSON format: {{ @@ -35,8 +37,8 @@ "taxonomy": "ANALYZE" }} -Here is the provided course description: {course_description} +{current_competencies} -Respond with up to {max_n} competencies extracted from the course description, +Respond with 0 to {max_n} competencies extracted from the course description, each in JSON format, split by two newlines. """ From 3507f45051411f6bab626c00540d7950461e7313 Mon Sep 17 00:00:00 2001 From: Michael Dyer Date: Wed, 21 Aug 2024 09:54:59 -0500 Subject: [PATCH 10/11] Fix dumb import path --- app/web/status/status_update.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py index 5867d70f..1f497f75 100644 --- a/app/web/status/status_update.py +++ b/app/web/status/status_update.py @@ -5,7 +5,7 @@ import requests from abc import ABC -from domain.status.competency_extraction_status_update_dto import ( +from ...domain.status.competency_extraction_status_update_dto import ( CompetencyExtractionStatusUpdateDTO, ) from ...domain.chat.course_chat.course_chat_status_update_dto import ( From 38dcfe334a031d0e10a588b7d35249efc2941b69 Mon Sep 17 00:00:00 2001 From: Michael Dyer Date: Wed, 21 Aug 2024 10:37:10 -0500 Subject: [PATCH 11/11] Remove unused import causing Black to fail --- app/llm/external/openai_chat.py | 1 - 1 file changed, 1 deletion(-) diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py index a05c49ac..27e2d080 100644 --- a/app/llm/external/openai_chat.py +++ b/app/llm/external/openai_chat.py @@ -7,7 +7,6 @@ from openai import OpenAI from openai.lib.azure import AzureOpenAI from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageParam -from openai.types.chat.completion_create_params import ResponseFormat from openai.types.shared_params import ResponseFormatJSONObject from ...common.message_converters import map_str_to_role, map_role_to_str