From 9ea513b4a116353c6ca28af4118030230d0a0533 Mon Sep 17 00:00:00 2001
From: Michael Dyer <michaelowendyer@gmail.com>
Date: Thu, 8 Aug 2024 19:28:07 -0500
Subject: [PATCH 01/11] Competency extraction

---
 app/domain/__init__.py                        |  3 +
 .../chat/chat_pipeline_execution_dto.py       |  7 +-
 ...tency_extraction_pipeline_execution_dto.py | 19 ++++
 app/domain/data/competency_dto.py             | 27 ++++++
 .../ingestion_pipeline_execution_dto.py       |  9 +-
 app/domain/pipeline_execution_dto.py          | 12 ++-
 ...competency_extraction_status_update_dto.py |  6 ++
 app/pipeline/__init__.py                      |  2 +-
 .../competency_extraction_pipeline.py         | 94 +++++++++++++++++++
 app/pipeline/prompts/competency_extraction.py | 42 +++++++++
 app/web/routers/pipelines.py                  | 42 +++++++++
 app/web/status/status_update.py               | 27 ++++++
 12 files changed, 274 insertions(+), 16 deletions(-)
 create mode 100644 app/domain/competency_extraction_pipeline_execution_dto.py
 create mode 100644 app/domain/status/competency_extraction_status_update_dto.py
 create mode 100644 app/pipeline/competency_extraction_pipeline.py
 create mode 100644 app/pipeline/prompts/competency_extraction.py

diff --git a/app/domain/__init__.py b/app/domain/__init__.py
index 2f56f3f3..b32ca726 100644
--- a/app/domain/__init__.py
+++ b/app/domain/__init__.py
@@ -3,6 +3,9 @@
 from .pipeline_execution_settings_dto import PipelineExecutionSettingsDTO
 from .chat.chat_pipeline_execution_dto import ChatPipelineExecutionDTO
 from .chat.chat_pipeline_execution_base_data_dto import ChatPipelineExecutionBaseDataDTO
+from .competency_extraction_pipeline_execution_dto import (
+    CompetencyExtractionPipelineExecutionDTO,
+)
 from app.domain.chat.exercise_chat.exercise_chat_pipeline_execution_dto import (
     ExerciseChatPipelineExecutionDTO,
 )
diff --git a/app/domain/chat/chat_pipeline_execution_dto.py b/app/domain/chat/chat_pipeline_execution_dto.py
index 31fa7593..e3e63284 100644
--- a/app/domain/chat/chat_pipeline_execution_dto.py
+++ b/app/domain/chat/chat_pipeline_execution_dto.py
@@ -2,16 +2,11 @@
 
 from pydantic import Field
 
-from app.domain import PipelineExecutionDTO, PipelineExecutionSettingsDTO
+from app.domain import PipelineExecutionDTO
 from app.domain.pyris_message import PyrisMessage
 from app.domain.data.user_dto import UserDTO
-from app.domain.status.stage_dto import StageDTO
 
 
 class ChatPipelineExecutionDTO(PipelineExecutionDTO):
     chat_history: List[PyrisMessage] = Field(alias="chatHistory", default=[])
     user: Optional[UserDTO]
-    settings: Optional[PipelineExecutionSettingsDTO]
-    initial_stages: Optional[List[StageDTO]] = Field(
-        default=None, alias="initialStages"
-    )
diff --git a/app/domain/competency_extraction_pipeline_execution_dto.py b/app/domain/competency_extraction_pipeline_execution_dto.py
new file mode 100644
index 00000000..497fa828
--- /dev/null
+++ b/app/domain/competency_extraction_pipeline_execution_dto.py
@@ -0,0 +1,19 @@
+from typing import List, Optional
+
+from pydantic import Field, BaseModel
+
+from . import PipelineExecutionDTO
+from .data.competency_dto import CompetencyTaxonomy
+
+
+class CompetencyExtractionPipelineExecutionDTO(BaseModel):
+    execution: PipelineExecutionDTO
+    course_description: Optional[str] = Field(alias="courseDescription")
+    taxonomy_options: List[CompetencyTaxonomy] = Field(
+        alias="taxonomyOptions", default=[]
+    )
+    max_n: int = Field(
+        alias="maxN",
+        description="Maximum number of competencies to extract from the course description",
+        default=10,
+    )
diff --git a/app/domain/data/competency_dto.py b/app/domain/data/competency_dto.py
index 0e2c697c..3e1f2be4 100644
--- a/app/domain/data/competency_dto.py
+++ b/app/domain/data/competency_dto.py
@@ -3,6 +3,7 @@
 from typing import Optional
 
 from pydantic import BaseModel, Field
+from pydantic.v1 import validator
 
 
 class CompetencyTaxonomy(str, Enum):
@@ -21,3 +22,29 @@ class CompetencyDTO(BaseModel):
     taxonomy: Optional[CompetencyTaxonomy] = None
     soft_due_date: Optional[datetime] = Field(default=None, alias="softDueDate")
     optional: Optional[bool] = None
+
+
+class Competency(BaseModel):
+    title: str = Field(
+        description="Title of the competency that contains no more than 4 words",
+    )
+    description: str = Field(
+        description="Description of the competency as plain string. DO NOT RETURN A LIST OF STRINGS."
+    )
+    taxonomy: CompetencyTaxonomy = Field(
+        description="Selected taxonomy based on bloom's taxonomy"
+    )
+
+    @validator("subject")
+    def validate_subject(cls, field):
+        """Validate the subject of the competency."""
+        if len(field.split()) > 4:
+            raise ValueError("Subject must contain no more than 4 words")
+        return field
+
+    @validator("taxonomy")
+    def validate_selected_taxonomy(cls, field):
+        """Validate the selected taxonomy."""
+        if field not in CompetencyTaxonomy.__members__.keys():
+            raise ValueError(f"Invalid taxonomy: {field}")
+        return field
diff --git a/app/domain/ingestion/ingestion_pipeline_execution_dto.py b/app/domain/ingestion/ingestion_pipeline_execution_dto.py
index 393767e8..e8a9882f 100644
--- a/app/domain/ingestion/ingestion_pipeline_execution_dto.py
+++ b/app/domain/ingestion/ingestion_pipeline_execution_dto.py
@@ -1,17 +1,12 @@
-from typing import List, Optional
+from typing import List
 
 from pydantic import Field
 
-from app.domain import PipelineExecutionDTO, PipelineExecutionSettingsDTO
+from app.domain import PipelineExecutionDTO
 from app.domain.data.lecture_unit_dto import LectureUnitDTO
-from app.domain.status.stage_dto import StageDTO
 
 
 class IngestionPipelineExecutionDto(PipelineExecutionDTO):
     lecture_units: List[LectureUnitDTO] = Field(
         ..., alias="pyrisLectureUnitWebhookDTOS"
     )
-    settings: Optional[PipelineExecutionSettingsDTO]
-    initial_stages: Optional[List[StageDTO]] = Field(
-        default=None, alias="initialStages"
-    )
diff --git a/app/domain/pipeline_execution_dto.py b/app/domain/pipeline_execution_dto.py
index 86299d40..fb447369 100644
--- a/app/domain/pipeline_execution_dto.py
+++ b/app/domain/pipeline_execution_dto.py
@@ -1,8 +1,16 @@
-from pydantic import BaseModel
+from typing import Optional
+
+from pydantic import BaseModel, Field
+
+from app.domain.pipeline_execution_settings_dto import PipelineExecutionSettingsDTO
+from app.domain.status.stage_dto import StageDTO
 
 
 class PipelineExecutionDTO(BaseModel):
-    pass
+    settings: Optional[PipelineExecutionSettingsDTO]
+    initial_stages: Optional[list[StageDTO]] = Field(
+        default=None, alias="initialStages"
+    )
 
     class Config:
         populate_by_name = True
diff --git a/app/domain/status/competency_extraction_status_update_dto.py b/app/domain/status/competency_extraction_status_update_dto.py
new file mode 100644
index 00000000..e71f2bdf
--- /dev/null
+++ b/app/domain/status/competency_extraction_status_update_dto.py
@@ -0,0 +1,6 @@
+from app.domain.data.competency_dto import Competency
+from app.domain.status.status_update_dto import StatusUpdateDTO
+
+
+class CompetencyExtractionStatusUpdateDTO(StatusUpdateDTO):
+    result: list[Competency] = []
diff --git a/app/pipeline/__init__.py b/app/pipeline/__init__.py
index 13980f8d..c9faeebb 100644
--- a/app/pipeline/__init__.py
+++ b/app/pipeline/__init__.py
@@ -1 +1 @@
-from ..pipeline.pipeline import Pipeline
+from app.pipeline.pipeline import Pipeline
diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py
new file mode 100644
index 00000000..f4c434c6
--- /dev/null
+++ b/app/pipeline/competency_extraction_pipeline.py
@@ -0,0 +1,94 @@
+import logging
+from typing import Optional
+
+from langchain.output_parsers import PydanticOutputParser
+from langchain_core.prompts import (
+    ChatPromptTemplate,
+)
+
+from app.domain import (
+    CompetencyExtractionPipelineExecutionDTO,
+    PyrisMessage,
+    IrisMessageRole,
+)
+from app.domain.data.text_message_content_dto import TextMessageContentDTO
+from app.domain.data.competency_dto import Competency
+from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments
+from app.pipeline import Pipeline
+from app.web.status.status_update import CompetencyExtractionCallback
+from app.pipeline.prompts.competency_extraction import system_prompt
+
+logger = logging.getLogger(__name__)
+
+
+class CompetencyExtractionPipeline(Pipeline):
+    callback: CompetencyExtractionCallback
+    request_handler: CapabilityRequestHandler
+    output_parser: PydanticOutputParser
+
+    def __init__(self, callback: Optional[CompetencyExtractionCallback] = None):
+        super().__init__(
+            implementation_id="competency_extraction_pipeline_reference_impl"
+        )
+        self.callback = callback
+        self.request_handler = CapabilityRequestHandler(requirements=RequirementList())
+        self.output_parser = PydanticOutputParser(pydantic_object=Competency)
+
+    def __call__(
+        self,
+        dto: CompetencyExtractionPipelineExecutionDTO,
+        prompt: Optional[ChatPromptTemplate] = None,
+        **kwargs,
+    ):
+        if not dto.course_description:
+            self.callback.error("Course description is required")
+        if not dto.taxonomy_options:
+            self.callback.error("Taxonomy options are required")
+        if not dto.max_n:
+            self.callback.error("Non-zero max_n is required")
+
+        taxonomy_options = ", ".join(dto.taxonomy_options)
+
+        prompt = system_prompt.format(
+            taxonomy_list=taxonomy_options,
+            course_description=dto.course_description,
+            n=dto.max_n,
+        )
+        prompt = PyrisMessage(
+            sender=IrisMessageRole.SYSTEM,
+            contents=[TextMessageContentDTO(text_content=prompt)],
+        )
+
+        self.callback.in_progress("Starting competency extraction")
+
+        response = self.request_handler.chat(
+            [prompt], CompletionArguments(temperature=0.4)
+        )
+        response = response.contents[0].text_content
+
+        print(f"Received response from OpenAI: {response}")
+
+        generated_competencies: list[Competency] = []
+
+        # Find all competencies in the response
+        competencies = response.split("\n\n")
+        for i, competency in enumerate(competencies):
+            print(f"Processing competency {i + 1}: {competency}")
+            if "{" not in competency or "}" not in competency:
+                print("Skipping competency without JSON")
+                continue
+            # Get the competency JSON object
+            start = competency.index("{")
+            end = competency.index("}") + 1
+            competency = competency[start:end]
+            try:
+                competency = self.output_parser.parse(competency)
+                print(f"Generated competency: {competency}")
+                generated_competencies.append(competency)
+                self.callback.done(final_result=generated_competencies)
+            except Exception as e:
+                print(f"Error generating competency: {e}")
+                self.callback.error(f"Error generating competency: {e}")
+        # Mark all remaining competencies as skipped
+        for i in range(len(generated_competencies), len(competencies)):
+            self.callback.skip(f"Skipping competency {i + 1}")
diff --git a/app/pipeline/prompts/competency_extraction.py b/app/pipeline/prompts/competency_extraction.py
new file mode 100644
index 00000000..0b6ce289
--- /dev/null
+++ b/app/pipeline/prompts/competency_extraction.py
@@ -0,0 +1,42 @@
+system_prompt = """
+You are an expert in all topics of computer science and its practical applications.
+Your task consists of three parts:
+1. Read the provided curriculum description a university course.
+2. Extract all learning goals ("competencies") from the course description.
+
+Each competency must contain the following fields:
+
+- title:
+The title of the competency, which is a specific topic or skill. This should be a short phrase of at most 4 words.
+
+- description:
+A detailed description of the competency in 3 to 6 bullet points.
+Each bullet point is a short sentence, at most 15 words.
+Each bullet point illustrates a specific skill or concept of the competency.
+
+- taxonomy:
+The classification of the competency within Bloom's taxonomy.
+You must choose from these options in Bloom's taxonomy: {taxonomy_list}
+
+All competencies must meet the following requirements:
+
+- is mentioned in the course description.
+- corresponds to exactly one subject or skill covered in the course description.
+- is assigned to exactly one level of Bloom's taxonomy.
+- is small and fine-grained. Large topics should be broken down into smaller competencies.
+- does not overlap with other competencies: each competency is unique. Expanding on a previous competency is allowed.
+
+Here is an example competency whose structure you should follow:
+
+{{
+    "title": "Recursion",
+    "description": "- You understand the concept of recursion.
+    - You are able to understand complex recursive implementations.
+    - You are able to implement recursive solutions of medium difficulty independently.",
+    "taxonomy": "ANALYZE"
+}}
+
+Here is the provided course description: {course_description}
+
+Respond with up to {n} competencies extracted from the course description, each in JSON format, split by two newlines.
+"""
diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py
index 7ac9d3da..0373a3e7 100644
--- a/app/web/routers/pipelines.py
+++ b/app/web/routers/pipelines.py
@@ -9,14 +9,17 @@
 from app.domain import (
     ExerciseChatPipelineExecutionDTO,
     CourseChatPipelineExecutionDTO,
+    CompetencyExtractionPipelineExecutionDTO,
 )
 from app.web.status.status_update import (
     ExerciseChatStatusCallback,
     CourseChatStatusCallback,
+    CompetencyExtractionCallback,
 )
 from app.pipeline.chat.course_chat_pipeline import CourseChatPipeline
 from app.pipeline.chat.exercise_chat_pipeline import ExerciseChatPipeline
 from app.dependencies import TokenValidator
+from app.pipeline.competency_extraction_pipeline import CompetencyExtractionPipeline
 
 router = APIRouter(prefix="/api/v1/pipelines", tags=["pipelines"])
 logger = logging.getLogger(__name__)
@@ -86,6 +89,45 @@ def run_course_chat_pipeline(variant: str, dto: CourseChatPipelineExecutionDTO):
     thread.start()
 
 
+def run_competency_extraction_pipeline_worker(
+    dto: CompetencyExtractionPipelineExecutionDTO, _variant: str
+):
+    try:
+        callback = CompetencyExtractionCallback(
+            run_id=dto.execution.settings.authentication_token,
+            base_url=dto.execution.settings.artemis_base_url,
+            initial_stages=dto.execution.initial_stages,
+            num_iterations=dto.max_n,
+        )
+        pipeline = CompetencyExtractionPipeline(callback=callback)
+    except Exception as e:
+        logger.error(f"Error preparing exercise chat pipeline: {e}")
+        logger.error(traceback.format_exc())
+        capture_exception(e)
+        return
+
+    try:
+        pipeline(dto=dto)
+    except Exception as e:
+        logger.error(f"Error running exercise chat pipeline: {e}")
+        logger.error(traceback.format_exc())
+        callback.error("Fatal error.", exception=e)
+
+
+@router.post(
+    "/competency-extraction/{variant}/run",
+    status_code=status.HTTP_202_ACCEPTED,
+    dependencies=[Depends(TokenValidator())],
+)
+def run_competency_extraction_pipeline(
+    variant: str, dto: CompetencyExtractionPipelineExecutionDTO
+):
+    thread = Thread(
+        target=run_competency_extraction_pipeline_worker, args=(dto, variant)
+    )
+    thread.start()
+
+
 @router.get("/{feature}")
 def get_pipeline(feature: str):
     """
diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py
index 533047ca..6f64530c 100644
--- a/app/web/status/status_update.py
+++ b/app/web/status/status_update.py
@@ -5,6 +5,9 @@
 import requests
 from abc import ABC
 
+from domain.status.competency_extraction_status_update_dto import (
+    CompetencyExtractionStatusUpdateDTO,
+)
 from ...domain.chat.course_chat.course_chat_status_update_dto import (
     CourseChatStatusUpdateDTO,
 )
@@ -219,3 +222,27 @@ def __init__(
         status = ExerciseChatStatusUpdateDTO(stages=stages)
         stage = stages[current_stage_index]
         super().__init__(url, run_id, status, stage, current_stage_index)
+
+
+class CompetencyExtractionCallback(StatusCallback):
+    def __init__(
+        self,
+        run_id: str,
+        base_url: str,
+        initial_stages: List[StageDTO] = None,
+        num_iterations=10,
+    ):
+        url = f"{base_url}/api/public/pyris/pipelines/competency-extraction/runs/{run_id}/status"
+        current_stage_index = 1 if initial_stages else 0
+        stages = initial_stages or []
+        stages += [
+            StageDTO(
+                weight=10,
+                state=StageStateEnum.NOT_STARTED,
+                name=f"Competency {i + 1}",
+            )
+            for i in range(num_iterations)
+        ]
+        status = CompetencyExtractionStatusUpdateDTO(stages=stages)
+        stage = stages[current_stage_index]
+        super().__init__(url, run_id, status, stage, current_stage_index)

From 97d4c2483c2ce921ffb72a68171b4e643715ec53 Mon Sep 17 00:00:00 2001
From: Michael Dyer <michaelowendyer@gmail.com>
Date: Thu, 8 Aug 2024 19:52:08 -0500
Subject: [PATCH 02/11] Fix title validator field name and error message

---
 app/domain/data/competency_dto.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/app/domain/data/competency_dto.py b/app/domain/data/competency_dto.py
index 3e1f2be4..fb2bca7f 100644
--- a/app/domain/data/competency_dto.py
+++ b/app/domain/data/competency_dto.py
@@ -35,11 +35,11 @@ class Competency(BaseModel):
         description="Selected taxonomy based on bloom's taxonomy"
     )
 
-    @validator("subject")
-    def validate_subject(cls, field):
+    @validator("title")
+    def validate_title(cls, field):
         """Validate the subject of the competency."""
         if len(field.split()) > 4:
-            raise ValueError("Subject must contain no more than 4 words")
+            raise ValueError("Title must contain no more than 4 words")
         return field
 
     @validator("taxonomy")

From 15c5a51b851b6f251d5d82bc96912f71b3065d35 Mon Sep 17 00:00:00 2001
From: Michael Dyer <michaelowendyer@gmail.com>
Date: Thu, 8 Aug 2024 19:54:18 -0500
Subject: [PATCH 03/11] Remove debug print; rename prompt placeholder to max_n

---
 app/pipeline/competency_extraction_pipeline.py | 6 +-----
 app/pipeline/prompts/competency_extraction.py  | 3 ++-
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py
index f4c434c6..94aba8d5 100644
--- a/app/pipeline/competency_extraction_pipeline.py
+++ b/app/pipeline/competency_extraction_pipeline.py
@@ -52,22 +52,18 @@ def __call__(
         prompt = system_prompt.format(
             taxonomy_list=taxonomy_options,
             course_description=dto.course_description,
-            n=dto.max_n,
+            max_n=dto.max_n,
         )
         prompt = PyrisMessage(
             sender=IrisMessageRole.SYSTEM,
             contents=[TextMessageContentDTO(text_content=prompt)],
         )
 
-        self.callback.in_progress("Starting competency extraction")
-
         response = self.request_handler.chat(
             [prompt], CompletionArguments(temperature=0.4)
         )
         response = response.contents[0].text_content
 
-        print(f"Received response from OpenAI: {response}")
-
         generated_competencies: list[Competency] = []
 
         # Find all competencies in the response
diff --git a/app/pipeline/prompts/competency_extraction.py b/app/pipeline/prompts/competency_extraction.py
index 0b6ce289..7caf10d2 100644
--- a/app/pipeline/prompts/competency_extraction.py
+++ b/app/pipeline/prompts/competency_extraction.py
@@ -38,5 +38,6 @@
 
 Here is the provided course description: {course_description}
 
-Respond with up to {n} competencies extracted from the course description, each in JSON format, split by two newlines.
+Respond with up to {max_n} competencies extracted from the course description,
+each in JSON format, split by two newlines.
 """

From dcf3510a95561ad3d44ea9468749ded59ed3bfa9 Mon Sep 17 00:00:00 2001
From: Michael Dyer <michaelowendyer@gmail.com>
Date: Fri, 9 Aug 2024 07:51:57 -0500
Subject: [PATCH 04/11] Apply coderabbit suggestions

---
 app/domain/data/competency_dto.py              |  2 +-
 app/pipeline/competency_extraction_pipeline.py | 14 +++++++-------
 app/web/routers/pipelines.py                   |  4 ++--
 app/web/status/status_update.py                |  4 ++--
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/app/domain/data/competency_dto.py b/app/domain/data/competency_dto.py
index fb2bca7f..9561d0c1 100644
--- a/app/domain/data/competency_dto.py
+++ b/app/domain/data/competency_dto.py
@@ -45,6 +45,6 @@ def validate_title(cls, field):
     @validator("taxonomy")
     def validate_selected_taxonomy(cls, field):
         """Validate the selected taxonomy."""
-        if field not in CompetencyTaxonomy.__members__.keys():
+        if field not in CompetencyTaxonomy.__members__:
             raise ValueError(f"Invalid taxonomy: {field}")
         return field
diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py
index 94aba8d5..7716aff4 100644
--- a/app/pipeline/competency_extraction_pipeline.py
+++ b/app/pipeline/competency_extraction_pipeline.py
@@ -41,11 +41,11 @@ def __call__(
         **kwargs,
     ):
         if not dto.course_description:
-            self.callback.error("Course description is required")
+            raise ValueError("Course description is required")
         if not dto.taxonomy_options:
-            self.callback.error("Taxonomy options are required")
+            raise ValueError("Taxonomy options are required")
         if not dto.max_n:
-            self.callback.error("Non-zero max_n is required")
+            raise ValueError("Non-zero max_n is required")
 
         taxonomy_options = ", ".join(dto.taxonomy_options)
 
@@ -69,9 +69,9 @@ def __call__(
         # Find all competencies in the response
         competencies = response.split("\n\n")
         for i, competency in enumerate(competencies):
-            print(f"Processing competency {i + 1}: {competency}")
+            logger.debug(f"Processing competency {i + 1}: {competency}")
             if "{" not in competency or "}" not in competency:
-                print("Skipping competency without JSON")
+                logger.debug("Skipping competency without JSON")
                 continue
             # Get the competency JSON object
             start = competency.index("{")
@@ -79,11 +79,11 @@ def __call__(
             competency = competency[start:end]
             try:
                 competency = self.output_parser.parse(competency)
-                print(f"Generated competency: {competency}")
+                logger.debug(f"Generated competency: {competency}")
                 generated_competencies.append(competency)
                 self.callback.done(final_result=generated_competencies)
             except Exception as e:
-                print(f"Error generating competency: {e}")
+                logger.debug(f"Error generating competency: {e}")
                 self.callback.error(f"Error generating competency: {e}")
         # Mark all remaining competencies as skipped
         for i in range(len(generated_competencies), len(competencies)):
diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py
index 0373a3e7..5be6b07b 100644
--- a/app/web/routers/pipelines.py
+++ b/app/web/routers/pipelines.py
@@ -101,7 +101,7 @@ def run_competency_extraction_pipeline_worker(
         )
         pipeline = CompetencyExtractionPipeline(callback=callback)
     except Exception as e:
-        logger.error(f"Error preparing exercise chat pipeline: {e}")
+        logger.error(f"Error preparing competency extraction pipeline: {e}")
         logger.error(traceback.format_exc())
         capture_exception(e)
         return
@@ -109,7 +109,7 @@ def run_competency_extraction_pipeline_worker(
     try:
         pipeline(dto=dto)
     except Exception as e:
-        logger.error(f"Error running exercise chat pipeline: {e}")
+        logger.error(f"Error running competency extraction pipeline: {e}")
         logger.error(traceback.format_exc())
         callback.error("Fatal error.", exception=e)
 
diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py
index 6f64530c..abc2b1a5 100644
--- a/app/web/status/status_update.py
+++ b/app/web/status/status_update.py
@@ -229,8 +229,8 @@ def __init__(
         self,
         run_id: str,
         base_url: str,
-        initial_stages: List[StageDTO] = None,
-        num_iterations=10,
+        initial_stages: List[StageDTO],
+        num_iterations,
     ):
         url = f"{base_url}/api/public/pyris/pipelines/competency-extraction/runs/{run_id}/status"
         current_stage_index = 1 if initial_stages else 0

From 41df05028909e9ee6e9d869dd6a56598839dc893 Mon Sep 17 00:00:00 2001
From: Michael Dyer <michaelowendyer@gmail.com>
Date: Fri, 9 Aug 2024 07:54:51 -0500
Subject: [PATCH 05/11] Format

---
 app/pipeline/chat/course_chat_pipeline.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/app/pipeline/chat/course_chat_pipeline.py b/app/pipeline/chat/course_chat_pipeline.py
index 42a046b0..17aca74a 100644
--- a/app/pipeline/chat/course_chat_pipeline.py
+++ b/app/pipeline/chat/course_chat_pipeline.py
@@ -266,7 +266,8 @@ def get_competency_list() -> list:
         def lecture_content_retrieval() -> str:
             """
             Retrieve content from indexed lecture slides.
-            This will run a RAG retrieval based on the chat history on the indexed lecture slides and return the most relevant paragraphs.
+            This will run a RAG retrieval based on the chat history on the indexed lecture slides and return the most
+            relevant paragraphs.
             Use this if you think it can be useful to answer the student's question, or if the student explicitly asks
             a question about the lecture content or slides.
             Only use this once.

From 117a6e2e31c4f053baeef49afeaef93c683619fd Mon Sep 17 00:00:00 2001
From: Michael Dyer <michaelowendyer@gmail.com>
Date: Fri, 9 Aug 2024 08:24:46 -0500
Subject: [PATCH 06/11] Add hard limit on generated competencies to max_n

---
 app/pipeline/competency_extraction_pipeline.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py
index 7716aff4..99ac0a28 100644
--- a/app/pipeline/competency_extraction_pipeline.py
+++ b/app/pipeline/competency_extraction_pipeline.py
@@ -66,8 +66,8 @@ def __call__(
 
         generated_competencies: list[Competency] = []
 
-        # Find all competencies in the response
-        competencies = response.split("\n\n")
+        # Find all competencies in the response up to the max_n
+        competencies = response.split("\n\n")[: dto.max_n]
         for i, competency in enumerate(competencies):
             logger.debug(f"Processing competency {i + 1}: {competency}")
             if "{" not in competency or "}" not in competency:

From a202b2cc57ca56001bf1115deb2fee09baeb567f Mon Sep 17 00:00:00 2001
From: Michael Dyer <michaelowendyer@gmail.com>
Date: Fri, 9 Aug 2024 14:19:55 -0500
Subject: [PATCH 07/11] Do not throw ValueError when calling callback.done()

---
 .../competency_extraction_pipeline.py         | 10 +++---
 app/web/status/status_update.py               | 32 ++++++++-----------
 2 files changed, 18 insertions(+), 24 deletions(-)

diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py
index 99ac0a28..c569f704 100644
--- a/app/pipeline/competency_extraction_pipeline.py
+++ b/app/pipeline/competency_extraction_pipeline.py
@@ -79,12 +79,12 @@ def __call__(
             competency = competency[start:end]
             try:
                 competency = self.output_parser.parse(competency)
-                logger.debug(f"Generated competency: {competency}")
-                generated_competencies.append(competency)
-                self.callback.done(final_result=generated_competencies)
             except Exception as e:
-                logger.debug(f"Error generating competency: {e}")
-                self.callback.error(f"Error generating competency: {e}")
+                logger.debug(f"Error parsing competency: {e}")
+                continue
+            logger.debug(f"Generated competency: {competency}")
+            generated_competencies.append(competency)
+            self.callback.done(final_result=generated_competencies)
         # Mark all remaining competencies as skipped
         for i in range(len(generated_competencies), len(competencies)):
             self.callback.skip(f"Skipping competency {i + 1}")
diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py
index abc2b1a5..979d0956 100644
--- a/app/web/status/status_update.py
+++ b/app/web/status/status_update.py
@@ -104,24 +104,19 @@ def done(
         If there is a next stage, set the current
         stage to the next stage.
         """
-        if self.stage.state == StageStateEnum.IN_PROGRESS:
-            self.stage.state = StageStateEnum.DONE
-            self.stage.message = message
-            self.status.result = final_result
-            if hasattr(self.status, "suggestions"):
-                self.status.suggestions = suggestions
-            next_stage = self.get_next_stage()
-            if next_stage is not None:
-                self.stage = next_stage
-                if next_stage_message:
-                    self.stage.message = next_stage_message
-                if start_next_stage:
-                    self.stage.state = StageStateEnum.IN_PROGRESS
-            self.on_status_update()
-        else:
-            raise ValueError(
-                "Invalid state transition to done. current state is ", self.stage.state
-            )
+        self.stage.state = StageStateEnum.DONE
+        self.stage.message = message
+        self.status.result = final_result
+        if hasattr(self.status, "suggestions"):
+            self.status.suggestions = suggestions
+        next_stage = self.get_next_stage()
+        if next_stage is not None:
+            self.stage = next_stage
+            if next_stage_message:
+                self.stage.message = next_stage_message
+            if start_next_stage:
+                self.stage.state = StageStateEnum.IN_PROGRESS
+        self.on_status_update()
 
     def error(self, message: str, exception=None):
         """
@@ -131,7 +126,6 @@ def error(self, message: str, exception=None):
         self.stage.state = StageStateEnum.ERROR
         self.stage.message = message
         self.status.result = None
-        self.stage.suggestions = None
         # Set all subsequent stages to SKIPPED if an error occurs
         rest_of_index = (
             self.current_stage_index + 1

From 6b81d36f7a7c3e19326a47d289fa7446f1f27ac9 Mon Sep 17 00:00:00 2001
From: Michael Dyer <michaelowendyer@gmail.com>
Date: Fri, 9 Aug 2024 15:44:59 -0500
Subject: [PATCH 08/11] Tweak prompt; send result in one final status update

---
 app/pipeline/competency_extraction_pipeline.py |  5 +----
 app/pipeline/prompts/competency_extraction.py  | 13 ++++++-------
 app/web/routers/pipelines.py                   |  1 -
 app/web/status/status_update.py                | 13 +++++--------
 4 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py
index c569f704..396820f2 100644
--- a/app/pipeline/competency_extraction_pipeline.py
+++ b/app/pipeline/competency_extraction_pipeline.py
@@ -84,7 +84,4 @@ def __call__(
                 continue
             logger.debug(f"Generated competency: {competency}")
             generated_competencies.append(competency)
-            self.callback.done(final_result=generated_competencies)
-        # Mark all remaining competencies as skipped
-        for i in range(len(generated_competencies), len(competencies)):
-            self.callback.skip(f"Skipping competency {i + 1}")
+        self.callback.done(final_result=generated_competencies)
diff --git a/app/pipeline/prompts/competency_extraction.py b/app/pipeline/prompts/competency_extraction.py
index 7caf10d2..74cedfa0 100644
--- a/app/pipeline/prompts/competency_extraction.py
+++ b/app/pipeline/prompts/competency_extraction.py
@@ -10,9 +10,10 @@
 The title of the competency, which is a specific topic or skill. This should be a short phrase of at most 4 words.
 
 - description:
-A detailed description of the competency in 3 to 6 bullet points.
-Each bullet point is a short sentence, at most 15 words.
+A detailed description of the competency in 3 to 5 bullet points.
 Each bullet point illustrates a specific skill or concept of the competency.
+Each bullet point is a complete sentence starting with "You" and containing at most 15 words.
+Each bullet point is on a new line and starts with "- ".
 
 - taxonomy:
 The classification of the competency within Bloom's taxonomy.
@@ -26,13 +27,11 @@
 - is small and fine-grained. Large topics should be broken down into smaller competencies.
 - does not overlap with other competencies: each competency is unique. Expanding on a previous competency is allowed.
 
-Here is an example competency whose structure you should follow:
+Here is a template competency in JSON format:
 
 {{
-    "title": "Recursion",
-    "description": "- You understand the concept of recursion.
-    - You are able to understand complex recursive implementations.
-    - You are able to implement recursive solutions of medium difficulty independently.",
+    "title": "Competency Title",
+    "description": "- You understand this.\n- You are proficient in doing that.\n- You know how to do this.",
     "taxonomy": "ANALYZE"
 }}
 
diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py
index 5be6b07b..f92f0d68 100644
--- a/app/web/routers/pipelines.py
+++ b/app/web/routers/pipelines.py
@@ -97,7 +97,6 @@ def run_competency_extraction_pipeline_worker(
             run_id=dto.execution.settings.authentication_token,
             base_url=dto.execution.settings.artemis_base_url,
             initial_stages=dto.execution.initial_stages,
-            num_iterations=dto.max_n,
         )
         pipeline = CompetencyExtractionPipeline(callback=callback)
     except Exception as e:
diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py
index 979d0956..5867d70f 100644
--- a/app/web/status/status_update.py
+++ b/app/web/status/status_update.py
@@ -224,19 +224,16 @@ def __init__(
         run_id: str,
         base_url: str,
         initial_stages: List[StageDTO],
-        num_iterations,
     ):
         url = f"{base_url}/api/public/pyris/pipelines/competency-extraction/runs/{run_id}/status"
-        current_stage_index = 1 if initial_stages else 0
         stages = initial_stages or []
-        stages += [
+        stages.append(
             StageDTO(
                 weight=10,
                 state=StageStateEnum.NOT_STARTED,
-                name=f"Competency {i + 1}",
+                name="Generating Competencies",
             )
-            for i in range(num_iterations)
-        ]
+        )
         status = CompetencyExtractionStatusUpdateDTO(stages=stages)
-        stage = stages[current_stage_index]
-        super().__init__(url, run_id, status, stage, current_stage_index)
+        stage = stages[-1]
+        super().__init__(url, run_id, status, stage, len(stages) - 1)

From 0d590b743308163a971b38604ea963f3835969c4 Mon Sep 17 00:00:00 2001
From: Michael Dyer <michaelowendyer@gmail.com>
Date: Tue, 20 Aug 2024 19:06:48 -0500
Subject: [PATCH 09/11] Avoid generating duplicate competencies

---
 .../competency_extraction_pipeline_execution_dto.py    |  9 ++++++---
 app/pipeline/competency_extraction_pipeline.py         |  9 +++++++++
 app/pipeline/prompts/competency_extraction.py          | 10 ++++++----
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/app/domain/competency_extraction_pipeline_execution_dto.py b/app/domain/competency_extraction_pipeline_execution_dto.py
index 497fa828..05a88167 100644
--- a/app/domain/competency_extraction_pipeline_execution_dto.py
+++ b/app/domain/competency_extraction_pipeline_execution_dto.py
@@ -1,14 +1,17 @@
-from typing import List, Optional
+from typing import List
 
 from pydantic import Field, BaseModel
 
 from . import PipelineExecutionDTO
-from .data.competency_dto import CompetencyTaxonomy
+from .data.competency_dto import CompetencyTaxonomy, Competency
 
 
 class CompetencyExtractionPipelineExecutionDTO(BaseModel):
     execution: PipelineExecutionDTO
-    course_description: Optional[str] = Field(alias="courseDescription")
+    course_description: str = Field(alias="courseDescription")
+    current_competencies: list[Competency] = Field(
+        alias="currentCompetencies", default=[]
+    )
     taxonomy_options: List[CompetencyTaxonomy] = Field(
         alias="taxonomyOptions", default=[]
     )
diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py
index 396820f2..da224ffe 100644
--- a/app/pipeline/competency_extraction_pipeline.py
+++ b/app/pipeline/competency_extraction_pipeline.py
@@ -48,11 +48,20 @@ def __call__(
             raise ValueError("Non-zero max_n is required")
 
         taxonomy_options = ", ".join(dto.taxonomy_options)
+        current_competencies = "\n\n".join(
+            [c.model_dump_json(indent=4) for c in dto.current_competencies]
+        )
+        if current_competencies:
+            current_competencies = (
+                f"\nHere are the current competencies in the course:\n{current_competencies}\n"
+                f"Do not repeat these competencies.\n"
+            )
 
         prompt = system_prompt.format(
             taxonomy_list=taxonomy_options,
             course_description=dto.course_description,
             max_n=dto.max_n,
+            current_competencies=current_competencies,
         )
         prompt = PyrisMessage(
             sender=IrisMessageRole.SYSTEM,
diff --git a/app/pipeline/prompts/competency_extraction.py b/app/pipeline/prompts/competency_extraction.py
index 74cedfa0..4d87b6d4 100644
--- a/app/pipeline/prompts/competency_extraction.py
+++ b/app/pipeline/prompts/competency_extraction.py
@@ -10,9 +10,9 @@
 The title of the competency, which is a specific topic or skill. This should be a short phrase of at most 4 words.
 
 - description:
-A detailed description of the competency in 3 to 5 bullet points.
+A detailed description of the competency in 2 to 5 bullet points.
 Each bullet point illustrates a specific skill or concept of the competency.
-Each bullet point is a complete sentence starting with "You" and containing at most 15 words.
+Each bullet point is a complete sentence containing at most 15 words.
 Each bullet point is on a new line and starts with "- ".
 
 - taxonomy:
@@ -27,6 +27,8 @@
 - is small and fine-grained. Large topics should be broken down into smaller competencies.
 - does not overlap with other competencies: each competency is unique. Expanding on a previous competency is allowed.
 
+Here is the provided course description: {course_description}
+
 Here is a template competency in JSON format:
 
 {{
@@ -35,8 +37,8 @@
     "taxonomy": "ANALYZE"
 }}
 
-Here is the provided course description: {course_description}
+{current_competencies}
 
-Respond with up to {max_n} competencies extracted from the course description,
+Respond with 0 to {max_n} competencies extracted from the course description,
 each in JSON format, split by two newlines.
 """

From 3507f45051411f6bab626c00540d7950461e7313 Mon Sep 17 00:00:00 2001
From: Michael Dyer <michaelowendyer@gmail.com>
Date: Wed, 21 Aug 2024 09:54:59 -0500
Subject: [PATCH 10/11] Fix dumb import path

---
 app/web/status/status_update.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py
index 5867d70f..1f497f75 100644
--- a/app/web/status/status_update.py
+++ b/app/web/status/status_update.py
@@ -5,7 +5,7 @@
 import requests
 from abc import ABC
 
-from domain.status.competency_extraction_status_update_dto import (
+from ...domain.status.competency_extraction_status_update_dto import (
     CompetencyExtractionStatusUpdateDTO,
 )
 from ...domain.chat.course_chat.course_chat_status_update_dto import (

From 38dcfe334a031d0e10a588b7d35249efc2941b69 Mon Sep 17 00:00:00 2001
From: Michael Dyer <michaelowendyer@gmail.com>
Date: Wed, 21 Aug 2024 10:37:10 -0500
Subject: [PATCH 11/11] Remove unused import causing Black to fail

---
 app/llm/external/openai_chat.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py
index a05c49ac..27e2d080 100644
--- a/app/llm/external/openai_chat.py
+++ b/app/llm/external/openai_chat.py
@@ -7,7 +7,6 @@
 from openai import OpenAI
 from openai.lib.azure import AzureOpenAI
 from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageParam
-from openai.types.chat.completion_create_params import ResponseFormat
 from openai.types.shared_params import ResponseFormatJSONObject
 
 from ...common.message_converters import map_str_to_role, map_role_to_str