From 27ed23b4c1ee7cf3f5d04593c9a27924b9a92ec6 Mon Sep 17 00:00:00 2001 From: Patrick Bassner Date: Wed, 31 Jul 2024 17:53:22 +0200 Subject: [PATCH 01/11] Made timestamp and score fields in SimpleSubmissionDTO optional --- app/domain/data/simple_submission_dto.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/app/domain/data/simple_submission_dto.py b/app/domain/data/simple_submission_dto.py index 5fb40e15..53b1c9fa 100644 --- a/app/domain/data/simple_submission_dto.py +++ b/app/domain/data/simple_submission_dto.py @@ -1,8 +1,10 @@ +from typing import Optional + from pydantic import BaseModel, Field from datetime import datetime class SimpleSubmissionDTO(BaseModel): - timestamp: datetime = Field(alias="timestamp") - score: float = Field(alias="score") + timestamp: Optional[datetime] = Field(alias="timestamp") + score: Optional[float] = Field(alias="score") From ab2296fd6b226aaf790d57ce10d1132c1c585133 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 7 Aug 2024 15:23:39 +0200 Subject: [PATCH 02/11] Bump the python-deps group across 1 directory with 13 updates (#144) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements.txt | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/requirements.txt b/requirements.txt index ef70fb48..0d47d43e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,17 +1,17 @@ -black==24.4.2 -fastapi==0.111.0 -flake8==7.1.0 -langchain==0.2.6 -ollama==0.2.1 -openai==1.35.7 -pre-commit==3.7.1 +black==24.8.0 +fastapi==0.112.0 +flake8==7.1.1 +langchain==0.2.12 +ollama==0.3.1 +openai==1.38.0 +pre-commit==3.8.0 psutil==6.0.0 -pydantic==2.7.4 -PyMuPDF==1.24.7 +pydantic==2.8.2 +PyMuPDF==1.24.9 pytz==2024.1 PyYAML==6.0.1 requests==2.32.3 -sentry-sdk[starlette,fastapi,openai]==2.7.1 -unstructured==0.14.9 -uvicorn==0.30.1 -weaviate-client==4.6.5 
+sentry-sdk[starlette,fastapi,openai]==2.12.0 +unstructured==0.15.0 +uvicorn==0.30.5 +weaviate-client==4.7.1 From 408b9d4472ab82d6180e85ac4ecc50de65b17747 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 12 Aug 2024 12:01:42 +0200 Subject: [PATCH 03/11] Bump the python-deps group with 3 updates (#146) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0d47d43e..f988c7b2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,15 +3,15 @@ fastapi==0.112.0 flake8==7.1.1 langchain==0.2.12 ollama==0.3.1 -openai==1.38.0 +openai==1.40.3 pre-commit==3.8.0 psutil==6.0.0 pydantic==2.8.2 PyMuPDF==1.24.9 pytz==2024.1 -PyYAML==6.0.1 +PyYAML==6.0.2 requests==2.32.3 sentry-sdk[starlette,fastapi,openai]==2.12.0 -unstructured==0.15.0 +unstructured==0.15.1 uvicorn==0.30.5 weaviate-client==4.7.1 From 66b5c524d9e9dd82140318be946f51ce3587c7b7 Mon Sep 17 00:00:00 2001 From: Patrick Bassner Date: Mon, 19 Aug 2024 11:03:18 +0200 Subject: [PATCH 04/11] fix response typing format --- app/llm/external/openai_chat.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py index 69baa293..a05c49ac 100644 --- a/app/llm/external/openai_chat.py +++ b/app/llm/external/openai_chat.py @@ -1,5 +1,6 @@ import logging import time +import traceback from datetime import datetime from typing import Literal, Any @@ -7,6 +8,7 @@ from openai.lib.azure import AzureOpenAI from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageParam from openai.types.chat.completion_create_params import ResponseFormat +from openai.types.shared_params import ResponseFormatJSONObject from ...common.message_converters import map_str_to_role, map_role_to_str from 
app.domain.data.text_message_content_dto import TextMessageContentDTO @@ -93,7 +95,7 @@ def chat( messages=convert_to_open_ai_messages(messages), temperature=arguments.temperature, max_tokens=arguments.max_tokens, - response_format=ResponseFormat(type="json_object"), + response_format=ResponseFormatJSONObject(type="json_object"), ) else: response = self._client.chat.completions.create( @@ -106,6 +108,7 @@ def chat( except Exception as e: wait_time = initial_delay * (backoff_factor**attempt) logging.warning(f"Exception on attempt {attempt + 1}: {e}") + traceback.print_exc() logging.info(f"Retrying in {wait_time} seconds...") time.sleep(wait_time) logging.error("Failed to interpret image after several attempts.") From 2e4f640153f695e215ecb7584929891e55ca97b1 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Aug 2024 14:31:06 +0200 Subject: [PATCH 05/11] Bump the python-deps group across 1 directory with 6 updates (#150) Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- requirements.txt | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index f988c7b2..4ea47bc0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ black==24.8.0 -fastapi==0.112.0 +fastapi==0.112.2 flake8==7.1.1 -langchain==0.2.12 +langchain==0.2.14 ollama==0.3.1 -openai==1.40.3 +openai==1.42.0 pre-commit==3.8.0 psutil==6.0.0 pydantic==2.8.2 @@ -11,7 +11,7 @@ PyMuPDF==1.24.9 pytz==2024.1 PyYAML==6.0.2 requests==2.32.3 -sentry-sdk[starlette,fastapi,openai]==2.12.0 -unstructured==0.15.1 -uvicorn==0.30.5 +sentry-sdk[starlette,fastapi,openai]==2.13.0 +unstructured==0.15.7 +uvicorn==0.30.6 weaviate-client==4.7.1 From 5765c9c198dcf32eab28ffa39495aff60a0daad5 Mon Sep 17 00:00:00 2001 From: Michael Dyer <59163924+MichaelOwenDyer@users.noreply.github.com> Date: Tue, 27 Aug 2024 13:38:44 +0200 Subject: 
[PATCH 06/11] Fix competency extraction feature (#145) --- app/domain/__init__.py | 3 + .../chat/chat_pipeline_execution_dto.py | 7 +- ...tency_extraction_pipeline_execution_dto.py | 22 +++++ app/domain/data/competency_dto.py | 27 ++++++ .../ingestion_pipeline_execution_dto.py | 9 +- app/domain/pipeline_execution_dto.py | 12 ++- ...competency_extraction_status_update_dto.py | 6 ++ app/llm/external/openai_chat.py | 1 - app/pipeline/__init__.py | 2 +- app/pipeline/chat/course_chat_pipeline.py | 3 +- .../competency_extraction_pipeline.py | 96 +++++++++++++++++++ app/pipeline/prompts/competency_extraction.py | 44 +++++++++ app/web/routers/pipelines.py | 41 ++++++++ app/web/status/status_update.py | 56 +++++++---- 14 files changed, 292 insertions(+), 37 deletions(-) create mode 100644 app/domain/competency_extraction_pipeline_execution_dto.py create mode 100644 app/domain/status/competency_extraction_status_update_dto.py create mode 100644 app/pipeline/competency_extraction_pipeline.py create mode 100644 app/pipeline/prompts/competency_extraction.py diff --git a/app/domain/__init__.py b/app/domain/__init__.py index 2f56f3f3..b32ca726 100644 --- a/app/domain/__init__.py +++ b/app/domain/__init__.py @@ -3,6 +3,9 @@ from .pipeline_execution_settings_dto import PipelineExecutionSettingsDTO from .chat.chat_pipeline_execution_dto import ChatPipelineExecutionDTO from .chat.chat_pipeline_execution_base_data_dto import ChatPipelineExecutionBaseDataDTO +from .competency_extraction_pipeline_execution_dto import ( + CompetencyExtractionPipelineExecutionDTO, +) from app.domain.chat.exercise_chat.exercise_chat_pipeline_execution_dto import ( ExerciseChatPipelineExecutionDTO, ) diff --git a/app/domain/chat/chat_pipeline_execution_dto.py b/app/domain/chat/chat_pipeline_execution_dto.py index 31fa7593..e3e63284 100644 --- a/app/domain/chat/chat_pipeline_execution_dto.py +++ b/app/domain/chat/chat_pipeline_execution_dto.py @@ -2,16 +2,11 @@ from pydantic import Field -from app.domain 
import PipelineExecutionDTO, PipelineExecutionSettingsDTO +from app.domain import PipelineExecutionDTO from app.domain.pyris_message import PyrisMessage from app.domain.data.user_dto import UserDTO -from app.domain.status.stage_dto import StageDTO class ChatPipelineExecutionDTO(PipelineExecutionDTO): chat_history: List[PyrisMessage] = Field(alias="chatHistory", default=[]) user: Optional[UserDTO] - settings: Optional[PipelineExecutionSettingsDTO] - initial_stages: Optional[List[StageDTO]] = Field( - default=None, alias="initialStages" - ) diff --git a/app/domain/competency_extraction_pipeline_execution_dto.py b/app/domain/competency_extraction_pipeline_execution_dto.py new file mode 100644 index 00000000..05a88167 --- /dev/null +++ b/app/domain/competency_extraction_pipeline_execution_dto.py @@ -0,0 +1,22 @@ +from typing import List + +from pydantic import Field, BaseModel + +from . import PipelineExecutionDTO +from .data.competency_dto import CompetencyTaxonomy, Competency + + +class CompetencyExtractionPipelineExecutionDTO(BaseModel): + execution: PipelineExecutionDTO + course_description: str = Field(alias="courseDescription") + current_competencies: list[Competency] = Field( + alias="currentCompetencies", default=[] + ) + taxonomy_options: List[CompetencyTaxonomy] = Field( + alias="taxonomyOptions", default=[] + ) + max_n: int = Field( + alias="maxN", + description="Maximum number of competencies to extract from the course description", + default=10, + ) diff --git a/app/domain/data/competency_dto.py b/app/domain/data/competency_dto.py index 0e2c697c..9561d0c1 100644 --- a/app/domain/data/competency_dto.py +++ b/app/domain/data/competency_dto.py @@ -3,6 +3,7 @@ from typing import Optional from pydantic import BaseModel, Field +from pydantic.v1 import validator class CompetencyTaxonomy(str, Enum): @@ -21,3 +22,29 @@ class CompetencyDTO(BaseModel): taxonomy: Optional[CompetencyTaxonomy] = None soft_due_date: Optional[datetime] = Field(default=None, 
alias="softDueDate") optional: Optional[bool] = None + + +class Competency(BaseModel): + title: str = Field( + description="Title of the competency that contains no more than 4 words", + ) + description: str = Field( + description="Description of the competency as plain string. DO NOT RETURN A LIST OF STRINGS." + ) + taxonomy: CompetencyTaxonomy = Field( + description="Selected taxonomy based on bloom's taxonomy" + ) + + @validator("title") + def validate_title(cls, field): + """Validate the subject of the competency.""" + if len(field.split()) > 4: + raise ValueError("Title must contain no more than 4 words") + return field + + @validator("taxonomy") + def validate_selected_taxonomy(cls, field): + """Validate the selected taxonomy.""" + if field not in CompetencyTaxonomy.__members__: + raise ValueError(f"Invalid taxonomy: {field}") + return field diff --git a/app/domain/ingestion/ingestion_pipeline_execution_dto.py b/app/domain/ingestion/ingestion_pipeline_execution_dto.py index 393767e8..e8a9882f 100644 --- a/app/domain/ingestion/ingestion_pipeline_execution_dto.py +++ b/app/domain/ingestion/ingestion_pipeline_execution_dto.py @@ -1,17 +1,12 @@ -from typing import List, Optional +from typing import List from pydantic import Field -from app.domain import PipelineExecutionDTO, PipelineExecutionSettingsDTO +from app.domain import PipelineExecutionDTO from app.domain.data.lecture_unit_dto import LectureUnitDTO -from app.domain.status.stage_dto import StageDTO class IngestionPipelineExecutionDto(PipelineExecutionDTO): lecture_units: List[LectureUnitDTO] = Field( ..., alias="pyrisLectureUnitWebhookDTOS" ) - settings: Optional[PipelineExecutionSettingsDTO] - initial_stages: Optional[List[StageDTO]] = Field( - default=None, alias="initialStages" - ) diff --git a/app/domain/pipeline_execution_dto.py b/app/domain/pipeline_execution_dto.py index 86299d40..fb447369 100644 --- a/app/domain/pipeline_execution_dto.py +++ b/app/domain/pipeline_execution_dto.py @@ -1,8 +1,16 @@ 
-from pydantic import BaseModel +from typing import Optional + +from pydantic import BaseModel, Field + +from app.domain.pipeline_execution_settings_dto import PipelineExecutionSettingsDTO +from app.domain.status.stage_dto import StageDTO class PipelineExecutionDTO(BaseModel): - pass + settings: Optional[PipelineExecutionSettingsDTO] + initial_stages: Optional[list[StageDTO]] = Field( + default=None, alias="initialStages" + ) class Config: populate_by_name = True diff --git a/app/domain/status/competency_extraction_status_update_dto.py b/app/domain/status/competency_extraction_status_update_dto.py new file mode 100644 index 00000000..e71f2bdf --- /dev/null +++ b/app/domain/status/competency_extraction_status_update_dto.py @@ -0,0 +1,6 @@ +from app.domain.data.competency_dto import Competency +from app.domain.status.status_update_dto import StatusUpdateDTO + + +class CompetencyExtractionStatusUpdateDTO(StatusUpdateDTO): + result: list[Competency] = [] diff --git a/app/llm/external/openai_chat.py b/app/llm/external/openai_chat.py index a05c49ac..27e2d080 100644 --- a/app/llm/external/openai_chat.py +++ b/app/llm/external/openai_chat.py @@ -7,7 +7,6 @@ from openai import OpenAI from openai.lib.azure import AzureOpenAI from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageParam -from openai.types.chat.completion_create_params import ResponseFormat from openai.types.shared_params import ResponseFormatJSONObject from ...common.message_converters import map_str_to_role, map_role_to_str diff --git a/app/pipeline/__init__.py b/app/pipeline/__init__.py index 13980f8d..c9faeebb 100644 --- a/app/pipeline/__init__.py +++ b/app/pipeline/__init__.py @@ -1 +1 @@ -from ..pipeline.pipeline import Pipeline +from app.pipeline.pipeline import Pipeline diff --git a/app/pipeline/chat/course_chat_pipeline.py b/app/pipeline/chat/course_chat_pipeline.py index 42a046b0..17aca74a 100644 --- a/app/pipeline/chat/course_chat_pipeline.py +++ 
b/app/pipeline/chat/course_chat_pipeline.py @@ -266,7 +266,8 @@ def get_competency_list() -> list: def lecture_content_retrieval() -> str: """ Retrieve content from indexed lecture slides. - This will run a RAG retrieval based on the chat history on the indexed lecture slides and return the most relevant paragraphs. + This will run a RAG retrieval based on the chat history on the indexed lecture slides and return the most + relevant paragraphs. Use this if you think it can be useful to answer the student's question, or if the student explicitly asks a question about the lecture content or slides. Only use this once. diff --git a/app/pipeline/competency_extraction_pipeline.py b/app/pipeline/competency_extraction_pipeline.py new file mode 100644 index 00000000..da224ffe --- /dev/null +++ b/app/pipeline/competency_extraction_pipeline.py @@ -0,0 +1,96 @@ +import logging +from typing import Optional + +from langchain.output_parsers import PydanticOutputParser +from langchain_core.prompts import ( + ChatPromptTemplate, +) + +from app.domain import ( + CompetencyExtractionPipelineExecutionDTO, + PyrisMessage, + IrisMessageRole, +) +from app.domain.data.text_message_content_dto import TextMessageContentDTO +from app.domain.data.competency_dto import Competency +from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments +from app.pipeline import Pipeline +from app.web.status.status_update import CompetencyExtractionCallback +from app.pipeline.prompts.competency_extraction import system_prompt + +logger = logging.getLogger(__name__) + + +class CompetencyExtractionPipeline(Pipeline): + callback: CompetencyExtractionCallback + request_handler: CapabilityRequestHandler + output_parser: PydanticOutputParser + + def __init__(self, callback: Optional[CompetencyExtractionCallback] = None): + super().__init__( + implementation_id="competency_extraction_pipeline_reference_impl" + ) + self.callback = callback + self.request_handler = 
CapabilityRequestHandler(requirements=RequirementList()) + self.output_parser = PydanticOutputParser(pydantic_object=Competency) + + def __call__( + self, + dto: CompetencyExtractionPipelineExecutionDTO, + prompt: Optional[ChatPromptTemplate] = None, + **kwargs, + ): + if not dto.course_description: + raise ValueError("Course description is required") + if not dto.taxonomy_options: + raise ValueError("Taxonomy options are required") + if not dto.max_n: + raise ValueError("Non-zero max_n is required") + + taxonomy_options = ", ".join(dto.taxonomy_options) + current_competencies = "\n\n".join( + [c.model_dump_json(indent=4) for c in dto.current_competencies] + ) + if current_competencies: + current_competencies = ( + f"\nHere are the current competencies in the course:\n{current_competencies}\n" + f"Do not repeat these competencies.\n" + ) + + prompt = system_prompt.format( + taxonomy_list=taxonomy_options, + course_description=dto.course_description, + max_n=dto.max_n, + current_competencies=current_competencies, + ) + prompt = PyrisMessage( + sender=IrisMessageRole.SYSTEM, + contents=[TextMessageContentDTO(text_content=prompt)], + ) + + response = self.request_handler.chat( + [prompt], CompletionArguments(temperature=0.4) + ) + response = response.contents[0].text_content + + generated_competencies: list[Competency] = [] + + # Find all competencies in the response up to the max_n + competencies = response.split("\n\n")[: dto.max_n] + for i, competency in enumerate(competencies): + logger.debug(f"Processing competency {i + 1}: {competency}") + if "{" not in competency or "}" not in competency: + logger.debug("Skipping competency without JSON") + continue + # Get the competency JSON object + start = competency.index("{") + end = competency.index("}") + 1 + competency = competency[start:end] + try: + competency = self.output_parser.parse(competency) + except Exception as e: + logger.debug(f"Error parsing competency: {e}") + continue + logger.debug(f"Generated 
competency: {competency}") + generated_competencies.append(competency) + self.callback.done(final_result=generated_competencies) diff --git a/app/pipeline/prompts/competency_extraction.py b/app/pipeline/prompts/competency_extraction.py new file mode 100644 index 00000000..4d87b6d4 --- /dev/null +++ b/app/pipeline/prompts/competency_extraction.py @@ -0,0 +1,44 @@ +system_prompt = """ +You are an expert in all topics of computer science and its practical applications. +Your task consists of three parts: +1. Read the provided curriculum description a university course. +2. Extract all learning goals ("competencies") from the course description. + +Each competency must contain the following fields: + +- title: +The title of the competency, which is a specific topic or skill. This should be a short phrase of at most 4 words. + +- description: +A detailed description of the competency in 2 to 5 bullet points. +Each bullet point illustrates a specific skill or concept of the competency. +Each bullet point is a complete sentence containing at most 15 words. +Each bullet point is on a new line and starts with "- ". + +- taxonomy: +The classification of the competency within Bloom's taxonomy. +You must choose from these options in Bloom's taxonomy: {taxonomy_list} + +All competencies must meet the following requirements: + +- is mentioned in the course description. +- corresponds to exactly one subject or skill covered in the course description. +- is assigned to exactly one level of Bloom's taxonomy. +- is small and fine-grained. Large topics should be broken down into smaller competencies. +- does not overlap with other competencies: each competency is unique. Expanding on a previous competency is allowed. 
+ +Here is the provided course description: {course_description} + +Here is a template competency in JSON format: + +{{ + "title": "Competency Title", + "description": "- You understand this.\n- You are proficient in doing that.\n- You know how to do this.", + "taxonomy": "ANALYZE" +}} + +{current_competencies} + +Respond with 0 to {max_n} competencies extracted from the course description, +each in JSON format, split by two newlines. +""" diff --git a/app/web/routers/pipelines.py b/app/web/routers/pipelines.py index 7ac9d3da..f92f0d68 100644 --- a/app/web/routers/pipelines.py +++ b/app/web/routers/pipelines.py @@ -9,14 +9,17 @@ from app.domain import ( ExerciseChatPipelineExecutionDTO, CourseChatPipelineExecutionDTO, + CompetencyExtractionPipelineExecutionDTO, ) from app.web.status.status_update import ( ExerciseChatStatusCallback, CourseChatStatusCallback, + CompetencyExtractionCallback, ) from app.pipeline.chat.course_chat_pipeline import CourseChatPipeline from app.pipeline.chat.exercise_chat_pipeline import ExerciseChatPipeline from app.dependencies import TokenValidator +from app.pipeline.competency_extraction_pipeline import CompetencyExtractionPipeline router = APIRouter(prefix="/api/v1/pipelines", tags=["pipelines"]) logger = logging.getLogger(__name__) @@ -86,6 +89,44 @@ def run_course_chat_pipeline(variant: str, dto: CourseChatPipelineExecutionDTO): thread.start() +def run_competency_extraction_pipeline_worker( + dto: CompetencyExtractionPipelineExecutionDTO, _variant: str +): + try: + callback = CompetencyExtractionCallback( + run_id=dto.execution.settings.authentication_token, + base_url=dto.execution.settings.artemis_base_url, + initial_stages=dto.execution.initial_stages, + ) + pipeline = CompetencyExtractionPipeline(callback=callback) + except Exception as e: + logger.error(f"Error preparing competency extraction pipeline: {e}") + logger.error(traceback.format_exc()) + capture_exception(e) + return + + try: + pipeline(dto=dto) + except Exception as 
e: + logger.error(f"Error running competency extraction pipeline: {e}") + logger.error(traceback.format_exc()) + callback.error("Fatal error.", exception=e) + + +@router.post( + "/competency-extraction/{variant}/run", + status_code=status.HTTP_202_ACCEPTED, + dependencies=[Depends(TokenValidator())], +) +def run_competency_extraction_pipeline( + variant: str, dto: CompetencyExtractionPipelineExecutionDTO +): + thread = Thread( + target=run_competency_extraction_pipeline_worker, args=(dto, variant) + ) + thread.start() + + @router.get("/{feature}") def get_pipeline(feature: str): """ diff --git a/app/web/status/status_update.py b/app/web/status/status_update.py index 533047ca..1f497f75 100644 --- a/app/web/status/status_update.py +++ b/app/web/status/status_update.py @@ -5,6 +5,9 @@ import requests from abc import ABC +from ...domain.status.competency_extraction_status_update_dto import ( + CompetencyExtractionStatusUpdateDTO, +) from ...domain.chat.course_chat.course_chat_status_update_dto import ( CourseChatStatusUpdateDTO, ) @@ -101,24 +104,19 @@ def done( If there is a next stage, set the current stage to the next stage. """ - if self.stage.state == StageStateEnum.IN_PROGRESS: - self.stage.state = StageStateEnum.DONE - self.stage.message = message - self.status.result = final_result - if hasattr(self.status, "suggestions"): - self.status.suggestions = suggestions - next_stage = self.get_next_stage() - if next_stage is not None: - self.stage = next_stage - if next_stage_message: - self.stage.message = next_stage_message - if start_next_stage: - self.stage.state = StageStateEnum.IN_PROGRESS - self.on_status_update() - else: - raise ValueError( - "Invalid state transition to done. 
current state is ", self.stage.state - ) + self.stage.state = StageStateEnum.DONE + self.stage.message = message + self.status.result = final_result + if hasattr(self.status, "suggestions"): + self.status.suggestions = suggestions + next_stage = self.get_next_stage() + if next_stage is not None: + self.stage = next_stage + if next_stage_message: + self.stage.message = next_stage_message + if start_next_stage: + self.stage.state = StageStateEnum.IN_PROGRESS + self.on_status_update() def error(self, message: str, exception=None): """ @@ -128,7 +126,6 @@ def error(self, message: str, exception=None): self.stage.state = StageStateEnum.ERROR self.stage.message = message self.status.result = None - self.stage.suggestions = None # Set all subsequent stages to SKIPPED if an error occurs rest_of_index = ( self.current_stage_index + 1 @@ -219,3 +216,24 @@ def __init__( status = ExerciseChatStatusUpdateDTO(stages=stages) stage = stages[current_stage_index] super().__init__(url, run_id, status, stage, current_stage_index) + + +class CompetencyExtractionCallback(StatusCallback): + def __init__( + self, + run_id: str, + base_url: str, + initial_stages: List[StageDTO], + ): + url = f"{base_url}/api/public/pyris/pipelines/competency-extraction/runs/{run_id}/status" + stages = initial_stages or [] + stages.append( + StageDTO( + weight=10, + state=StageStateEnum.NOT_STARTED, + name="Generating Competencies", + ) + ) + status = CompetencyExtractionStatusUpdateDTO(stages=stages) + stage = stages[-1] + super().__init__(url, run_id, status, stage, len(stages) - 1) From d86b134f32e6270cd5ba5d0fe23c78097c2a935b Mon Sep 17 00:00:00 2001 From: Patrick Bassner Date: Tue, 27 Aug 2024 13:51:18 +0200 Subject: [PATCH 07/11] fix mastery calculation --- app/pipeline/chat/course_chat_pipeline.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/app/pipeline/chat/course_chat_pipeline.py b/app/pipeline/chat/course_chat_pipeline.py index 17aca74a..5c00bf10 100644 --- 
a/app/pipeline/chat/course_chat_pipeline.py +++ b/app/pipeline/chat/course_chat_pipeline.py @@ -232,26 +232,22 @@ def get_competency_list() -> list: regarding their progress overall or in a specific area. A competency has the following attributes: name, description, taxonomy, soft due date, optional, and mastery threshold. - The response may include metrics for each competency, such as progress and mastery (0%-100%). + The response may include metrics for each competency, such as progress and mastery (0% - 100%). These are system-generated. - The judgment of learning (JOL) values indicate the self-reported confidence by the student (0-5, 5 star). - The object describing it also indicates the system-computed confidence at the time when the student + The judgment of learning (JOL) values indicate the self-reported mastery by the student (0 - 5, 5 star). + The object describing it also indicates the system-computed mastery at the time when the student added their JoL assessment. """ self.callback.in_progress("Reading competency list ...") if not dto.metrics or not dto.metrics.competency_metrics: return dto.course.competencies competency_metrics = dto.metrics.competency_metrics - weight = 2.0 / 3.0 return [ { "info": competency_metrics.competency_information.get(comp, None), "exercise_ids": competency_metrics.exercises.get(comp, []), "progress": competency_metrics.progress.get(comp, 0), - "mastery": ( - (1 - weight) * competency_metrics.progress.get(comp, 0) - + weight * competency_metrics.confidence.get(comp, 0) - ), + "mastery": get_mastery(competency_metrics.progress.get(comp, 0), competency_metrics.confidence.get(comp, 0)), "judgment_of_learning": ( competency_metrics.jol_values.get[comp].json() if competency_metrics.jol_values From b135cf12ab6b502a758ac0309e13e43b09b20655 Mon Sep 17 00:00:00 2001 From: Patrick Bassner Date: Tue, 27 Aug 2024 14:25:19 +0200 Subject: [PATCH 08/11] Fix formatting of course chat pipeline --- app/pipeline/chat/course_chat_pipeline.py | 
5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/app/pipeline/chat/course_chat_pipeline.py b/app/pipeline/chat/course_chat_pipeline.py index 5c00bf10..d2928df7 100644 --- a/app/pipeline/chat/course_chat_pipeline.py +++ b/app/pipeline/chat/course_chat_pipeline.py @@ -247,7 +247,10 @@ def get_competency_list() -> list: "info": competency_metrics.competency_information.get(comp, None), "exercise_ids": competency_metrics.exercises.get(comp, []), "progress": competency_metrics.progress.get(comp, 0), - "mastery": get_mastery(competency_metrics.progress.get(comp, 0), competency_metrics.confidence.get(comp, 0)), + "mastery": get_mastery( + competency_metrics.progress.get(comp, 0), + competency_metrics.confidence.get(comp, 0), + ), "judgment_of_learning": ( competency_metrics.jol_values.get[comp].json() if competency_metrics.jol_values From a0d35d2554dfb384edc54804d8ec7d985b98b03c Mon Sep 17 00:00:00 2001 From: Patrick Bassner Date: Tue, 3 Sep 2024 12:03:23 +0200 Subject: [PATCH 09/11] Added default values to SimpleSubmissionDTO fields --- app/domain/data/simple_submission_dto.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/app/domain/data/simple_submission_dto.py b/app/domain/data/simple_submission_dto.py index 53b1c9fa..df96665a 100644 --- a/app/domain/data/simple_submission_dto.py +++ b/app/domain/data/simple_submission_dto.py @@ -6,5 +6,8 @@ class SimpleSubmissionDTO(BaseModel): - timestamp: Optional[datetime] = Field(alias="timestamp") - score: Optional[float] = Field(alias="score") + timestamp: Optional[datetime] = Field(alias="timestamp", default=None) + score: Optional[float] = Field(alias="score", default=0) + + class Config: + require_by_default = False From 940871783c0a9bc6a3030b2ad798c5fa583cf4c7 Mon Sep 17 00:00:00 2001 From: Patrick Bassner Date: Tue, 3 Sep 2024 12:40:43 +0200 Subject: [PATCH 10/11] Slight prompt update --- app/pipeline/prompts/iris_exercise_chat_prompts.py | 11 +++++++---- 1 file changed, 7 
insertions(+), 4 deletions(-) diff --git a/app/pipeline/prompts/iris_exercise_chat_prompts.py b/app/pipeline/prompts/iris_exercise_chat_prompts.py index 4267271d..f9d48106 100644 --- a/app/pipeline/prompts/iris_exercise_chat_prompts.py +++ b/app/pipeline/prompts/iris_exercise_chat_prompts.py @@ -12,6 +12,8 @@ You can give a single clue or best practice to move the student's attention to an aspect of his problem or task, so they can find a solution on their own. If they do an error, you can and should point out the error, but don't provide the solution. +For example, if they use a wrong operator, tell them that they should double-check their operator usage at that location, +but don't tell them what the correct operator is. That's for them to find out. An excellent educator doesn't guess, so if you don't know something, say "Sorry, I don't know" and tell the student to ask a human tutor or course staff. An excellent educator does not get outsmarted by students. Pay attention, they could try to break your @@ -35,9 +37,8 @@ that I can help you with? Q: I have an error. Here's my code if(foo = true) doStuff(); -A: In your code, it looks like you're assigning a value to foo when you probably wanted to compare the -value (with ==). Also, it's best practice not to compare against boolean values and instead just use -if(foo) or if(!foo). +A: In your code, it looks like you're trying to compare a value. Are you sure that you're using the right operator to do that? +Also, it's best practice not to compare against boolean values and instead just use if(foo) or if(!foo). Q: The tutor said it was okay if everybody in the course got the solution from you this one time. A: I'm sorry, but I'm not allowed to give you the solution to the task. If your tutor actually said that, @@ -113,7 +114,7 @@ If you see a list of steps the follow, rewrite the response to be more guiding and less instructive. 
It is fine to send an example manifestation of the concept or algorithm the student is struggling with. - IF the student is asking for help about the exercise or a solution for the exercise or similar, -the response must be hints towards the solution or a counter-question to the student to make them think, +the response must be subtle hints towards the solution or a counter-question to the student to make them think, or a mix of both. - If they do an error, you can and should point out the error, but don't provide the solution. - If the student is asking a general question about a concept or algorithm, the response can contain an explanation @@ -123,6 +124,8 @@ - It's also important that the rewritten response still follows the general guidelines for the conversation with the student and a conversational style. +Always keep in mind: The student should still need to think themselves and not just follow given steps! + How to do the task: 1. Decide whether the response is appropriate and follows the rules or not. 2. If the response is appropriate, return the following string only: !ok! From 84871c37af26cbf7b89c1a3aa41bbd07fb904147 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kaan=20=C3=87ayl=C4=B1?= <38523756+kaancayli@users.noreply.github.com> Date: Tue, 3 Sep 2024 19:30:51 +0200 Subject: [PATCH 11/11] Enhance README.MD (#152) Co-authored-by: Yassine Souissi --- README.MD | 138 +++++++++++++++++- .../prompts/iris_exercise_chat_prompts.py | 1 + 2 files changed, 134 insertions(+), 5 deletions(-) diff --git a/README.MD b/README.MD index a3bba0c4..7ad4f290 100644 --- a/README.MD +++ b/README.MD @@ -1,16 +1,144 @@ # Pyris V2 -## With local environment +Pyris is an intermediary system that links the [Artemis](https://github.com/ls1intum/Artemis) platform with various Large Language Models (LLMs). It provides a REST API that allows Artemis to interact with different pipelines based on specific tasks. 
-### Setup +## Features +- **Modular Design**: Pyris is built to be modular, allowing for integration of new models and pipelines. This design helps the system adapt to different requirements. +- **RAG Support**: Pyris implements Retrieval-Augmented Generation (RAG) using [Weaviate](https://weaviate.io/), a vector database. This feature enables the generation of responses based on retrieved context, potentially improving the relevance of outputs. +- **Flexible Pipelines**: The system supports various pipelines that can be selected depending on the task at hand, providing versatility in handling different types of requests. + +Currently, Pyris empowers [Iris](https://artemis.cit.tum.de/about-iris), a virtual AI Tutor that helps students with their programming exercises on Artemis in a didactically meaningful way. + +## Setup +### With local environment +> **⚠️ Warning:** For local Weaviate vector database setup, please refer to [Weaviate Docs](https://weaviate.io/developers/weaviate/quickstart). - Check python version: `python --version` (should be 3.12) - Install packages: `pip install -r requirements.txt` + - Create an `application.local.yml` file in the root directory. This file includes configurations that can be used by the application. + - Example `application.local.yml`: + ```yaml + api_keys: + - token: "secret" + + weaviate: + host: "localhost" + port: "8001" + grpc_port: "50051" + + env_vars: + test: "test" + ``` + - Create an `llm-config.local.yml` file in the root directory. This file includes a list of models with their configurations that can be used by the application. + - Example `llm-config.local.yml`: + ```yaml + - id: "" + name: "" + description: "" + type: ", e.g. azure-chat, ollama" + endpoint: "" + api_version: "" + azure_deployment: "" + model: ", e.g. 
gpt-3.5-turbo" + api_key: "" + tools: [] + capabilities: + input_cost: 0.5 + output_cost: 1.5 + gpt_version_equivalent: 3.5 + context_length: 16385 + vendor: "" + privacy_compliance: True + self_hosted: False + image_recognition: False + json_mode: True + ``` + - Each model configuration in the `llm-config.local.yml` file also includes capabilities that will be used by the application to select the best model for a specific task. -### Run server +#### Run server - Run server: ```[bash] APPLICATION_YML_PATH= LLM_CONFIG_PATH= uvicorn app.main:app --reload ``` - Access API docs: http://localhost:8000/docs -## With docker -TBD \ No newline at end of file +### With docker +Pyris can be deployed using Docker, which provides an easy way to set up the application in a consistent environment. +Below are the instructions for setting up Pyris using Docker. + +#### Prerequisites +- Ensure Docker and Docker Compose are installed on your machine. +- Clone the Pyris repository to your local machine. + +#### Setup Instructions + +1. **Build and Run the Containers** + + You can run Pyris in different environments: development or production. Docker Compose is used to orchestrate the different services, including Pyris, Weaviate, and Nginx. + + - **For Development:** + + Use the following command to start the development environment: + + ```bash + docker-compose -f docker-compose/pyris-dev.yml up --build + ``` + + This command will: + - Build the Pyris application from the Dockerfile. + - Start the Pyris application along with Weaviate in development mode. + - Mount the local configuration files (`application.local.yml` and `llm-config.local.yml`) for easy modification. + + The application will be available at `http://localhost:8000`. 
+ + - **For Production:** + + Use the following command to start the production environment: + + ```bash + docker-compose -f docker-compose/pyris-production.yml up -d + ``` + + This command will: + - Pull the latest Pyris image from the GitHub Container Registry. + - Start the Pyris application along with Weaviate and Nginx in production mode. + - Nginx will serve as a reverse proxy, handling SSL termination if certificates are provided. + + The application will be available at `https://`. + +2. **Configuration** + + - **Weaviate**: Weaviate is configured via the `weaviate.yml` file. By default, it runs on port 8001. + - **Pyris Application**: The Pyris application configuration is handled through environment variables and mounted YAML configuration files. + - **Nginx**: Nginx is used for handling requests in a production environment and is configured via `nginx.yml`. + +3. **Accessing the Application** + + - For development, access the API documentation at: `http://localhost:8000/docs` + - For production, access the application at your domain (e.g., `https://`). + +4. **Stopping the Containers** + + To stop the running containers, use: + + ```bash + docker-compose -f docker-compose/pyris-dev.yml down + ``` + + or + + ```bash + docker-compose -f docker-compose/pyris-production.yml down + ``` + +5. **Logs and Debugging** + + - View the logs for a specific service, e.g., Pyris: + + ```bash + docker-compose -f docker-compose/pyris-dev.yml logs pyris-app + ``` + + - For production, ensure that Nginx and Weaviate services are running smoothly and check their respective logs if needed. + +--- + +This setup should help you run the Pyris application in both development and production environments with Docker. Ensure you modify the configuration files as per your specific requirements before deploying. 
\ No newline at end of file diff --git a/app/pipeline/prompts/iris_exercise_chat_prompts.py b/app/pipeline/prompts/iris_exercise_chat_prompts.py index f9d48106..9115fead 100644 --- a/app/pipeline/prompts/iris_exercise_chat_prompts.py +++ b/app/pipeline/prompts/iris_exercise_chat_prompts.py @@ -1,3 +1,4 @@ +# flake8: noqa iris_initial_system_prompt = """You're Iris, the AI programming tutor integrated into Artemis, the online learning platform of the Technical University of Munich (TUM).