Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix competency extraction feature #145

Merged
merged 14 commits into from
Aug 27, 2024
Merged
3 changes: 3 additions & 0 deletions app/domain/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
from .pipeline_execution_settings_dto import PipelineExecutionSettingsDTO
from .chat.chat_pipeline_execution_dto import ChatPipelineExecutionDTO
from .chat.chat_pipeline_execution_base_data_dto import ChatPipelineExecutionBaseDataDTO
from .competency_extraction_pipeline_execution_dto import (
CompetencyExtractionPipelineExecutionDTO,
)
MichaelOwenDyer marked this conversation as resolved.
Show resolved Hide resolved
from app.domain.chat.exercise_chat.exercise_chat_pipeline_execution_dto import (
ExerciseChatPipelineExecutionDTO,
)
Expand Down
7 changes: 1 addition & 6 deletions app/domain/chat/chat_pipeline_execution_dto.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,11 @@

from pydantic import Field

from app.domain import PipelineExecutionDTO, PipelineExecutionSettingsDTO
from app.domain import PipelineExecutionDTO
from app.domain.pyris_message import PyrisMessage
from app.domain.data.user_dto import UserDTO
from app.domain.status.stage_dto import StageDTO


class ChatPipelineExecutionDTO(PipelineExecutionDTO):
    """Execution payload for chat pipelines, extending PipelineExecutionDTO."""

    # List of PyrisMessage objects populated from the "chatHistory" key;
    # defaults to an empty list when the key is absent.
    chat_history: List[PyrisMessage] = Field(alias="chatHistory", default=[])
    # Declared without a default value.
    user: Optional[UserDTO]
    # NOTE(review): PipelineExecutionDTO, as shown elsewhere in this change set,
    # declares `settings` and `initial_stages` itself; the two declarations
    # below look like the lines this diff removes — confirm before relying on them.
    settings: Optional[PipelineExecutionSettingsDTO]
    initial_stages: Optional[List[StageDTO]] = Field(
        default=None, alias="initialStages"
    )
19 changes: 19 additions & 0 deletions app/domain/competency_extraction_pipeline_execution_dto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from typing import List, Optional

from pydantic import Field, BaseModel

from . import PipelineExecutionDTO
from .data.competency_dto import CompetencyTaxonomy


class CompetencyExtractionPipelineExecutionDTO(BaseModel):
    """Input payload for a competency extraction run.

    The generic execution data is nested under ``execution`` instead of being
    inherited; every other field is populated from its camelCase alias.
    """

    # Nested PipelineExecutionDTO (composition, not inheritance).
    execution: PipelineExecutionDTO
    # Read from "courseDescription"; declared without a default value.
    course_description: Optional[str] = Field(alias="courseDescription")
    # Read from "taxonomyOptions"; an empty list when the key is absent.
    taxonomy_options: List[CompetencyTaxonomy] = Field(
        default=[],
        alias="taxonomyOptions",
    )
    # Read from "maxN"; falls back to 10.
    max_n: int = Field(
        default=10,
        alias="maxN",
        description="Maximum number of competencies to extract from the course description",
    )
27 changes: 27 additions & 0 deletions app/domain/data/competency_dto.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Optional

from pydantic import BaseModel, Field, field_validator
from pydantic.v1 import validator


class CompetencyTaxonomy(str, Enum):
Expand All @@ -21,3 +22,29 @@ class CompetencyDTO(BaseModel):
taxonomy: Optional[CompetencyTaxonomy] = None
soft_due_date: Optional[datetime] = Field(default=None, alias="softDueDate")
optional: Optional[bool] = None


class Competency(BaseModel):
    """A single extracted competency: title, description and taxonomy level.

    The field descriptions are runtime strings attached to the model's fields.

    The validators are registered with pydantic v2's ``field_validator``.
    The v1 ``validator`` decorator is not collected by a v2 ``BaseModel``,
    so validators declared with it never run.
    """

    title: str = Field(
        description="Title of the competency that contains no more than 4 words",
    )
    description: str = Field(
        description="Description of the competency as plain string. DO NOT RETURN A LIST OF STRINGS."
    )
    taxonomy: CompetencyTaxonomy = Field(
        description="Selected taxonomy based on bloom's taxonomy"
    )

    @field_validator("title")
    @classmethod
    def validate_title(cls, field):
        """Reject titles with more than 4 whitespace-separated words.

        Raises:
            ValueError: If the title contains more than 4 words.
        """
        if len(field.split()) > 4:
            raise ValueError("Title must contain no more than 4 words")
        return field

    @field_validator("taxonomy")
    @classmethod
    def validate_selected_taxonomy(cls, field):
        """Ensure the taxonomy is a valid ``CompetencyTaxonomy`` value.

        Coercing through the enum constructor checks against the enum's
        values; a lookup in ``__members__`` would compare against member
        names instead.

        Raises:
            ValueError: If the value is not a member of ``CompetencyTaxonomy``.
        """
        try:
            return CompetencyTaxonomy(field)
        except ValueError:
            raise ValueError(f"Invalid taxonomy: {field}") from None
9 changes: 2 additions & 7 deletions app/domain/ingestion/ingestion_pipeline_execution_dto.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,12 @@
from typing import List, Optional
from typing import List

from pydantic import Field

from app.domain import PipelineExecutionDTO, PipelineExecutionSettingsDTO
from app.domain import PipelineExecutionDTO
from app.domain.data.lecture_unit_dto import LectureUnitDTO
from app.domain.status.stage_dto import StageDTO


class IngestionPipelineExecutionDto(PipelineExecutionDTO):
    """Execution payload for the ingestion pipeline, extending PipelineExecutionDTO."""

    # Required list of LectureUnitDTO objects (`...` marks the field as
    # required), populated from the "pyrisLectureUnitWebhookDTOS" key.
    lecture_units: List[LectureUnitDTO] = Field(
        ..., alias="pyrisLectureUnitWebhookDTOS"
    )
    # NOTE(review): PipelineExecutionDTO, as shown elsewhere in this change set,
    # declares `settings` and `initial_stages` itself; the two declarations
    # below look like the lines this diff removes — confirm before relying on them.
    settings: Optional[PipelineExecutionSettingsDTO]
    initial_stages: Optional[List[StageDTO]] = Field(
        default=None, alias="initialStages"
    )
12 changes: 10 additions & 2 deletions app/domain/pipeline_execution_dto.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
from pydantic import BaseModel
from typing import Optional

from pydantic import BaseModel, Field

from app.domain.pipeline_execution_settings_dto import PipelineExecutionSettingsDTO
from app.domain.status.stage_dto import StageDTO


class PipelineExecutionDTO(BaseModel):
    """Base payload carrying the settings and initial stages of a pipeline run."""

    # Declared without a default value.
    settings: Optional[PipelineExecutionSettingsDTO]
    # Read from the "initialStages" key; None when the key is absent.
    initial_stages: Optional[list[StageDTO]] = Field(
        alias="initialStages",
        default=None,
    )

    class Config:
        # Accept the Python attribute names in addition to the aliases.
        populate_by_name = True
6 changes: 6 additions & 0 deletions app/domain/status/competency_extraction_status_update_dto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from app.domain.data.competency_dto import Competency
from app.domain.status.status_update_dto import StatusUpdateDTO


class CompetencyExtractionStatusUpdateDTO(StatusUpdateDTO):
    """Status update that additionally carries a list of Competency objects."""

    # Defaults to an empty list when no result is supplied.
    result: list[Competency] = []
2 changes: 1 addition & 1 deletion app/pipeline/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from ..pipeline.pipeline import Pipeline
from app.pipeline.pipeline import Pipeline
3 changes: 2 additions & 1 deletion app/pipeline/chat/course_chat_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,8 @@ def get_competency_list() -> list:
def lecture_content_retrieval() -> str:
"""
Retrieve content from indexed lecture slides.
This will run a RAG retrieval based on the chat history on the indexed lecture slides and return the most relevant paragraphs.
This will run a RAG retrieval based on the chat history on the indexed lecture slides and return the most
relevant paragraphs.
Use this if you think it can be useful to answer the student's question, or if the student explicitly asks
a question about the lecture content or slides.
Only use this once.
Expand Down
90 changes: 90 additions & 0 deletions app/pipeline/competency_extraction_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
import logging
from typing import Optional

from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import (
ChatPromptTemplate,
)

from app.domain import (
CompetencyExtractionPipelineExecutionDTO,
PyrisMessage,
IrisMessageRole,
)
from app.domain.data.text_message_content_dto import TextMessageContentDTO
from app.domain.data.competency_dto import Competency
from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments
from app.pipeline import Pipeline
from app.web.status.status_update import CompetencyExtractionCallback
from app.pipeline.prompts.competency_extraction import system_prompt

logger = logging.getLogger(__name__)


class CompetencyExtractionPipeline(Pipeline):
    """Extract competencies from a course description with a single LLM request.

    The response text is split on blank lines; each chunk is expected to hold
    one JSON object, which is parsed into a ``Competency``. Chunks that hold no
    JSON object or fail to parse are dropped.
    """

    callback: CompetencyExtractionCallback
    request_handler: CapabilityRequestHandler
    output_parser: PydanticOutputParser

    def __init__(self, callback: Optional[CompetencyExtractionCallback] = None):
        """Create the request handler and the ``Competency`` output parser.

        Args:
            callback: Status callback used by ``__call__`` to report results.
                ``__call__`` invokes it unconditionally, so it has to be set
                before the pipeline is run.
        """
        super().__init__(
            implementation_id="competency_extraction_pipeline_reference_impl"
        )
        self.callback = callback
        self.request_handler = CapabilityRequestHandler(requirements=RequirementList())
        self.output_parser = PydanticOutputParser(pydantic_object=Competency)

    @staticmethod
    def _extract_json_object(text: str) -> Optional[str]:
        """Return the first JSON object embedded in ``text``, or None if absent.

        A JSON decoder locates the end of the object, so closing braces inside
        string values and nested objects do not truncate it. When the text is
        not decodable, the slice up to the first ``}`` after the opening brace
        is returned so the output parser can still attempt to parse it.
        """
        import json

        start = text.find("{")
        if start == -1:
            return None
        try:
            # strict=False tolerates raw newlines inside string values, which
            # the prompt's own example competency contains.
            _, end = json.JSONDecoder(strict=False).raw_decode(text, start)
        except ValueError:
            closing = text.find("}", start)
            if closing == -1:
                return None
            end = closing + 1
        return text[start:end]

    def __call__(
        self,
        dto: CompetencyExtractionPipelineExecutionDTO,
        prompt: Optional[ChatPromptTemplate] = None,
        **kwargs,
    ):
        """Run the extraction and report the result through the callback.

        Args:
            dto: Execution payload; course description, taxonomy options and a
                positive ``max_n`` are required.
            prompt: Accepted but not used; the prompt is always built from
                ``system_prompt``.
            **kwargs: Accepted but not used.

        Raises:
            ValueError: If the course description or taxonomy options are
                missing, or if ``max_n`` is not positive.
        """
        if not dto.course_description:
            raise ValueError("Course description is required")
        if not dto.taxonomy_options:
            raise ValueError("Taxonomy options are required")
        # A negative max_n would otherwise silently slice from the end of the
        # response chunks below.
        if not dto.max_n or dto.max_n < 0:
            raise ValueError("Positive max_n is required")

        taxonomy_options = ", ".join(dto.taxonomy_options)

        prompt_text = system_prompt.format(
            taxonomy_list=taxonomy_options,
            course_description=dto.course_description,
            max_n=dto.max_n,
        )
        system_message = PyrisMessage(
            sender=IrisMessageRole.SYSTEM,
            contents=[TextMessageContentDTO(text_content=prompt_text)],
        )

        response = self.request_handler.chat(
            [system_message], CompletionArguments(temperature=0.4)
        )
        response_text = response.contents[0].text_content

        generated_competencies: list[Competency] = []

        # Find all competencies in the response up to the max_n
        competencies = response_text.split("\n\n")[: dto.max_n]
        for position, chunk in enumerate(competencies, start=1):
            logger.debug("Processing competency %s: %s", position, chunk)
            candidate = self._extract_json_object(chunk)
            if candidate is None:
                logger.debug("Skipping competency without JSON")
                continue
            try:
                competency = self.output_parser.parse(candidate)
            except Exception as e:
                # Best effort: one malformed competency must not abort the run.
                logger.debug("Error parsing competency: %s", e)
                continue
            logger.debug("Generated competency: %s", competency)
            generated_competencies.append(competency)
        self.callback.done(final_result=generated_competencies)
        # Mark all remaining competencies as skipped
        for i in range(len(generated_competencies), len(competencies)):
            self.callback.skip(f"Skipping competency {i + 1}")
43 changes: 43 additions & 0 deletions app/pipeline/prompts/competency_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Prompt template for competency extraction. Placeholders filled via
# str.format: {taxonomy_list}, {course_description}, {max_n}. The doubled
# braces around the example competency are literal braces after formatting.
system_prompt = """
You are an expert in all topics of computer science and its practical applications.
Your task consists of two parts:
1. Read the provided curriculum description of a university course.
2. Extract all learning goals ("competencies") from the course description.

Each competency must contain the following fields:

- title:
The title of the competency, which is a specific topic or skill. This should be a short phrase of at most 4 words.

- description:
A detailed description of the competency in 3 to 6 bullet points.
Each bullet point is a short sentence, at most 15 words.
Each bullet point illustrates a specific skill or concept of the competency.

- taxonomy:
The classification of the competency within Bloom's taxonomy.
You must choose from these options in Bloom's taxonomy: {taxonomy_list}

All competencies must meet the following requirements:

- is mentioned in the course description.
- corresponds to exactly one subject or skill covered in the course description.
- is assigned to exactly one level of Bloom's taxonomy.
- is small and fine-grained. Large topics should be broken down into smaller competencies.
- does not overlap with other competencies: each competency is unique. Expanding on a previous competency is allowed.

Here is an example competency whose structure you should follow:

{{
"title": "Recursion",
"description": "- You understand the concept of recursion.
- You are able to understand complex recursive implementations.
- You are able to implement recursive solutions of medium difficulty independently.",
"taxonomy": "ANALYZE"
}}

Here is the provided course description: {course_description}

Respond with up to {max_n} competencies extracted from the course description,
each in JSON format, split by two newlines.
"""
42 changes: 42 additions & 0 deletions app/web/routers/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,17 @@
from app.domain import (
ExerciseChatPipelineExecutionDTO,
CourseChatPipelineExecutionDTO,
CompetencyExtractionPipelineExecutionDTO,
)
from app.web.status.status_update import (
ExerciseChatStatusCallback,
CourseChatStatusCallback,
CompetencyExtractionCallback,
)
from app.pipeline.chat.course_chat_pipeline import CourseChatPipeline
from app.pipeline.chat.exercise_chat_pipeline import ExerciseChatPipeline
from app.dependencies import TokenValidator
from app.pipeline.competency_extraction_pipeline import CompetencyExtractionPipeline

router = APIRouter(prefix="/api/v1/pipelines", tags=["pipelines"])
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -86,6 +89,45 @@ def run_course_chat_pipeline(variant: str, dto: CourseChatPipelineExecutionDTO):
thread.start()


def run_competency_extraction_pipeline_worker(
    dto: CompetencyExtractionPipelineExecutionDTO, _variant: str
):
    """Build the competency extraction pipeline and run it for ``dto``.

    Setup failures are logged and passed to ``capture_exception``; failures
    while the pipeline runs are logged and reported through the callback.
    ``_variant`` is accepted but not used.
    """
    # Phase 1: build the status callback and the pipeline.
    try:
        settings = dto.execution.settings
        status_callback = CompetencyExtractionCallback(
            run_id=settings.authentication_token,
            base_url=settings.artemis_base_url,
            initial_stages=dto.execution.initial_stages,
            num_iterations=dto.max_n,
        )
        extraction_pipeline = CompetencyExtractionPipeline(callback=status_callback)
    except Exception as e:
        logger.error(f"Error preparing competency extraction pipeline: {e}")
        logger.error(traceback.format_exc())
        capture_exception(e)
        return

    # Phase 2: run it; the callback exists by now, so errors go through it.
    try:
        extraction_pipeline(dto=dto)
    except Exception as e:
        logger.error(f"Error running competency extraction pipeline: {e}")
        logger.error(traceback.format_exc())
        status_callback.error("Fatal error.", exception=e)


@router.post(
    "/competency-extraction/{variant}/run",
    status_code=status.HTTP_202_ACCEPTED,
    dependencies=[Depends(TokenValidator())],
)
def run_competency_extraction_pipeline(
    variant: str, dto: CompetencyExtractionPipelineExecutionDTO
):
    """Start the competency extraction worker on a new thread and return."""
    Thread(
        target=run_competency_extraction_pipeline_worker,
        args=(dto, variant),
    ).start()


@router.get("/{feature}")
def get_pipeline(feature: str):
"""
Expand Down
Loading
Loading