Skip to content

Commit

Permalink
Fix competency extraction feature (#145)
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelOwenDyer authored Aug 27, 2024
1 parent 2e4f640 commit 5765c9c
Show file tree
Hide file tree
Showing 14 changed files with 292 additions and 37 deletions.
3 changes: 3 additions & 0 deletions app/domain/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
from .pipeline_execution_settings_dto import PipelineExecutionSettingsDTO
from .chat.chat_pipeline_execution_dto import ChatPipelineExecutionDTO
from .chat.chat_pipeline_execution_base_data_dto import ChatPipelineExecutionBaseDataDTO
from .competency_extraction_pipeline_execution_dto import (
CompetencyExtractionPipelineExecutionDTO,
)
from app.domain.chat.exercise_chat.exercise_chat_pipeline_execution_dto import (
ExerciseChatPipelineExecutionDTO,
)
Expand Down
7 changes: 1 addition & 6 deletions app/domain/chat/chat_pipeline_execution_dto.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,11 @@

from pydantic import Field

from app.domain import PipelineExecutionDTO, PipelineExecutionSettingsDTO
from app.domain import PipelineExecutionDTO
from app.domain.pyris_message import PyrisMessage
from app.domain.data.user_dto import UserDTO
from app.domain.status.stage_dto import StageDTO


class ChatPipelineExecutionDTO(PipelineExecutionDTO):
    """Execution payload for chat pipelines.

    Declares the chat history, the user, the execution settings and the
    initial stages. camelCase aliases are given per field where present.
    """

    # Defaults to an empty list when "chatHistory" is not supplied.
    chat_history: List[PyrisMessage] = Field(default=[], alias="chatHistory")
    # Optional type, but no default value is declared for these two fields.
    user: Optional[UserDTO]
    settings: Optional[PipelineExecutionSettingsDTO]
    initial_stages: Optional[List[StageDTO]] = Field(
        alias="initialStages",
        default=None,
    )
22 changes: 22 additions & 0 deletions app/domain/competency_extraction_pipeline_execution_dto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from typing import List

from pydantic import Field, BaseModel

from . import PipelineExecutionDTO
from .data.competency_dto import CompetencyTaxonomy, Competency


class CompetencyExtractionPipelineExecutionDTO(BaseModel):
    """Request payload for the competency extraction pipeline.

    Wraps the generic pipeline execution data in ``execution`` and adds the
    inputs specific to competency extraction. Every extraction-specific field
    is exposed under a camelCase alias.
    """

    execution: PipelineExecutionDTO
    course_description: str = Field(alias="courseDescription")
    # Competencies that already exist; defaults to an empty list.
    current_competencies: list[Competency] = Field(
        default=[],
        alias="currentCompetencies",
    )
    # Taxonomy levels offered for selection; defaults to an empty list.
    taxonomy_options: list[CompetencyTaxonomy] = Field(
        default=[],
        alias="taxonomyOptions",
    )
    max_n: int = Field(
        default=10,
        alias="maxN",
        description="Maximum number of competencies to extract from the course description",
    )
27 changes: 27 additions & 0 deletions app/domain/data/competency_dto.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from typing import Optional

from pydantic import BaseModel, Field, field_validator
from pydantic.v1 import validator


class CompetencyTaxonomy(str, Enum):
Expand All @@ -21,3 +22,29 @@ class CompetencyDTO(BaseModel):
taxonomy: Optional[CompetencyTaxonomy] = None
soft_due_date: Optional[datetime] = Field(default=None, alias="softDueDate")
optional: Optional[bool] = None


class Competency(BaseModel):
    """A competency consisting of a title, a description and a taxonomy level.

    The ``description`` text attached to each field is part of the model's
    schema and is reproduced verbatim.

    Validation:
        * ``title`` must contain at most 4 whitespace-separated words.
        * ``taxonomy`` must be a valid ``CompetencyTaxonomy`` value.
    """

    title: str = Field(
        description="Title of the competency that contains no more than 4 words",
    )
    description: str = Field(
        description="Description of the competency as plain string. DO NOT RETURN A LIST OF STRINGS."
    )
    taxonomy: CompetencyTaxonomy = Field(
        description="Selected taxonomy based on bloom's taxonomy"
    )

    # NOTE: these must be pydantic v2 ``field_validator``s. This class derives
    # from the v2 ``BaseModel``; validators created with ``pydantic.v1.validator``
    # are not registered on v2 models and would silently never run.
    @field_validator("title")
    @classmethod
    def validate_title(cls, field):
        """Validate the title of the competency.

        Raises:
            ValueError: if the title has more than 4 whitespace-separated words.
        """
        if len(field.split()) > 4:
            raise ValueError("Title must contain no more than 4 words")
        return field

    @field_validator("taxonomy")
    @classmethod
    def validate_selected_taxonomy(cls, field):
        """Validate the selected taxonomy.

        The check goes through the enum constructor (lookup by value) instead
        of ``CompetencyTaxonomy.__members__``, which is keyed by member *name*.

        Raises:
            ValueError: if the value is not a valid ``CompetencyTaxonomy``.
        """
        try:
            return CompetencyTaxonomy(field)
        except ValueError:
            raise ValueError(f"Invalid taxonomy: {field}") from None
9 changes: 2 additions & 7 deletions app/domain/ingestion/ingestion_pipeline_execution_dto.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,12 @@
from typing import List, Optional
from typing import List

from pydantic import Field

from app.domain import PipelineExecutionDTO, PipelineExecutionSettingsDTO
from app.domain import PipelineExecutionDTO
from app.domain.data.lecture_unit_dto import LectureUnitDTO
from app.domain.status.stage_dto import StageDTO


class IngestionPipelineExecutionDto(PipelineExecutionDTO):
    """Execution payload for the ingestion pipeline.

    Declares the lecture units to process, the execution settings and the
    initial stages.
    """

    # Required field, read from the "pyrisLectureUnitWebhookDTOS" alias.
    lecture_units: List[LectureUnitDTO] = Field(
        default=...,
        alias="pyrisLectureUnitWebhookDTOS",
    )
    # Optional type, but no default value is declared.
    settings: Optional[PipelineExecutionSettingsDTO]
    initial_stages: Optional[List[StageDTO]] = Field(
        alias="initialStages",
        default=None,
    )
12 changes: 10 additions & 2 deletions app/domain/pipeline_execution_dto.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,16 @@
from pydantic import BaseModel
from typing import Optional

from pydantic import BaseModel, Field

from app.domain.pipeline_execution_settings_dto import PipelineExecutionSettingsDTO
from app.domain.status.stage_dto import StageDTO


class PipelineExecutionDTO(BaseModel):
    """Base payload for pipeline executions.

    Holds the execution settings and the optional list of initial stages.
    """

    # Optional type, but no default value is declared.
    settings: Optional[PipelineExecutionSettingsDTO]
    initial_stages: Optional[list[StageDTO]] = Field(
        alias="initialStages",
        default=None,
    )

    class Config:
        # Allow fields to be populated by their Python name as well as by
        # their alias.
        populate_by_name = True
6 changes: 6 additions & 0 deletions app/domain/status/competency_extraction_status_update_dto.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from app.domain.data.competency_dto import Competency
from app.domain.status.status_update_dto import StatusUpdateDTO


class CompetencyExtractionStatusUpdateDTO(StatusUpdateDTO):
    """Status update that additionally carries a list of competencies."""

    # Defaults to an empty list.
    result: list[Competency] = []
1 change: 0 additions & 1 deletion app/llm/external/openai_chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from openai import OpenAI
from openai.lib.azure import AzureOpenAI
from openai.types.chat import ChatCompletionMessage, ChatCompletionMessageParam
from openai.types.chat.completion_create_params import ResponseFormat
from openai.types.shared_params import ResponseFormatJSONObject

from ...common.message_converters import map_str_to_role, map_role_to_str
Expand Down
2 changes: 1 addition & 1 deletion app/pipeline/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from ..pipeline.pipeline import Pipeline
from app.pipeline.pipeline import Pipeline
3 changes: 2 additions & 1 deletion app/pipeline/chat/course_chat_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,8 @@ def get_competency_list() -> list:
def lecture_content_retrieval() -> str:
"""
Retrieve content from indexed lecture slides.
This will run a RAG retrieval based on the chat history on the indexed lecture slides and return the most relevant paragraphs.
This will run a RAG retrieval based on the chat history on the indexed lecture slides and return the most
relevant paragraphs.
Use this if you think it can be useful to answer the student's question, or if the student explicitly asks
a question about the lecture content or slides.
Only use this once.
Expand Down
96 changes: 96 additions & 0 deletions app/pipeline/competency_extraction_pipeline.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import logging
from typing import Optional

from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import (
ChatPromptTemplate,
)

from app.domain import (
CompetencyExtractionPipelineExecutionDTO,
PyrisMessage,
IrisMessageRole,
)
from app.domain.data.text_message_content_dto import TextMessageContentDTO
from app.domain.data.competency_dto import Competency
from app.llm import CapabilityRequestHandler, RequirementList, CompletionArguments
from app.pipeline import Pipeline
from app.web.status.status_update import CompetencyExtractionCallback
from app.pipeline.prompts.competency_extraction import system_prompt

# Module-level logger; the pipeline below emits its per-competency parsing
# diagnostics through it at DEBUG level.
logger = logging.getLogger(__name__)


class CompetencyExtractionPipeline(Pipeline):
    """Pipeline that extracts competencies from a course description.

    A single system prompt is built from the request, sent to the LLM through
    the request handler, and the response is split into chunks that are parsed
    into ``Competency`` objects. The parsed competencies are reported through
    the callback.
    """

    callback: CompetencyExtractionCallback
    request_handler: CapabilityRequestHandler
    output_parser: PydanticOutputParser

    def __init__(self, callback: Optional[CompetencyExtractionCallback] = None):
        super().__init__(
            implementation_id="competency_extraction_pipeline_reference_impl"
        )
        self.callback = callback
        self.request_handler = CapabilityRequestHandler(requirements=RequirementList())
        self.output_parser = PydanticOutputParser(pydantic_object=Competency)

    def __call__(
        self,
        dto: CompetencyExtractionPipelineExecutionDTO,
        prompt: Optional[ChatPromptTemplate] = None,
        **kwargs,
    ):
        """Run the extraction and report the result via ``self.callback.done``.

        Args:
            dto: Request data; ``course_description``, ``taxonomy_options``
                and ``max_n`` must all be truthy.
            prompt: Accepted for interface compatibility but not used; the
                prompt is always built from the module-level ``system_prompt``.
            **kwargs: Ignored.

        Raises:
            ValueError: if the course description or the taxonomy options are
                empty, or if ``max_n`` is zero.
        """
        if not dto.course_description:
            raise ValueError("Course description is required")
        if not dto.taxonomy_options:
            raise ValueError("Taxonomy options are required")
        if not dto.max_n:
            raise ValueError("Non-zero max_n is required")

        taxonomy_options = ", ".join(dto.taxonomy_options)
        current_competencies = "\n\n".join(
            [c.model_dump_json(indent=4) for c in dto.current_competencies]
        )
        if current_competencies:
            current_competencies = (
                f"\nHere are the current competencies in the course:\n{current_competencies}\n"
                f"Do not repeat these competencies.\n"
            )

        prompt_text = system_prompt.format(
            taxonomy_list=taxonomy_options,
            course_description=dto.course_description,
            max_n=dto.max_n,
            current_competencies=current_competencies,
        )
        system_message = PyrisMessage(
            sender=IrisMessageRole.SYSTEM,
            contents=[TextMessageContentDTO(text_content=prompt_text)],
        )

        response = self.request_handler.chat(
            [system_message], CompletionArguments(temperature=0.4)
        )
        response_text = response.contents[0].text_content

        generated_competencies: list[Competency] = []

        # The prompt asks for competencies separated by two newlines. The
        # max_n limit is applied to successfully parsed competencies, not to
        # raw chunks, so chunks without JSON or with invalid JSON do not use
        # up the budget.
        for i, chunk in enumerate(response_text.split("\n\n")):
            if len(generated_competencies) >= dto.max_n:
                break
            logger.debug(f"Processing competency {i + 1}: {chunk}")
            competency = self._parse_competency(chunk)
            if competency is None:
                continue
            logger.debug(f"Generated competency: {competency}")
            generated_competencies.append(competency)
        self.callback.done(final_result=generated_competencies)

    def _parse_competency(self, chunk: str) -> Optional[Competency]:
        """Return the competency encoded in ``chunk``, or None if it has none."""
        if "{" not in chunk or "}" not in chunk:
            logger.debug("Skipping competency without JSON")
            return None
        # Keep only the text from the first "{" through the first "}".
        start = chunk.index("{")
        end = chunk.index("}") + 1
        try:
            return self.output_parser.parse(chunk[start:end])
        except Exception as e:
            # Best effort: a chunk that cannot be parsed is skipped.
            logger.debug(f"Error parsing competency: {e}")
            return None
44 changes: 44 additions & 0 deletions app/pipeline/prompts/competency_extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# System prompt template for competency extraction.
# It contains the replacement fields {taxonomy_list}, {course_description},
# {current_competencies} and {max_n}; the doubled braces around the JSON
# template are escapes that produce literal "{" and "}" under str.format().
system_prompt = """
You are an expert in all topics of computer science and its practical applications.
Your task consists of three parts:
1. Read the provided curriculum description a university course.
2. Extract all learning goals ("competencies") from the course description.
Each competency must contain the following fields:
- title:
The title of the competency, which is a specific topic or skill. This should be a short phrase of at most 4 words.
- description:
A detailed description of the competency in 2 to 5 bullet points.
Each bullet point illustrates a specific skill or concept of the competency.
Each bullet point is a complete sentence containing at most 15 words.
Each bullet point is on a new line and starts with "- ".
- taxonomy:
The classification of the competency within Bloom's taxonomy.
You must choose from these options in Bloom's taxonomy: {taxonomy_list}
All competencies must meet the following requirements:
- is mentioned in the course description.
- corresponds to exactly one subject or skill covered in the course description.
- is assigned to exactly one level of Bloom's taxonomy.
- is small and fine-grained. Large topics should be broken down into smaller competencies.
- does not overlap with other competencies: each competency is unique. Expanding on a previous competency is allowed.
Here is the provided course description: {course_description}
Here is a template competency in JSON format:
{{
"title": "Competency Title",
"description": "- You understand this.\n- You are proficient in doing that.\n- You know how to do this.",
"taxonomy": "ANALYZE"
}}
{current_competencies}
Respond with 0 to {max_n} competencies extracted from the course description,
each in JSON format, split by two newlines.
"""
41 changes: 41 additions & 0 deletions app/web/routers/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,14 +9,17 @@
from app.domain import (
ExerciseChatPipelineExecutionDTO,
CourseChatPipelineExecutionDTO,
CompetencyExtractionPipelineExecutionDTO,
)
from app.web.status.status_update import (
ExerciseChatStatusCallback,
CourseChatStatusCallback,
CompetencyExtractionCallback,
)
from app.pipeline.chat.course_chat_pipeline import CourseChatPipeline
from app.pipeline.chat.exercise_chat_pipeline import ExerciseChatPipeline
from app.dependencies import TokenValidator
from app.pipeline.competency_extraction_pipeline import CompetencyExtractionPipeline

# Router for the pipeline endpoints; every route registered on it is served
# under the /api/v1/pipelines prefix and tagged "pipelines".
router = APIRouter(prefix="/api/v1/pipelines", tags=["pipelines"])
# Module-level logger used by the pipeline workers for error reporting.
logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -86,6 +89,44 @@ def run_course_chat_pipeline(variant: str, dto: CourseChatPipelineExecutionDTO):
thread.start()


def run_competency_extraction_pipeline_worker(
    dto: CompetencyExtractionPipelineExecutionDTO, _variant: str
):
    """Build and run the competency extraction pipeline for one request.

    A failure while preparing the callback or pipeline is logged and passed to
    ``capture_exception``; the pipeline is then not run. A failure while
    running the pipeline is logged and reported through the callback.

    Args:
        dto: The request payload.
        _variant: The requested variant; not used.
    """
    try:
        execution = dto.execution
        callback = CompetencyExtractionCallback(
            run_id=execution.settings.authentication_token,
            base_url=execution.settings.artemis_base_url,
            initial_stages=execution.initial_stages,
        )
        pipeline = CompetencyExtractionPipeline(callback=callback)
    except Exception as exc:
        logger.error(f"Error preparing competency extraction pipeline: {exc}")
        logger.error(traceback.format_exc())
        capture_exception(exc)
    else:
        # Setup succeeded, so a callback exists to report run failures to.
        try:
            pipeline(dto=dto)
        except Exception as exc:
            logger.error(f"Error running competency extraction pipeline: {exc}")
            logger.error(traceback.format_exc())
            callback.error("Fatal error.", exception=exc)


@router.post(
    "/competency-extraction/{variant}/run",
    status_code=status.HTTP_202_ACCEPTED,
    dependencies=[Depends(TokenValidator())],
)
def run_competency_extraction_pipeline(
    variant: str, dto: CompetencyExtractionPipelineExecutionDTO
):
    """Start the competency extraction pipeline on a new thread.

    The worker thread is started and the handler returns without waiting for
    it to finish.
    """
    worker = Thread(
        target=run_competency_extraction_pipeline_worker,
        kwargs={"dto": dto, "_variant": variant},
    )
    worker.start()


@router.get("/{feature}")
def get_pipeline(feature: str):
"""
Expand Down
Loading

0 comments on commit 5765c9c

Please sign in to comment.