Skip to content

Commit

Permalink
Avoid generating duplicate competencies
Browse files (browse the repository at this point in the history)
  • Loading branch information
MichaelOwenDyer committed Aug 21, 2024
1 parent 8a7da20 commit 0d590b7
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 7 deletions.
9 changes: 6 additions & 3 deletions app/domain/competency_extraction_pipeline_execution_dto.py
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,17 @@
from typing import List, Optional
from typing import List

from pydantic import Field, BaseModel

from . import PipelineExecutionDTO
from .data.competency_dto import CompetencyTaxonomy
from .data.competency_dto import CompetencyTaxonomy, Competency


class CompetencyExtractionPipelineExecutionDTO(BaseModel):
execution: PipelineExecutionDTO
course_description: Optional[str] = Field(alias="courseDescription")
course_description: str = Field(alias="courseDescription")
current_competencies: list[Competency] = Field(
alias="currentCompetencies", default=[]
)
taxonomy_options: List[CompetencyTaxonomy] = Field(
alias="taxonomyOptions", default=[]
)
Expand Down
9 changes: 9 additions & 0 deletions app/pipeline/competency_extraction_pipeline.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -48,11 +48,20 @@ def __call__(
raise ValueError("Non-zero max_n is required")

taxonomy_options = ", ".join(dto.taxonomy_options)
current_competencies = "\n\n".join(
[c.model_dump_json(indent=4) for c in dto.current_competencies]
)
if current_competencies:
current_competencies = (
f"\nHere are the current competencies in the course:\n{current_competencies}\n"
f"Do not repeat these competencies.\n"
)

prompt = system_prompt.format(
taxonomy_list=taxonomy_options,
course_description=dto.course_description,
max_n=dto.max_n,
current_competencies=current_competencies,
)
prompt = PyrisMessage(
sender=IrisMessageRole.SYSTEM,
Expand Down
10 changes: 6 additions & 4 deletions app/pipeline/prompts/competency_extraction.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -10,9 +10,9 @@
The title of the competency, which is a specific topic or skill. This should be a short phrase of at most 4 words.
- description:
A detailed description of the competency in 3 to 5 bullet points.
A detailed description of the competency in 2 to 5 bullet points.
Each bullet point illustrates a specific skill or concept of the competency.
Each bullet point is a complete sentence starting with "You" and containing at most 15 words.
Each bullet point is a complete sentence containing at most 15 words.
Each bullet point is on a new line and starts with "- ".
- taxonomy:
Expand All @@ -27,6 +27,8 @@
- is small and fine-grained. Large topics should be broken down into smaller competencies.
- does not overlap with other competencies: each competency is unique. Expanding on a previous competency is allowed.
Here is the provided course description: {course_description}
Here is a template competency in JSON format:
{{
Expand All @@ -35,8 +37,8 @@
"taxonomy": "ANALYZE"
}}
Here is the provided course description: {course_description}
{current_competencies}
Respond with up to {max_n} competencies extracted from the course description,
Respond with 0 to {max_n} competencies extracted from the course description,
each in JSON format, split by two newlines.
"""

0 comments on commit 0d590b7

Please sign in to comment.