Skip to content

Commit

Permalink
Merge pull request #92 from Clinical-Genomics-Lund/91-add-emmtyper
Browse files Browse the repository at this point in the history
Add emmtyper, update shigapass and fix quast
  • Loading branch information
ryanjameskennedy authored Oct 10, 2024
2 parents eef32b6 + b25c37a commit ba8c9b3
Show file tree
Hide file tree
Showing 15 changed files with 126 additions and 10 deletions.
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,24 @@

### Added

- Added emmtyper and parser
- Added pytests for emmtyper

### Fixed

### Changed

- Changed Shigapass models to be consistent with other typing models
- Changed Shigapass parsers to be consistent with other typing parsers
- Changed ref genome related variables to be optional in quast

## [0.10.1]

### Added

### Fixed

- Updated parsing of ChewBBACA allele calling annotations and novel alleles. This adds support for annotations introduced in v3.
- Updated parsing of ChewBBACA allele calling annotations and novel alleles. This adds support for annotations introduced in v3.

### Changed

Expand Down
10 changes: 10 additions & 0 deletions prp/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
parse_amrfinder_amr_pred,
parse_amrfinder_vir_pred,
parse_cgmlst_results,
parse_emmtyper_pred,
parse_kraken_result,
parse_mlst_results,
parse_mykrobe_amr_pred,
Expand Down Expand Up @@ -116,6 +117,7 @@ def cli(silent, debug):
)
@click.option("-p", "--quality", type=click.Path(), help="postalignqc qc results")
@click.option("-k", "--mykrobe", type=click.Path(), help="mykrobe results")
@click.option("-e", "--emmtyper", type=click.Path(), help="Emmtyper m-type prediction results")
@click.option("-g", "--shigapass", type=click.Path(), help="shigapass results")
@click.option("-t", "--tbprofiler", type=click.Path(), help="tbprofiler results")
@click.option("--bam", type=click.Path(), help="Read mapping to reference genome")
Expand Down Expand Up @@ -153,6 +155,7 @@ def create_bonsai_input(
serotypefinder,
quality,
mykrobe,
emmtyper,
shigapass,
tbprofiler,
bam,
Expand Down Expand Up @@ -246,6 +249,13 @@ def create_bonsai_input(
if res is not None:
results["typing_result"].extend(res)

if emmtyper:
LOG.info("Parse emmtyper results")
# Emmtyping
res: MethodIndex | None = parse_emmtyper_pred(emmtyper)
if res is not None:
results["typing_result"].extend(res)

if shigapass:
LOG.info("Parse shigapass results")
# Shigatyping
Expand Down
2 changes: 1 addition & 1 deletion prp/models/phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ class GeneBase(BaseModel):
default=None, description="Reference sequence name"
)
element_type: ElementType = Field(
description="The predominant function fo the gene."
description="The predominant function of the gene."
)
element_subtype: Union[
ElementStressSubtype,
Expand Down
6 changes: 3 additions & 3 deletions prp/models/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ class QuastQcResult(BaseModel):
"""Assembly QC metrics."""

total_length: int
reference_length: int
reference_length: int | None = None
largest_contig: int
n_contigs: int
n50: int
assembly_gc: float
reference_gc: float
duplication_ratio: float
reference_gc: float | None = None
duplication_ratio: float | None = None


class PostAlignQcResult(BaseModel):
Expand Down
3 changes: 2 additions & 1 deletion prp/models/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from .typing import (
ResultLineageBase,
ShigaTypingMethodIndex,
EmmTypingMethodIndex,
TbProfilerLineage,
TypingMethod,
TypingResultCgMlst,
Expand Down Expand Up @@ -80,7 +81,7 @@ class PipelineResult(SampleBase):

schema_version: Literal[1] = 1
# optional typing
typing_result: list[Union[ShigaTypingMethodIndex, MethodIndex]] = Field(
typing_result: list[Union[ShigaTypingMethodIndex, EmmTypingMethodIndex, MethodIndex]] = Field(
..., alias="typingResult"
)
# optional phenotype prediction
Expand Down
19 changes: 19 additions & 0 deletions prp/models/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class TypingSoftware(str, Enum):
VIRULENCEFINDER = "virulencefinder"
SEROTYPEFINDER = "serotypefinder"
SHIGAPASS = "shigapass"
EMMTYPER = "emmtyper"


class TypingMethod(str, Enum):
Expand All @@ -31,6 +32,7 @@ class TypingMethod(str, Enum):
OTYPE = "O_type"
HTYPE = "H_type"
SHIGATYPE = "shigatype"
EMMTYPE = "emmtype"


class ChewbbacaErrors(str, Enum):
Expand Down Expand Up @@ -97,6 +99,23 @@ class ShigaTypingMethodIndex(RWModel):
result: TypingResultShiga


class TypingResultEmm(RWModel):
    """Container for emmtype gene information.

    Holds the typing values of one emmtyper result row.
    """

    # cluster count reported for the sample; must validate as an integer
    cluster_count: int
    # emm-type label (e.g. "EMM169.3")
    emmtype: str
    # emm-like allele labels, one list entry per allele (e.g. ["EMM164.2~*"])
    emm_like_alleles: list[str]
    # emm cluster label (e.g. "E4")
    emm_cluster: str


class EmmTypingMethodIndex(RWModel):
    """Method index entry for emm typing.

    Pairs an emm typing result with the method and software that produced it.
    """

    # only the emmtype typing method is accepted for this entry
    type: Literal[TypingMethod.EMMTYPE]
    # only the emmtyper software is accepted for this entry
    software: Literal[TypingSoftware.EMMTYPER]
    # the emm typing values themselves
    result: TypingResultEmm


class ResultLineageBase(RWModel):
"""Lineage results"""

Expand Down
1 change: 1 addition & 0 deletions prp/parse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from .phenotype import (
parse_amrfinder_amr_pred,
parse_amrfinder_vir_pred,
parse_emmtyper_pred,
parse_mykrobe_amr_pred,
parse_resfinder_amr_pred,
parse_shigapass_pred,
Expand Down
1 change: 1 addition & 0 deletions prp/parse/phenotype/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Module for parsing resistance prediction results."""

from .amrfinder import parse_amrfinder_amr_pred, parse_amrfinder_vir_pred
from .emmtyper import parse_emmtyper_pred
from .mykrobe import parse_mykrobe_amr_pred
from .resfinder import parse_resfinder_amr_pred
from .shigapass import parse_shigapass_pred
Expand Down
41 changes: 41 additions & 0 deletions prp/parse/phenotype/emmtyper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""Functions for parsing emmtyper result."""

import logging
import pandas as pd

from typing import Any

from ...models.typing import EmmTypingMethodIndex, TypingMethod, TypingResultEmm
from ...models.typing import TypingSoftware as Software

LOG = logging.getLogger(__name__)

def parse_emmtyper_pred(path: str) -> list[EmmTypingMethodIndex]:
    """Parse emmtyper's output re emm-typing.

    The file is read as a headerless, tab-separated table whose five columns
    are, in order: sample name, cluster count, emm-type, emm-like alleles and
    emm cluster.

    :param path: Path to the emmtyper result file.
    :return: One ``EmmTypingMethodIndex`` per row of the file.
    """
    LOG.info("Parsing emmtyper results")
    df = pd.read_csv(path, sep="\t", header=None)
    # the file carries no header row, so the column names are assigned here
    df.columns = [
        "sample_name",
        "cluster_count",
        "emmtype",
        "emm_like_alleles",
        "emm_cluster",
    ]
    return [
        EmmTypingMethodIndex(
            type=TypingMethod.EMMTYPE,
            result=_parse_emmtyper_results(row),
            software=Software.EMMTYPER,
        )
        for row in df.to_dict(orient="records")
    ]


def _parse_emmtyper_results(info: dict[str, Any]) -> TypingResultEmm:
    """Parse emm gene prediction results from one emmtyper result row.

    :param info: A single row of the emmtyper table, keyed by column name.
    :return: The typing result built from the row.
    """
    raw_alleles = info["emm_like_alleles"]
    # An empty column is delivered by pandas as NaN (a float), which has no
    # ``split``; treat any non-string value as "no emm-like alleles".
    if isinstance(raw_alleles, str):
        emm_like_alleles = raw_alleles.split(";")
    else:
        emm_like_alleles = []
    return TypingResultEmm(
        cluster_count=info["cluster_count"],
        emmtype=info["emmtype"],
        emm_like_alleles=emm_like_alleles,
        emm_cluster=info["emm_cluster"],
    )
6 changes: 3 additions & 3 deletions prp/parse/qc.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,13 +255,13 @@ def parse_quast_results(tsv_fpath: str) -> QcMethodIndex:
raw = [dict(zip(header, row)) for row in creader]
qc_res = QuastQcResult(
total_length=int(raw[0]["Total length"]),
reference_length=raw[0]["Reference length"],
reference_length=raw[0].get("Reference length", None),
largest_contig=raw[0]["Largest contig"],
n_contigs=raw[0]["# contigs"],
n50=raw[0]["N50"],
assembly_gc=raw[0]["GC (%)"],
reference_gc=raw[0]["Reference GC (%)"],
duplication_ratio=raw[0]["Duplication ratio"],
reference_gc=raw[0].get("Reference GC (%)", None),
duplication_ratio=raw[0].get("Duplication ratio", None),
)
return QcMethodIndex(software=QcSoftware.QUAST, result=qc_res)

Expand Down
4 changes: 3 additions & 1 deletion prp/parse/typing.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,7 @@ def parse_mykrobe_lineage_results(pred_res: dict) -> MethodIndex | None:

def parse_virulencefinder_stx_typing(path: str) -> MethodIndex | None:
"""Parse virulencefinder's output re stx typing"""
LOG.info("Parsing virulencefinder stx results")
with open(path, "rb") as inpt:
pred_obj = json.load(inpt)
# if has valid results
Expand Down Expand Up @@ -230,7 +231,8 @@ def parse_virulencefinder_stx_typing(path: str) -> MethodIndex | None:


def parse_serotypefinder_oh_typing(path: str) -> MethodIndex | None:
"""Parse serotypefinder's output re OH typing"""
"""Parse 's output re OH typing"""
LOG.info("Parsing serotypefinder oh type results")
with open(path, "rb") as inpt:
pred_obj = json.load(inpt)
# if has valid results
Expand Down
1 change: 1 addition & 0 deletions tests/fixtures/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from .mtuberculosis import *
from .saureus import *
from .shigella import *
from .streptococcus import *
10 changes: 10 additions & 0 deletions tests/fixtures/streptococcus/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""Fixtures for Streptococcus."""
import pytest

from ..fixtures import data_path


@pytest.fixture()
def streptococcus_emmtyper_path(data_path):
    """Return the path of the streptococcus Emmtyper result file as a string."""
    emmtyper_file = data_path.joinpath("streptococcus", "emmtyper.tsv")
    return str(emmtyper_file)
1 change: 1 addition & 0 deletions tests/fixtures/streptococcus/emmtyper.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
test1_240920_nb000000_0000_test 2 EMM169.3 EMM164.2~* E4
22 changes: 22 additions & 0 deletions tests/parse/test_emmtyper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Test functions for parsing Emmtyper results."""

import pytest

from prp.parse.phenotype.emmtyper import parse_emmtyper_pred


def test_parse_emmtyper_results(streptococcus_emmtyper_path):
    """Check that an emmtyper result file is parsed into the expected values."""
    parsed = parse_emmtyper_pred(streptococcus_emmtyper_path)
    # only the first parsed entry is compared
    first_result = parsed[0].result.model_dump()

    # values expected for the streptococcus fixture file
    expected_streptococcus = {
        "cluster_count": 2,
        "emmtype": "EMM169.3",
        "emm_like_alleles": ["EMM164.2~*"],
        "emm_cluster": "E4",
    }
    assert first_result == expected_streptococcus

0 comments on commit ba8c9b3

Please sign in to comment.