Skip to content

Commit

Permalink
Store AMR class with phenotypic info
Browse files Browse the repository at this point in the history
  • Loading branch information
mhkc committed Dec 12, 2023
1 parent 2c10588 commit af20d4e
Show file tree
Hide file tree
Showing 3 changed files with 233 additions and 26 deletions.
18 changes: 14 additions & 4 deletions prp/models/phenotype.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ class ElementVirulenceSubtype(Enum):
VIR = "VIRULENCE"


class PhenotypeInfo(RWModel):
    """Phenotype conferred by a resistance gene or variant, with its resistance class."""

    # element category the phenotype belongs to (an ElementType member)
    type: ElementType
    # resistance class the phenotype is grouped under; the resfinder parser
    # fills this via lookup_antibiotic_class, the amrfinder parser from "Class"
    res_class: str
    # name of the phenotype itself; the parsers store the antibiotic name or
    # the amrfinder subclass annotation here
    name: str


class DatabaseReference(RWModel):
"""Refernece to a database."""

Expand Down Expand Up @@ -110,15 +118,17 @@ class GeneBase(BaseModel):
)
close_seq_name: Optional[str] = Field(
default=None,
description=("Name of the closest competing hit if there "
"are multiple equaly good hits"),
description=(
"Name of the closest competing hit if there "
"are multiple equaly good hits"
),
)


class ResistanceGene(GeneBase, DatabaseReference):
"""Container for resistance gene information"""

phenotypes: List[str] = []
phenotypes: List[PhenotypeInfo] = []


class VirulenceGene(GeneBase, DatabaseReference):
Expand Down Expand Up @@ -169,7 +179,7 @@ class VariantBase(DatabaseReference):
class ResistanceVariant(VariantBase):
"""Container for resistance variant information"""

phenotypes: List[str]
phenotypes: List[PhenotypeInfo] = []


class ElementTypeResult(BaseModel):
Expand Down
40 changes: 34 additions & 6 deletions prp/parse/phenotype/amrfinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from ...models.phenotype import ElementType, ElementTypeResult
from ...models.phenotype import PredictionSoftware as Software
from ...models.phenotype import ResistanceGene, VirulenceGene
from ...models.phenotype import ResistanceGene, VirulenceGene, PhenotypeInfo
from ...models.sample import MethodIndex

LOG = logging.getLogger(__name__)
Expand All @@ -16,6 +16,27 @@ def _parse_amrfinder_amr_results(predictions: dict) -> Tuple[ResistanceGene, ...
"""Parse amrfinder prediction results from amrfinderplus."""
genes = []
for prediction in predictions:
element_type = ElementType(prediction["element_type"])
res_class = prediction["Class"]
res_sub_class = prediction["Subclass"]

# classification to phenotype object
phenotypes = []
if res_class is None:
phenotypes.append(
PhenotypeInfo(
type=element_type,
res_class=element_type,
name=element_type,
))
elif isinstance(res_sub_class, str):
phenotypes.extend([
PhenotypeInfo(
type=element_type,
res_class=res_class.lower(),
name=annot.lower(),
) for annot in res_sub_class.split("/")])
# store resistance gene
gene = ResistanceGene(
accession=prediction["close_seq_accn"],
identity=prediction["ref_seq_identity"],
Expand All @@ -28,16 +49,23 @@ def _parse_amrfinder_amr_results(predictions: dict) -> Tuple[ResistanceGene, ...
ass_start_pos=prediction["Start"],
ass_end_pos=prediction["Stop"],
strand=prediction["Strand"],
element_type=prediction["element_type"],
element_type=element_type,
element_subtype=prediction["element_subtype"],
target_length=prediction["target_length"],
res_class=prediction["Class"],
res_subclass=prediction["Subclass"],
res_class=res_class,
res_subclass=res_sub_class,
method=prediction["Method"],
close_seq_name=prediction["close_seq_name"],
phenotypes=phenotypes,
)
genes.append(gene)
return ElementTypeResult(phenotypes={}, genes=genes, mutations=[])

# concat resistance profile
sr_profile = {
"susceptible": [],
"resistant": [pheno.name for gene in genes for pheno in gene.phenotypes]
}
return ElementTypeResult(phenotypes=sr_profile, genes=genes, mutations=[])


def parse_amrfinder_amr_pred(file: str, element_type: ElementType) -> ElementTypeResult:
Expand All @@ -64,7 +92,7 @@ def parse_amrfinder_amr_pred(file: str, element_type: ElementType) -> ElementTyp
hits = hits.drop(columns=["Protein identifier", "HMM id", "HMM description"])
hits = hits.where(pd.notnull(hits), None)
# group predictions based on their element type
predictions = hits.loc[lambda row: row.element_type == element_type].to_dict(
predictions = hits.loc[lambda row: row.element_type == element_type.value].to_dict(
orient="records"
)
results: ElementTypeResult = _parse_amrfinder_amr_results(predictions)
Expand Down
201 changes: 185 additions & 16 deletions prp/parse/phenotype/resfinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
ElementTypeResult,
)
from ...models.phenotype import PredictionSoftware as Software
from ...models.phenotype import ResistanceGene, ResistanceVariant, VariantType
from ...models.phenotype import ResistanceGene, ResistanceVariant, VariantType, PhenotypeInfo
from ...models.sample import MethodIndex
from .utils import _default_resistance

Expand All @@ -30,6 +30,152 @@
}


# Antibiotic name -> antibiotic class, sourced from resfinder db v2.2.1.
# Kept at module level so the table is built once at import time rather than
# on every lookup (the parsers call the lookup once per phenotype).
_ANTIBIOTIC_CLASSES: dict[str, str] = {
    "unknown aminocyclitol": "aminocyclitol",
    "spectinomycin": "aminocyclitol",
    "unknown aminoglycoside": "aminoglycoside",
    "gentamicin": "aminoglycoside",
    "gentamicin c": "aminoglycoside",
    "tobramycin": "aminoglycoside",
    "streptomycin": "aminoglycoside",
    "amikacin": "aminoglycoside",
    "kanamycin": "aminoglycoside",
    "kanamycin a": "aminoglycoside",
    "neomycin": "aminoglycoside",
    "paromomycin": "aminoglycoside",
    "kasugamycin": "aminoglycoside",
    "g418": "aminoglycoside",
    "capreomycin": "aminoglycoside",
    "isepamicin": "aminoglycoside",
    "dibekacin": "aminoglycoside",
    "lividomycin": "aminoglycoside",
    "ribostamycin": "aminoglycoside",
    "butiromycin": "aminoglycoside",
    "butirosin": "aminoglycoside",
    "hygromycin": "aminoglycoside",
    "netilmicin": "aminoglycoside",
    "apramycin": "aminoglycoside",
    "sisomicin": "aminoglycoside",
    "arbekacin": "aminoglycoside",
    "astromicin": "aminoglycoside",
    "fortimicin": "aminoglycoside",
    "unknown analog of d-alanine": "analog of d-alanine",
    "d-cycloserine": "analog of d-alanine",
    "unknown beta-lactam": "beta-lactam",
    "amoxicillin": "beta-lactam",
    "amoxicillin+clavulanic acid": "beta-lactam",
    "ampicillin": "beta-lactam",
    "ampicillin+clavulanic acid": "beta-lactam",
    "aztreonam": "beta-lactam",
    "cefazolin": "beta-lactam",
    "cefepime": "beta-lactam",
    "cefixime": "beta-lactam",
    "cefotaxime": "beta-lactam",
    "cefotaxime+clavulanic acid": "beta-lactam",
    "cefoxitin": "beta-lactam",
    "ceftaroline": "beta-lactam",
    "ceftazidime": "beta-lactam",
    "ceftazidime+avibactam": "beta-lactam",
    "ceftriaxone": "beta-lactam",
    "cefuroxime": "beta-lactam",
    "cephalothin": "beta-lactam",
    "ertapenem": "beta-lactam",
    "imipenem": "beta-lactam",
    "meropenem": "beta-lactam",
    "penicillin": "beta-lactam",
    "piperacillin": "beta-lactam",
    "piperacillin+tazobactam": "beta-lactam",
    "temocillin": "beta-lactam",
    "ticarcillin": "beta-lactam",
    "ticarcillin+clavulanic acid": "beta-lactam",
    "cephalotin": "beta-lactam",
    "piperacillin+clavulanic acid": "beta-lactam",
    "unknown diarylquinoline": "diarylquinoline",
    "bedaquiline": "diarylquinoline",
    "unknown quinolone": "quinolone",
    "ciprofloxacin": "quinolone",
    "nalidixic acid": "quinolone",
    "fluoroquinolone": "quinolone",
    "unknown folate pathway antagonist": "folate pathway antagonist",
    "sulfamethoxazole": "folate pathway antagonist",
    "trimethoprim": "folate pathway antagonist",
    "unknown fosfomycin": "fosfomycin",
    "fosfomycin": "fosfomycin",
    "unknown glycopeptide": "glycopeptide",
    "vancomycin": "glycopeptide",
    "teicoplanin": "glycopeptide",
    "bleomycin": "glycopeptide",
    "unknown ionophores": "ionophores",
    "narasin": "ionophores",
    "salinomycin": "ionophores",
    "maduramicin": "ionophores",
    "unknown iminophenazine": "iminophenazine",
    "clofazimine": "iminophenazine",
    "unknown isonicotinic acid hydrazide": "isonicotinic acid hydrazide",
    "isoniazid": "isonicotinic acid hydrazide",
    "unknown lincosamide": "lincosamide",
    "lincomycin": "lincosamide",
    "clindamycin": "lincosamide",
    "unknown macrolide": "macrolide",
    "carbomycin": "macrolide",
    "azithromycin": "macrolide",
    "oleandomycin": "macrolide",
    "spiramycin": "macrolide",
    "tylosin": "macrolide",
    "telithromycin": "macrolide",
    "erythromycin": "macrolide",
    "unknown nitroimidazole": "nitroimidazole",
    "metronidazole": "nitroimidazole",
    "unknown oxazolidinone": "oxazolidinone",
    "linezolid": "oxazolidinone",
    "unknown amphenicol": "amphenicol",
    "chloramphenicol": "amphenicol",
    "florfenicol": "amphenicol",
    "unknown pleuromutilin": "pleuromutilin",
    "tiamulin": "pleuromutilin",
    "unknown polymyxin": "polymyxin",
    "colistin": "polymyxin",
    "unknown pseudomonic acid": "pseudomonic acid",
    "mupirocin": "pseudomonic acid",
    "unknown rifamycin": "rifamycin",
    "rifampicin": "rifamycin",
    "unknown salicylic acid - anti-folate": "salicylic acid - anti-folate",
    "para-aminosalicyclic acid": "salicylic acid - anti-folate",
    "unknown steroid antibacterial": "steroid antibacterial",
    "fusidic acid": "steroid antibacterial",
    "unknown streptogramin a": "streptogramin a",
    "dalfopristin": "streptogramin a",
    "pristinamycin iia": "streptogramin a",
    "virginiamycin m": "streptogramin a",
    "quinupristin+dalfopristin": "streptogramin a",
    "unknown streptogramin b": "streptogramin b",
    "quinupristin": "streptogramin b",
    "pristinamycin ia": "streptogramin b",
    "virginiamycin s": "streptogramin b",
    "unknown synthetic derivative of nicotinamide": "synthetic derivative of nicotinamide",
    "pyrazinamide": "synthetic derivative of nicotinamide",
    "unknown tetracycline": "tetracycline",
    "tetracycline": "tetracycline",
    "doxycycline": "tetracycline",
    "minocycline": "tetracycline",
    "tigecycline": "tetracycline",
    "unknown thioamide": "thioamide",
    "ethionamide": "thioamide",
    "unknown unspecified": "unspecified",
    "ethambutol": "unspecified",
    "cephalosporins": "under_development",
    "carbapenem": "under_development",
    "norfloxacin": "under_development",
    "ceftiofur": "under_development",
}


def lookup_antibiotic_class(antibiotic: str) -> str:
    """Lookup antibiotic class for antibiotic name.

    Antibiotic classes are sourced from resfinder db v2.2.1

    :param antibiotic: Name of the antibiotic. Matching ignores letter case
        and surrounding whitespace, since every key in the table is lowercase.
    :type antibiotic: str
    :return: The antibiotic class, or "unknown" if the name is not in the table.
    :rtype: str
    """
    # Non-string input (e.g. None for a missing annotation) cannot match a
    # key; report it as unknown instead of failing on the normalisation below.
    if not isinstance(antibiotic, str):
        return "unknown"
    return _ANTIBIOTIC_CLASSES.get(antibiotic.strip().lower(), "unknown")


def _assign_res_subtype(
prediction: Dict[str, Any], element_type: ElementType
) -> ElementStressSubtype | None:
Expand Down Expand Up @@ -88,9 +234,15 @@ def _parse_resfinder_amr_genes(
continue

# get element type by peeking at first phenotype
pheno = info["phenotypes"][0]
res_category = resfinder_result["phenotypes"][pheno]["category"].upper()
category = ElementType(res_category)
first_pheno = info["phenotypes"][0]
res_category = ElementType(resfinder_result["phenotypes"][first_pheno]["category"].upper())
element_subtype = _assign_res_subtype(info, res_category)

# format phenotypes
phenotype = [
PhenotypeInfo(type=res_category, res_class=lookup_antibiotic_class(phe), name=phe)
for phe in info["phenotypes"]
]

# store results
gene = ResistanceGene(
Expand All @@ -103,11 +255,11 @@ def _parse_resfinder_amr_genes(
ref_end_pos=info["ref_end_pos"],
ref_gene_length=info["ref_seq_length"],
alignment_length=info["alignment_length"],
phenotypes=info["phenotypes"],
phenotypes=phenotype,
ref_database=info["ref_database"][0],
ref_id=info["ref_id"],
element_type=category,
element_subtype=_assign_res_subtype(info, category),
element_type=res_category,
element_subtype=element_subtype,
)
results.append(gene)
return results
Expand All @@ -124,7 +276,7 @@ def get_nt_change(ref_codon: str, alt_codon: str) -> Tuple[str, str]:
:type str: str
:return: Returns nucleotide changed from the reference.
:rtype: Tuple[str, str]
"""
"""
ref_nt = ""
alt_nt = ""
for ref, alt in zip(ref_codon, alt_codon):
Expand All @@ -134,7 +286,13 @@ def get_nt_change(ref_codon: str, alt_codon: str) -> Tuple[str, str]:
return ref_nt.upper(), alt_nt.upper()


def format_nt_change(ref: str, alt: str, var_type: VariantType, start_pos: int, end_pos: int = None, ) -> str:
def format_nt_change(
ref: str,
alt: str,
var_type: VariantType,
start_pos: int,
end_pos: int = None,
) -> str:
"""Format nucleotide change
:param ref: Reference sequence
Expand Down Expand Up @@ -190,25 +348,36 @@ def _parse_resfinder_amr_variants(
if not "seq_regions" in info:
# igenes = _default_resistance().genes
igenes = [""]

# get gene symbol and accession nr
gene_symbol, _, gene_accnr = info['seq_regions'][0].split(';;')
gene_symbol, _, gene_accnr = info["seq_regions"][0].split(";;")

ref_nt, alt_nt = get_nt_change(info["ref_codon"], info["var_codon"])
nt_change = format_nt_change(ref=ref_nt, alt=alt_nt, start_pos=info['ref_start_pos'], end_pos=info['ref_end_pos'], var_type=var_type)
nt_change = format_nt_change(
ref=ref_nt,
alt=alt_nt,
start_pos=info["ref_start_pos"],
end_pos=info["ref_end_pos"],
var_type=var_type,
)
phenotype = [
PhenotypeInfo(type=ElementType.AMR, res_class=lookup_antibiotic_class(phe), name=phe)
for phe in info["phenotypes"]
]
variant = ResistanceVariant(
variant_type=var_type,
gene_symbol=gene_symbol,
accession=gene_accnr,
close_seq_name=gene_accnr,
genes=igenes,
phenotypes=info["phenotypes"],
phenotypes=phenotype,
position=info["ref_start_pos"],
ref_nt=ref_nt,
alt_nt=alt_nt,
ref_aa=info['ref_aa'],
alt_aa=info['var_aa'],
ref_aa=info["ref_aa"],
alt_aa=info["var_aa"],
nucleotide_change=nt_change,
protein_change=info['seq_var'],
protein_change=info["seq_var"],
depth=info["depth"],
ref_database=info["ref_database"],
ref_id=info["ref_id"],
Expand Down

0 comments on commit af20d4e

Please sign in to comment.