Skip to content

Commit

Permalink
Merge pull request #196 from phac-nml/dev/columns
Browse files Browse the repository at this point in the history
Adding Additional Columns to Pointfinder Output
  • Loading branch information
apetkau authored Aug 17, 2023
2 parents b158915 + 5585aa2 commit abae05f
Show file tree
Hide file tree
Showing 11 changed files with 125 additions and 49 deletions.
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

* Fixed an issue where the string "None" in the drug table would be parsed differently by different versions of pandas (#175).
* Upgraded to pandas version 2.
* Added CGE-predicted phenotypes to Pointfinder output.
* Added the CGE-predicted phenotype, PMID, Mechanism, and Required Mutation columns to the Pointfinder output.
* The resfinder.tsv and pointfinder.tsv outputs now contain a Notes column.
* Updated the help description of the --mlst-scheme parameter to include a more useful link for available schemas.
* Switched to only officially supporting Python 3.7+ due to recent incompatibilities with Python 3.6 and some Python packages (numpy, biopython, and others).
Expand Down
45 changes: 40 additions & 5 deletions staramr/blast/pointfinder/PointfinderBlastDatabase.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,27 +94,62 @@ def get_organism(self):
def get_name(self):
return 'pointfinder'

def get_notes(self, gene, mutation):
def get_cge_notes(self, gene, mutation):
"""
Gets the note associated with a particular mutation from the Pointfinder Database table.
:param gene: The gene.
:param mutation: The mutation.
:return: A string containtain the Note, if it exists.
:return: A string containing the note, if it exists.
"""

return self._pointfinder_info.get_notes(gene, mutation)
return self._pointfinder_info.get_value(gene, mutation, "Notes")

def get_cge_phenotype(self, gene, mutation):
"""
Gets the phenotype associated with a particular mutation from the Pointfinder Database table.
:param gene: The gene.
:param mutation: The mutation.
:return: A string containtain the phenotype, if it exists.
:return: A string containing the phenotype, if it exists.
"""

return self._pointfinder_info.get_phenotype(gene, mutation)
return self._pointfinder_info.get_value(gene, mutation, "Resistance")

def get_cge_pmid(self, gene, mutation):
"""
Gets the PMID associated with a particular mutation from the Pointfinder Database table.
:param gene: The gene.
:param mutation: The mutation.
:return: A string (not integer) containing the PMID, if it exists.
"""

return self._pointfinder_info.get_value(gene, mutation, "PMID")

def get_cge_required_mutation(self, gene, mutation):
"""
Gets the required mutation associated with a particular mutation from
the Pointfinder Database table.
:param gene: The gene.
:param mutation: The mutation.
:return: A string containing the required mutation, if it exists.
"""

return self._pointfinder_info.get_value(gene, mutation, "Required_mut")

def get_cge_mechanism(self, gene, mutation):
"""
Gets the mechanism associated with a particular mutation from the
Pointfinder Database table.
:param gene: The gene.
:param mutation: The mutation.
:return: A string containing the mechanism, if it exists.
"""

return self._pointfinder_info.get_value(gene, mutation, "Mechanism")

@classmethod
def get_available_organisms(cls):
Expand Down
30 changes: 7 additions & 23 deletions staramr/blast/pointfinder/PointfinderDatabaseInfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,38 +187,22 @@ def get_resistance_nucleotides(self, gene, nucleotide_mutations):

return resistance_mutations

def get_notes(self, gene, mutation):
"""
Gets the note associated with a particular mutation from the Pointfinder Database table.
:param gene: The gene.
:param mutation: The mutation.
:return: A string containtain the Notes, if they exist, or the empty string ("") if
there are no Notes.
"""

matches = self._get_resistance_codon_match(gene, mutation)
matches = matches.fillna("")

# There's a chance of having multiple matches:
notes = ';'.join(matches["Notes"])

return notes

def get_phenotype(self, gene, mutation):
def get_value(self, gene, mutation, attribute):
"""
Gets the value of the given attribute associated with a particular mutation from the Pointfinder Database table.
:param gene: The gene.
:param mutation: The mutation.
:return: A string containing the phenotype, if it exists, or the empty string ("") if
there is no phenotype.
:param attribute: The attribute to get (ex: "Resistance", "PMID", etc.)
:return: A string containing the attribute for the passed gene and
mutation, if it exists, or the empty string ("") if the
attribute is missing.
"""

matches = self._get_resistance_codon_match(gene, mutation)
matches = matches.fillna("")

# There's a chance of having multiple matches:
resistances = ';'.join(matches["Resistance"])
results = ';'.join(matches[attribute])

return resistances
return results
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def _get_result(self, hit, db_mutation):
hit.get_genome_contig_start(),
hit.get_genome_contig_end(),
db_mutation.get_pointfinder_mutation_string(),
self._blast_database.get_notes(hit.get_amr_gene_id(), db_mutation)
self._blast_database.get_cge_notes(hit.get_amr_gene_id(), db_mutation)
]

return result
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ class BlastResultsParserPointfinderResistance(BlastResultsParserPointfinder):
End
Pointfinder Position
CGE Notes
CGE Required Mutation
CGE Mechanism
CGE PMID
'''.strip().split('\n')]

def __init__(self, file_blast_map, arg_drug_table, blast_database, pid_threshold, plength_threshold,
Expand Down Expand Up @@ -84,7 +87,10 @@ def _get_result(self, hit, db_mutation):
hit.get_genome_contig_start(),
hit.get_genome_contig_end(),
db_mutation.get_pointfinder_mutation_string(),
self._blast_database.get_notes(hit.get_amr_gene_id(), db_mutation)
self._blast_database.get_cge_notes(hit.get_amr_gene_id(), db_mutation),
self._blast_database.get_cge_required_mutation(hit.get_amr_gene_id(), db_mutation),
self._blast_database.get_cge_mechanism(hit.get_amr_gene_id(), db_mutation),
self._blast_database.get_cge_pmid(hit.get_amr_gene_id(), db_mutation),
]

return result
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,6 @@ def _get_result_rows(self, hit, database_name):
hit.get_genome_contig_end(),
hit.get_amr_gene_accession(),
hit.get_genome_contig_hsp_seq(),
self._cge_drug_table.get_notes(hit.get_amr_gene_name_with_variant(),
self._cge_drug_table.get_cge_notes(hit.get_amr_gene_name_with_variant(),
hit.get_amr_gene_accession())
]]
2 changes: 1 addition & 1 deletion staramr/databases/resistance/cge/CGEDrugTableResfinder.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def get_drug(self, drug_class, gene_plus_variant, accession):
else:
return drug.iloc[0]

def get_notes(self, gene_plus_variant, accession):
def get_cge_notes(self, gene_plus_variant, accession):
"""
Gets the notes for a gene (plus variant of gene encoded in ResFinder database) and accession.
:param gene_plus_variant: The gene plus variant (e.g., {gene}_{variant} = {blaIMP-58}_{1}).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def get_matches(self, results_table, hit):
result = [hit.get_genome_id(),
", ".join(intersection), # Technically these are also Pointfinder co-ords
row.phenotype,
None, # CGE-predicted phenotype
pd.NA, # CGE-predicted phenotype
"complex", # Type
", ".join(mutation_positions),
"complex", # Creating a mutation string would be confusing for this.
Expand All @@ -65,7 +65,10 @@ def get_matches(self, results_table, hit):
hit.get_genome_contig_start(),
hit.get_genome_contig_end(),
", ".join(intersection),
"This mutation represents a combination of multiple individual mutations."] # The notes.
"This mutation represents a combination of multiple individual mutations.", # The notes.
pd.NA,
pd.NA,
pd.NA]

matches.append(result)

Expand Down
Loading

0 comments on commit abae05f

Please sign in to comment.