From 3d8ae967ff3a5725832f2630afb314ff981c62aa Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Wed, 16 Oct 2024 11:46:44 +0200 Subject: [PATCH 1/8] Update to v0.11.0 --- CHANGELOG.md | 8 ++++++++ prp/__version__.py | 2 +- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b398c8a..094873a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,13 @@ ## [Unreleased] +### Added + +### Fixed + +### Changed + +## [0.11.0] + ### Added - Added emmtyper and parser diff --git a/prp/__version__.py b/prp/__version__.py index 5351733..721fd46 100644 --- a/prp/__version__.py +++ b/prp/__version__.py @@ -1,2 +1,2 @@ """PRP version""" -VERSION = "0.10.1" +VERSION = "0.11.0" From 161dd2d8d44bf5473301cf1a454ab159749c7fd7 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Mon, 11 Nov 2024 16:40:54 +0100 Subject: [PATCH 2/8] Handle alt types for emmtyper --- prp/models/typing.py | 6 +++--- prp/parse/phenotype/emmtyper.py | 7 ++++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/prp/models/typing.py b/prp/models/typing.py index 4d04e00..4307ee8 100644 --- a/prp/models/typing.py +++ b/prp/models/typing.py @@ -103,9 +103,9 @@ class TypingResultEmm(RWModel): """Container for emmtype gene information""" cluster_count: int - emmtype: str - emm_like_alleles: list[str] - emm_cluster: str + emmtype: str | None = None + emm_like_alleles: list[str] | None = None + emm_cluster: str | None = None class EmmTypingMethodIndex(RWModel): diff --git a/prp/parse/phenotype/emmtyper.py b/prp/parse/phenotype/emmtyper.py index 0e4049a..667948b 100644 --- a/prp/parse/phenotype/emmtyper.py +++ b/prp/parse/phenotype/emmtyper.py @@ -2,6 +2,7 @@ import logging import pandas as pd +import numpy as np from typing import Any @@ -16,6 +17,7 @@ def parse_emmtyper_pred(path: str) -> EmmTypingMethodIndex: pred_result = [] df = pd.read_csv(path, sep='\t', header=None) df.columns = ["sample_name", "cluster_count", "emmtype", "emm_like_alleles", "emm_cluster"] + df.replace(["-", ""], None, inplace=True) df_loa = df.to_dict(orient="records") for emmtype_array in df_loa: emmtype_results = _parse_emmtyper_results(emmtype_array) @@ -31,10 +33,9 @@ def parse_emmtyper_pred(path: str) -> EmmTypingMethodIndex: def _parse_emmtyper_results(info: dict[str, Any]) -> TypingResultEmm: """Parse emm gene prediction results.""" - emm_like_alleles = info["emm_like_alleles"].split(";") + emm_like_alleles = info["emm_like_alleles"].split(";") if not pd.isna(info["emm_like_alleles"]) else None return TypingResultEmm( - # info - cluster_count=info["cluster_count"], + cluster_count=int(info["cluster_count"]), emmtype=info["emmtype"], emm_like_alleles=emm_like_alleles, emm_cluster=info["emm_cluster"], From 69326bc2de76d8a7fd2b821632e9b7e7cf904800 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Mon, 11 Nov 2024 16:45:13 +0100 Subject: [PATCH 3/8] Handle emmtyper alt types --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 094873a..dea76e4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ### Added ### Fixed + - Handle alt types for emmtyper ### Changed From 1a2f147ef43cbc6a1d1a5fe3e63c616ef785bfc2 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Mon, 11 Nov 2024 16:47:15 +0100 Subject: [PATCH 4/8] Rm numpy import --- prp/parse/phenotype/emmtyper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/prp/parse/phenotype/emmtyper.py b/prp/parse/phenotype/emmtyper.py index 667948b..65eefa3 100644 --- a/prp/parse/phenotype/emmtyper.py +++ b/prp/parse/phenotype/emmtyper.py @@ -2,7 +2,6 @@ import logging import pandas as pd -import numpy as np from typing import Any From 253ad8dd1b797db0653ed41fcdf62e9fe30cae62 Mon Sep 17 00:00:00 2001 From: ryanjameskennedy Date: Mon, 11 Nov 2024 16:59:47 +0100 Subject: [PATCH 5/8] Update to v0.11.1 --- CHANGELOG.md | 9 +++++++++ prp/__version__.py | 2 +- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dea76e4..2c35279 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,15 @@ ### Added ### Fixed + +### Changed + +## [0.11.1] + +### Added + +### Fixed + - Handle alt types for emmtyper ### Changed diff --git a/prp/__version__.py b/prp/__version__.py index 721fd46..697e539 100644 --- a/prp/__version__.py +++ b/prp/__version__.py @@ -1,2 +1,2 @@ """PRP version""" -VERSION = "0.11.0" +VERSION = "0.11.1" From b9a646262ec376c70d90f1e9609eaf0c907e9ab3 Mon Sep 17 00:00:00 2001 From: Markus Johansson Date: Tue, 19 Nov 2024 14:56:34 +0100 Subject: [PATCH 6/8] Added mypy as dev dependency --- CHANGELOG.md | 2 ++ pyproject.toml | 1 + 2 files changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c35279..8407c5d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ ### Changed +- Added mypy as test dependency + ## [0.11.1] ### Added diff --git a/pyproject.toml b/pyproject.toml index 61aff58..4dbbef4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ dev = [ ] test = [ "pytest-cov ~=4.1.0", + "mypy == 1.13.0" ] [build-system] From 301a5f065c2912c00a973a96aa07298b45ba5564 Mon Sep 17 00:00:00 2001 From: Markus Johansson Date: Tue, 19 Nov 2024 15:47:19 +0100 Subject: [PATCH 7/8] Fixed crash caused by mlst results with no calls --- prp/parse/typing.py | 5 ++++- tests/fixtures/__init__.py | 5 +++++ tests/fixtures/mlst.nocall.json | 9 +++++++++ tests/parse/test_typing.py | 28 ++++++++++++++++++++++++++-- 4 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 tests/fixtures/mlst.nocall.json diff --git a/prp/parse/typing.py b/prp/parse/typing.py index 6b4cec8..5512b8a 100644 --- a/prp/parse/typing.py +++ b/prp/parse/typing.py @@ -43,6 +43,9 @@ def parse_mlst_results(mlst_fpath: str) -> TypingResultMlst: LOG.info("Parsing mlst results") with open(mlst_fpath, "r", encoding="utf-8") as jsonfile: result = json.load(jsonfile)[0] + # get raw allele info + alleles = {} if result.get("alleles") is None else result["alleles"] + # create typing result object result_obj = TypingResultMlst( scheme=result["scheme"], sequence_type=( @@ -50,7 +53,7 @@ def parse_mlst_results(mlst_fpath: str) -> TypingResultMlst: ), alleles={ gene: _process_allele_call(allele) - for gene, allele in result["alleles"].items() + for gene, allele in alleles.items() }, ) return MethodIndex( diff --git a/tests/fixtures/__init__.py b/tests/fixtures/__init__.py index 214fa3e..527fa3c 100644 --- a/tests/fixtures/__init__.py +++ b/tests/fixtures/__init__.py @@ -5,3 +5,8 @@ from .saureus import * from .shigella import * from .streptococcus import * + +@pytest.fixture() +def mlst_result_path_no_call(data_path): + """Get path for mlst file where alleles was not called.""" + return str(data_path.joinpath("mlst.nocall.json")) \ No newline at end of file diff --git a/tests/fixtures/mlst.nocall.json b/tests/fixtures/mlst.nocall.json new file mode 100644 index 0000000..8aa1b81 --- /dev/null +++ b/tests/fixtures/mlst.nocall.json @@ -0,0 +1,9 @@ +[ + { + "sequence_type" : "-", + "scheme" : "-", + "alleles" : null, + "id" : "unique_file_id", + "filename" : "assembly_file_name.fasta" + } +] \ No newline at end of file diff --git a/tests/parse/test_typing.py b/tests/parse/test_typing.py index 74000b9..c2015b8 100644 --- a/tests/parse/test_typing.py +++ b/tests/parse/test_typing.py @@ -2,7 +2,7 @@ import pytest import logging -from prp.parse.typing import replace_cgmlst_errors +from prp.parse.typing import replace_cgmlst_errors, parse_mlst_results from prp.models.typing import ChewbbacaErrors # build test cases for handeling chewbacca allele caller errors and annotations @@ -84,4 +84,28 @@ def test_replace_cgmlst_errors_warnings(caplog): # run test that a warning was triggered if input is unknown string allele = "A_STRANGE_STRING" replace_cgmlst_errors(allele, include_novel_alleles=True, correct_alleles=True) - assert allele in caplog.text \ No newline at end of file + assert allele in caplog.text + + +def test_parse_mlst_result(ecoli_mlst_path): + """Test parsing of MLST result file.""" + # FIRST run result parser + res_obj = parse_mlst_results(ecoli_mlst_path) + + # THEN verify result type + assert res_obj.type == 'mlst' + # THEN verify software + assert res_obj.software == 'mlst' + # THEN verify sequence type and allele assignment + assert res_obj.result.sequence_type == 58 + assert len(res_obj.result.alleles) == 8 + + +def test_parse_mlst_result_w_no_call(mlst_result_path_no_call): + """Test parsing of MLST results file where the alleles was not called.""" + # FIRST run result parser + res_obj = parse_mlst_results(mlst_result_path_no_call) + + # THEN verify that sequence type is None + assert res_obj.result.sequence_type is None + assert len(res_obj.result.alleles) == 0 \ No newline at end of file From dbf5795792dd5744693fc4d3f6ef4bedbf0ce90a Mon Sep 17 00:00:00 2001 From: Markus Johansson Date: Tue, 19 Nov 2024 15:47:47 +0100 Subject: [PATCH 8/8] Updated CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8407c5d..894d9a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,8 @@ ### Fixed +- Fixed issue where mlst results with no calls crashed PRP. + ### Changed - Added mypy as test dependency