diff --git a/CHANGELOG.md b/CHANGELOG.md index 24a895d..70c8e4f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -91,6 +91,10 @@ Removed the following entries 9. Chichiri,"6,14,24","z4,z24",-,,H,TRUE,enterica 10. II 4:a:z39,"1,4,12,[27]",a,z39,,B,FALSE,salamae +### New fields +- Added `antigenic_formula` field that aggregates the O, H1 and H2 antigen values in a single location for convenience. +- Added `--list-of-serovars` option allowing the user to provide a single-column text file listing all serovars of interest to match against the SISTR prediction. This is useful when only a certain list of serovars may be reported. + # 1.1.1 * Fixed issue with sorting of BLAST results (causing cgMLST types to be different between BLAST versions). Pull request #43. diff --git a/README.rst b/README.rst index 6686626..ce6ef37 100644 --- a/README.rst +++ b/README.rst @@ -138,7 +138,7 @@ These are the external dependencies required for ``sistr_cmd``: - Python (>= v2.7 OR >= v3.4) - BLAST+ (>= v2.2.30) - MAFFT (>=v7.271 (2016/1/6)) -- `Mash v1.0+ `_ [optional] +- `Mash v2.0+ `_ [optional] Python Dependencies ------------------- @@ -219,6 +219,11 @@ If you run ``sistr -h``, you should see the following usage info: serovar prediction results. -t THREADS, --threads THREADS Number of parallel threads to run sistr_cmd analysis. + -l LIST_OF_SEROVARS, --list-of-serovars LIST_OF_SEROVARS + A path to a single column text file containing list of + serovar(s) to check serovar prediction against. 
Report + predicted serovar is Y (present) and N (absent) in the + list -v, --verbose Logging verbosity level (-v == show warnings; -vvv == show debug info) -V, --version show program's version number and exit diff --git a/sistr/src/serovar_prediction/__init__.py b/sistr/src/serovar_prediction/__init__.py index fe30e03..6ecacf6 100644 --- a/sistr/src/serovar_prediction/__init__.py +++ b/sistr/src/serovar_prediction/__init__.py @@ -133,6 +133,8 @@ def __init__(self, blast_runner): def search_for_wzx(self): self.wzx_prediction = self.get_antigen_gene_blast_results(self.wzx_prediction, WZX_FASTA_PATH) + #'blast_results', 'is_missing', 'is_perfect_match', 'is_trunc', 'serogroup', 'top_result' + #print( self.wzx_prediction.top_result); raise Exception() if not self.wzx_prediction.is_missing and not self.wzx_prediction.top_result is None : top_result = self.wzx_prediction.top_result top_result_pident = top_result['pident'] @@ -379,7 +381,7 @@ def get_serovar(df, sg, h1, h2, spp): df_prediction = df[(b_spp & b_sg & b_h1 & b_h2)] - logging.debug(f"Antigen to serovar dataframe filtered {spp} {sg} {h1} {h2}:\n{df_prediction}\n") + logging.debug(f"Antigen to serovar dataframe filtered spp={spp} sg={sg} h1={h1} h2{h2}\n Total of {df_prediction.shape[0]} serovar hits:\n{df_prediction}\n") logging.debug('Rough antigenic serovar(s) prediction for subspecies %s sg=%s:h1=%s:h2=%s is %s serovar(s)', spp, sg, h1, h2, list(df_prediction['Serovar'])) if df_prediction.shape[0] > 0: return '|'.join(list(df_prediction['Serovar']))