Skip to content

Commit

Permalink
Merge pull request #22 from ryanjameskennedy/20-pylinting-fix
Browse files Browse the repository at this point in the history
Fix pylinting errors and add pylinting workflow
  • Loading branch information
ryanjameskennedy authored Jan 17, 2024
2 parents 2727e54 + 7422c55 commit 53839f8
Show file tree
Hide file tree
Showing 18 changed files with 596 additions and 324 deletions.
24 changes: 24 additions & 0 deletions .github/workflows/pylint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Lint every tracked Python file with pylint on each push.
name: Pylint

on: [push]

jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # NOTE(review): jasentool/__init__.py declares
        # __python_requires__ = '>=3.11'. If setup.py enforces that value,
        # `pip install -e .` will fail on the 3.10 job - confirm.
        python-version: ["3.10", "3.11", "3.12"]
    steps:
    - uses: actions/checkout@v3
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v3
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install pylint pytest
        pip install -e .
    - name: Analysing the code with pylint
      run: |
        # Disabled checks: W1401 anomalous-backslash-in-string,
        # R0914 too-many-locals, W0718 broad-exception-caught.
        # The job fails when the overall score drops below 9.
        pylint --disable=W1401,R0914,W0718 --fail-under 9 $(git ls-files '*.py')
4 changes: 3 additions & 1 deletion jasentool/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Information regarding jasentool for setup.py"""

__author__ = 'Ryan James Kennedy'
__author_email__ = '[email protected]'
__copyright__ = 'Copyright 2023'
Expand All @@ -6,7 +8,7 @@
__license__ = 'GPL3'
__maintainer__ = 'Ryan James Kennedy'
__maintainer_email__ = '[email protected]'
__name__ = 'jasentool'
__software_name__ = 'jasentool'
__python_requires__ = '>=3.11'
__status__ = 'Production'
__title__ = 'jasentool'
Expand Down
21 changes: 12 additions & 9 deletions jasentool/__main__.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
"""__main__ file that handles help and cli execution"""

import sys
import os

from jasentool import __author__, __copyright__, __version__
from jasentool.cli import get_main_parser
from jasentool.main import OptionsParser

def print_help():
print('''
"""Print help string for jasentool software"""
print(f'''
...::: Jasentool v%s :::...
Author(s): %s
...::: Jasentool v{__version__} :::...
Author(s): {__author__}
Description:
This software is a mongodb tool that fetches, inserts and
Expand All @@ -32,9 +34,10 @@ def print_help():
fix Fix output files from bjorn.
converge Converge tuberculosis mutation catlogues.
qc Extract QC values after alignment.
''' % (__version__, __author__))
''')

def main():
"""Main function that handles cli"""
args = None
if len(sys.argv) == 1:
print_help()
Expand All @@ -57,14 +60,14 @@ def main():
except KeyboardInterrupt:
print('Controlled exit resulting from interrupt signal.')
sys.exit(1)
except Exception as e:
except Exception as error_code:
error_message = 'Uncontrolled exit resulting from an unexpected error.\n\n'
error_message += '-' * 80 + '\n'
error_message += 'EXCEPTION: {}\n'.format(type(e).__name__)
error_message += 'MESSAGE: {}\n'.format(e)
error_message += f'EXCEPTION: {type(error_code).__name__}\n'
error_message += f'MESSAGE: {error_code}\n'
error_message += '-' * 80 + '\n\n'
print(error_message)
sys.exit(1)

if __name__ == "__main__":
main()
main()
115 changes: 91 additions & 24 deletions jasentool/cli.py
Original file line number Diff line number Diff line change
@@ -1,117 +1,184 @@
import os
"""Command line interface module"""

import argparse
from contextlib import contextmanager

@contextmanager
def subparser(parser, name, desc):
    """Create a sub-command parser and yield it for configuration.

    Args:
        parser: Object exposing ``add_parser`` (e.g. the result of
            ``ArgumentParser.add_subparsers()``).
        name: Name of the sub-command.
        desc: Help text shown for the sub-command.

    Yields:
        The parser returned by ``parser.add_parser``. It is created with
        ``conflict_handler='resolve'`` so options such as ``-h`` can be
        redefined, and with ``RawDescriptionHelpFormatter``.
    """
    yield parser.add_parser(
        name,
        conflict_handler='resolve',
        help=desc,
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )

@contextmanager
def mutex_group(parser, required):
    """Yield a mutually exclusive group nested in a titled argument group.

    Args:
        parser: Parser the argument group is added to.
        required: Whether one argument of the exclusive group must be given.
            Also selects the word "required" or "optional" in the group title.

    Yields:
        The mutually exclusive group created inside the titled group.
    """
    arg_type = "required" if required else "optional"
    group = parser.add_argument_group(f'mutually exclusive {arg_type} arguments')
    yield group.add_mutually_exclusive_group(required=required)

@contextmanager
def arg_group(parser, name):
    """Yield a new argument group.

    Args:
        parser: Parser the group is added to.
        name: Title of the argument group.

    Yields:
        The group returned by ``parser.add_argument_group(name)``.
    """
    yield parser.add_argument_group(name)

def __query(group, required):
    """Add the ``-q/--query`` argument (one or more values) to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    group.add_argument('-q', '--query', required=required, nargs='+', help='sample query')

def __sample_id(group, required):
    """Add the ``--sample_id`` string argument to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    group.add_argument('--sample_id', required=required, type=str, help='sample ID')

def __input_dir(group, required, help):
    """Add the ``--input_dir`` argument to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
        help: Help text for the argument (the name shadows the builtin; it is
            kept because it is part of the call signature).
    """
    group.add_argument('--input_dir', required=required, help=help)

def __input_file(group, required, help):
    """Add the ``-i/--input_file`` argument (one or more values) to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory; forwarded to argparse.
        help: Help text for the argument (the name shadows the builtin; it is
            kept because it is part of the call signature).
    """
    group.add_argument('-i', '--input_file', required=required, nargs='+', help=help)

def __csv_file(group, required, help):
    """Add the ``--csv_file`` argument to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
        help: Help text for the argument (the name shadows the builtin; it is
            kept because it is part of the call signature).
    """
    group.add_argument('--csv_file', required=required, help=help)

def __sh_file(group, required, help):
    """Add the ``--sh_file`` argument to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
        help: Help text for the argument (the name shadows the builtin; it is
            kept because it is part of the call signature).
    """
    group.add_argument('--sh_file', required=required, help=help)

def __bam_file(group, required):
    """Add the ``--bam_file`` string argument to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    group.add_argument('--bam_file', required=required, type=str, help='input bam file')

def __bed_file(group, required):
    """Add the ``--bed_file`` string argument to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    group.add_argument('--bed_file', required=required, type=str, help='input bed file')

def __baits_file(group, required):
    """Add the ``--baits_file`` string argument (default ``None``) to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    group.add_argument('--baits_file', required=required, type=str, default=None,
                       help='input baits file')

def __reference(group, required, help):
    """Add the ``--reference`` string argument to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
        help: Help text for the argument (the name shadows the builtin; it is
            kept because it is part of the call signature).
    """
    group.add_argument('--reference', required=required, type=str, help=help)

def __output_file(group, required, help):
    """Add the ``-o/--output_file`` string argument to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
        help: Help text for the argument (the name shadows the builtin; it is
            kept because it is part of the call signature).
    """
    group.add_argument('-o', '--output_file', required=required, type=str, help=help)

def __output_dir(group, required):
    """Add the ``--output_dir`` string argument to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    group.add_argument('--output_dir', required=required, type=str,
                       help='directory to output files')

def __analysis_dir(group, required):
    """Add the ``--analysis_dir`` string argument to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    group.add_argument('--analysis_dir', required=required, type=str,
                       help='analysis results dir containing jasen results')

def __restore_dir(group, required):
    """Add the ``--restore_dir`` string argument to ``group``.

    The argument defaults to ``/fs2/seqdata/restored``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    group.add_argument('--restore_dir', required=required, type=str,
                       default='/fs2/seqdata/restored',
                       help='directory user wishes spring files to be restored to')

def __remote_dir(group, required):
    """Add the ``--remote_dir`` string argument to ``group``.

    The argument defaults to ``/fs1/bjorn/jasen``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    # NOTE(review): this help text is identical to --restore_dir's and looks
    # copy-pasted; confirm the intended wording for the remote directory.
    group.add_argument('--remote_dir', required=required, type=str,
                       default='/fs1/bjorn/jasen',
                       help='directory user wishes spring files to be restored to')

def __restore_file(group, required):
    """Add the ``--restore_file`` string argument to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    group.add_argument('--restore_file', required=required, type=str,
                       help='filepath bash shell script (.sh) to be output')

def __missing_log(group, required):
    """Add the ``--missing_log`` string argument to ``group``.

    The argument defaults to ``missing_samples.log``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    group.add_argument('--missing_log', required=required, type=str,
                       default='missing_samples.log',
                       help='file containing missing files')

def __assay(group, required):
    """Add the ``--assay`` string argument to ``group``.

    The argument defaults to ``jasen-saureus-dev``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    group.add_argument('--assay', required=required, type=str,
                       default='jasen-saureus-dev',
                       help='assay for jasen to run')

def __platform(group, required):
    """Add the ``--platform`` string argument to ``group``.

    The argument defaults to ``illumina``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    group.add_argument('--platform', required=required, type=str,
                       default='illumina',
                       help='sequencing platform for jasen to run')

def __uri(group):
    """Add the ``--address/--uri`` MongoDB address argument to ``group``.

    The argument defaults to ``mongodb://localhost:27017/``. The first option
    string is ``--address``, so argparse stores the value as ``address``.

    Args:
        group: Parser or argument group to add the argument to.
    """
    # Adjacent literals instead of a backslash continuation inside the string,
    # so the help text does not depend on source indentation.
    group.add_argument('--address', '--uri',
                       default='mongodb://localhost:27017/',
                       help='Mongodb host address. '
                            'Use: `sudo lsof -iTCP -sTCP:LISTEN | grep mongo` to get address')

def __db_name(group, required):
    """Add the ``--db_name`` argument to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    # Adjacent literals instead of a backslash continuation inside the string,
    # so the help text does not depend on source indentation.
    group.add_argument('--db_name', required=required,
                       help='Mongodb database name address. '
                            'Use: `show dbs` to get db name')

def __db_collection(group, required):
    """Add the ``--db_collection`` argument to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    # Adjacent literals instead of a backslash continuation inside the string,
    # so the help text does not depend on source indentation.
    group.add_argument('--db_collection', required=required,
                       help='Mongodb collection name. '
                            'Use: `show collections` to get db collection')

def __out_format(group, required):
    """Add the ``-f/--out_format`` string argument to ``group``.

    The argument defaults to ``bed``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    group.add_argument('-f', '--out_format', required=required, type=str,
                       default="bed", help='output format')

def __accession(group, required):
    """Add the ``-a/--accession`` string argument to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    group.add_argument('-a', '--accession', required=required, type=str, help='accession number')

def __remote_hostname(group, required):
    """Add the ``--remote_hostname`` string argument to ``group``.

    The argument defaults to ``rs-fs1.lunarc.lu.se``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the argument is mandatory.
    """
    group.add_argument('--remote_hostname', required=required, type=str,
                       default='rs-fs1.lunarc.lu.se', help='remote hostname')

def __prefix(group):
    """Add the ``--prefix`` string argument to ``group``.

    The argument defaults to ``jasentool_results_``.

    Args:
        group: Parser or argument group to add the argument to.
    """
    group.add_argument('--prefix', type=str, default='jasentool_results_',
                       help='prefix for all output files')

def __auto_start(group, required):
    """Add the ``--auto_start`` boolean flag (default ``False``) to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the flag is mandatory.
    """
    group.add_argument('--auto_start', required=required, dest='auto_start', action='store_true',
                       default=False, help='automatically start')

def __remote(group, required):
    """Add the ``--remote`` boolean flag (default ``False``) to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the flag is mandatory.
    """
    group.add_argument('--remote', required=required, dest='remote', action='store_true',
                       default=False, help='remote copy')

def __combined_output(group):
    """Add the ``--combined_output`` boolean flag to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
    """
    group.add_argument('--combined_output', dest='combined_output', action='store_true',
                       help='combine all of the outputs into one output')

def __sample_sheet(group, required):
    """Add the ``--sample_sheet`` boolean flag to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
        required: Whether the flag is mandatory.
    """
    group.add_argument('--sample_sheet', required=required, dest='sample_sheet',
                       action='store_true', help='sample sheet input')

def __cpus(group):
    """Add the ``--cpus`` integer argument (default ``2``) to ``group``.

    Args:
        group: Parser or argument group to add the argument to.
    """
    group.add_argument('--cpus', dest='cpus', type=int, default=2, help='input cpus')

def __help(group):
    """Add the ``-h/--help`` argument, using argparse's ``help`` action, to ``group``.

    Args:
        group: Parser or argument group to add the argument to. If the owning
            parser already defines ``-h``, it must use
            ``conflict_handler='resolve'`` for this call to succeed.
    """
    group.add_argument('-h', '--help', action='help', help='show help message')

def get_main_parser():
"""Get/build the main argument parser"""
main_parser = argparse.ArgumentParser(prog='jasentool', conflict_handler='resolve')
sub_parsers = main_parser.add_subparsers(help='--', dest='subparser_name')
with subparser(sub_parsers, 'find', 'Find sample from given mongo db') as parser:
Expand Down Expand Up @@ -209,4 +276,4 @@ def get_main_parser():
__cpus(group)
__help(group)

return main_parser
return main_parser
20 changes: 14 additions & 6 deletions jasentool/converge.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
"""Module to converge mutation catalogues"""

import os
import pandas as pd
from jasentool.who import WHO
from jasentool.genome import Genome
from jasentool.tbprofiler import Tbprofiler
from jasentool.utils import Utils

class Converge(object):
class Converge:
"""Class that converges mutation catalogues"""
def __init__(self, download_dir):
self.download_dir = download_dir
self.fohm_fpath = os.path.join(os.path.dirname(__file__), "data/dbs/fohm.csv")
Expand Down Expand Up @@ -45,6 +48,7 @@ def compare_columns(self, tbdb_df, who_df, column_names):
return intersection_df, unique_tbdb_df, unique_who_df

def run(self):
"""Run the retrieval and convergence of mutation catalogues"""
utils = Utils()
# Download the genome
mycobacterium_genome = Genome("NC_000962.3", "AL123456.3", self.download_dir, "h37rv")
Expand All @@ -55,14 +59,18 @@ def run(self):
tbprofiler = Tbprofiler(self.tbdb_filepath)
#h37rv_gb_filepath = mycobacterium_genome.download_genbank()
who_df = who._parse(fasta_filepath, gff_filepath, self.download_dir)
tbdb_df = tbprofiler._parse(fasta_filepath, gff_filepath, self.download_dir)
#tbdb_df, who_df = pd.read_csv("/data/bnf/dev/ryan/pipelines/jasen/converge/tbdb.csv"), pd.read_csv("/data/bnf/dev/ryan/pipelines/jasen/converge/who.csv")
tbdb_df = tbprofiler._parse(self.download_dir)
#tbdb_df = pd.read_csv("/data/bnf/dev/ryan/pipelines/jasen/converge/tbdb.csv")
#who_df = pd.read_csv("/data/bnf/dev/ryan/pipelines/jasen/converge/who.csv")
fohm_df = pd.read_csv(self.fohm_fpath)
intersection_df, unique_tbdb_df, unique_who_df = self.compare_columns(tbdb_df, who_df, ['Drug', 'Gene', 'Mutation'])
fohm_tbdb_df = pd.concat([intersection_df, unique_tbdb_df, fohm_df], ignore_index=True).drop_duplicates()
column_names = ['Drug', 'Gene', 'Mutation']
intersection_df, unique_tbdb_df, unique_who_df = self.compare_columns(tbdb_df, who_df, column_names)
dfs_to_concat = [intersection_df, unique_tbdb_df, fohm_df]
fohm_tbdb_df = pd.concat(dfs_to_concat, ignore_index=True).drop_duplicates()
intersection_df.to_csv(self.intersection_outfpath, index=False)
unique_tbdb_df.to_csv(self.unique_tbdb_outfpath, index=False)
unique_who_df.to_csv(self.unique_who_outfpath, index=False)
fohm_tbdb_df.to_csv(self.fohm_tbdb_outfpath, index=False)
converged_df = pd.concat([intersection_df, unique_tbdb_df, unique_who_df, fohm_df], ignore_index=True).drop_duplicates()
dfs_to_converge = [intersection_df, unique_tbdb_df, unique_who_df, fohm_df]
converged_df = pd.concat(dfs_to_converge, ignore_index=True).drop_duplicates()
converged_df.to_csv(self.convereged_outfpath, index=False)
10 changes: 7 additions & 3 deletions jasentool/convert.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
class Convert(object):
"""Module that converts file type"""

class Convert:
"""Convert class for converting files into desired format"""
@staticmethod
def targets2bed(target_file, accn):
"""Convert cgmlst locus targets to bed file format"""
bed_output = ""
with open(target_file, 'r') as fin:
with open(target_file, 'r', encoding="utf-8") as fin:
for line in fin:
if line.startswith("Locus"):
continue
Expand All @@ -11,4 +15,4 @@ def targets2bed(target_file, accn):
length = int(line_split[4])
end = start + length
bed_output += f"{accn}\t{start}\t{end}\n"
return bed_output
return bed_output
Loading

0 comments on commit 53839f8

Please sign in to comment.