Skip to content

Commit

Permalink
Merge pull request #46 from Darcy220606/dev
Browse files Browse the repository at this point in the history
Add release 0.1.5 updates
  • Loading branch information
Darcy220606 authored Oct 27, 2022
2 parents 6e5319e + 311d8c0 commit 45f073f
Show file tree
Hide file tree
Showing 11 changed files with 180 additions and 31 deletions.
13 changes: 12 additions & 1 deletion CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1 +1,12 @@
v<version>, <date> -- Initial release.
v<0.1.0>, <date> -- Initial release.
v<0.1.1>, <date> -- minor changes.
v<0.1.2>, <date> -- minor changes.
v<0.1.3>, <10.10.2022> -- PyPI package and conda-recipe / biocontainer release.
v<0.1.4>, <18.10.2022>
-- Included a new optional argument "--complete_summary" to concatenate the results from multiple samples in one table
-- Added a universal log file, to append to an existing log file rather than creating multiple new ones every time a sample is run.
-- The "--path_list" can be called multiple times to include a list of files from individual samples in multiple lists
v<0.1.5>, <27.10.2022>
-- adapt reading of hmmer_hmmsearch output to deal with varying header lines
-- fix syntax in "if" statements in "check_input.py"
-- include "check_faa_path" function, to find .faa files also in subdirectories
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# AMPcombi : AntiMicrobial Peptides parsing and functional classification tool

# ![Logo](docs/amp-combi-logo.png)
<img src="https://raw.githubusercontent.com/Darcy220606/AMPcombi/main/docs/amp-combi-logo.png" width="620" height="200" />

This tool parses the results of antimicrobial peptide (AMP) prediction tools into a single table and aligns the hits against a reference AMP database for functional classifications.

Expand Down
12 changes: 5 additions & 7 deletions ampcombi/ampcombi.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
add_help=True)

parser.add_argument("--amp_results", dest="amp", nargs='?', help="Enter the path to the folder that contains the different tool's output files in sub-folders named by sample name. \n If paths are to be inferred, sub-folders in this results-directory have to be organized like '/amp_results/toolsubdir/samplesubdir/tool.sample.filetype' \n (default: %(default)s)",
type=str, default="./test_files/")
type=str, default='./test_files/')
parser.add_argument("--sample_list", dest="samples", nargs='*', help="Enter a list of sample-names, e.g. sample_1 sample_2 sample_n. \n If not given, the sample-names will be inferred from the folder structure",
default=[])
parser.add_argument("--path_list", dest="files", nargs='*', action='append', help="Enter the list of paths to the files to be summarized as a list of lists, e.g. --path_list path/to/my/sample1.ampir.tsv path/to/my/sample1.amplify.tsv --path_list path/to/my/sample2.ampir.ts path/to/my/sample2.amplify.tsv. \n If not given, the file-paths will be inferred from the folder structure",
Expand Down Expand Up @@ -73,7 +73,7 @@
# MAIN FUNCTION
#########################################
def main_workflow():
# print_header()
# print AMPcombi header
print_header()
# check input parameters
check_input_complete(path, samplelist_in, filepaths_in, tools)
Expand All @@ -83,7 +83,6 @@ def main_workflow():
filepaths = check_pathlist(filepaths_in, samplelist, fileending, path)
# check amp_ref_database filepaths and create a directory if input empty
db = check_ref_database(database)

# initiate a final_summary dataframe to concatenate each new sample-summary
if (complete_summary):
complete_summary_df = pd.DataFrame([])
Expand All @@ -98,11 +97,12 @@ def main_workflow():
os.makedirs(samplelist[i], exist_ok=True)
# fill main_list with tool-output filepaths for sample i
read_path(main_list, filepaths[i], p, tooldict, faa_path, samplelist[i])
# get the path to the samples' corresponding faa file
faa_name = check_faa_path(faa_path, samplelist[i])
# use main_list to create the summary file for sample i
summary_df = summary(main_list, samplelist[i], faa_path)
summary_df = summary(main_list, samplelist[i], faa_name)
# Generate the AMP-faa.fasta for sample i
out_path = samplelist[i] +'/'+samplelist[i]+'_amp.faa'
faa_name = faa_path+samplelist[i]+'.faa'
amp_fasta(summary_df, faa_name, out_path)
amp_faa_paths.append(out_path)
print(f'The fasta containing AMP sequences for {samplelist[i]} was saved to {samplelist[i]}/ \n')
Expand Down Expand Up @@ -131,12 +131,10 @@ def main_workflow():
def main():
    """Run the workflow, optionally sending everything it prints to ``ampcombi.log``.

    When ``args.log_file`` is set, stdout is redirected into ``ampcombi.log``
    while ``main_workflow()`` runs; otherwise the workflow prints to the console.
    """
    # The explicit comparison is kept as written: the declared type of the
    # log-file option is not visible from this function.
    if args.log_file == True:  # noqa: E712
        # Mode 'a' creates the file when it is missing and appends when it
        # exists, so a single branch covers the first run and every later one
        # without a separate os.path.exists() check.
        with open('ampcombi.log', 'a') as f:
            with redirect_stdout(f):
                main_workflow()
    else:
        main_workflow()
Expand Down
30 changes: 20 additions & 10 deletions ampcombi/check_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import os
import sys
import pathlib
from amp_database import download_DRAMP

def check_samplelist(samplelist, tools, path):
Expand All @@ -22,13 +23,21 @@ def check_pathlist(filepaths, samplelist, fileending, path):
pathlist = []
for dirpath, subdirs, files in os.walk(path):
for file in files:
if ((sample in dirpath)&((list(filter(file.endswith, fileending))!=[]))):
if ((sample in dirpath) and ((list(filter(file.endswith, fileending))!=[]))):
pathlist.append(dirpath+'/'+file)
filepaths.append(pathlist)
return filepaths
else:
return filepaths

def check_faa_path(faa_path, samplename):
    """Find the single ``.faa`` file that belongs to *samplename* below *faa_path*.

    The folder is searched recursively for files whose name contains
    *samplename* and ends in ``.faa``.

    Args:
        faa_path: Folder given with ``--faa_path``; sub-directories are searched too.
        samplename: Name of the sample whose protein fasta is wanted.

    Returns:
        ``pathlib.Path`` of the matching file.

    Raises:
        SystemExit: If no file matches, or if several match and the choice
            cannot be narrowed down to exactly one file named
            ``<samplename>.faa``.
    """
    candidates = sorted(pathlib.Path(faa_path).rglob(f"*{samplename}*.faa"))
    if not candidates:
        sys.exit(f'AMPcombi interrupted: There is no .faa file containing {samplename} in the folder given with --faa_path')
    if len(candidates) == 1:
        return candidates[0]
    # The glob matches by substring, so "sample_1" also picks up
    # "sample_10.faa". Prefer the one file whose stem is exactly the sample name.
    exact = [candidate for candidate in candidates if candidate.stem == samplename]
    if len(exact) != 1:
        sys.exit(f'AMPcombi interrupted: There is more than one .faa file for {samplename} in the folder given with --faa_path')
    return exact[0]

def check_ref_database(database):
if(database==None):
print('<--AMP_database> was not given, the current DRAMP general-AMP database will be downloaded and used')
Expand All @@ -52,38 +61,39 @@ def check_directory_tree(path, tools, samplelist):
print(f'Checking directory tree {path} for sub-directories \n ')
# get first level of sub-directories, check if at least one is named by a tool-name
subdirs_1 = [x for x in os.listdir(path) if x in tools]
if (subdirs_1 == []):
if (not subdirs_1):
sys.exit(f'AMPcombi interrupted: First level sub-directories in {path} are not named by tool-names. Please check the directories names and the keys given in "--tooldict". \n ')
else:
print('First level sub-directories passed check.')
# get second level of sub-directories, check if at least one is named by a sample-name
subdirs_2 = []
for dir in subdirs_1:
subdirs = [x for x in os.listdir(path+dir) if x in samplelist]
if (subdirs != []):
if (subdirs):
subdirs_2.append(subdirs)
if (subdirs_2 == []):
if (not subdirs_2):
sys.exit(f'AMPcombi interrupted: Second level sub-directories in {path} are not named by sample-names. Please check the directories names and the names given as "--sample_list" \n ')
else:
print('Second level sub-directories passed check')
print('Finished directory check')

def check_input_complete(path, samplelist, filepaths, tools):
    """Validate the combination of ``--amp_results``, ``--sample_list`` and ``--path_list``.

    Args:
        path: Head folder with the tool outputs (``--amp_results``).
        samplelist: Sample names given by the user; may be empty.
        filepaths: File paths given by the user; may be empty.
        tools: Tool names, forwarded to ``check_directory_tree``.

    Raises:
        SystemExit: If the inputs are incomplete or inconsistent.
    """
    if not check_path(path):
        # 1. Head folder does not exist and filepaths-list was not given
        if not filepaths:
            sys.exit('AMPcombi interrupted: Please provide the correct path to either the folder containing all amp files to be summarized (--amp_results) or the list of paths to the files (--path_list)')
        # 2. Head folder does not exist, filepaths-list was given but no samplelist
        if not samplelist:
            sys.exit('AMPcombi interrupted: Please provide a list of sample-names (--sample_list) in addition to --path_list')
        # 3. Head folder does not exist, filepaths- and samplelist are given
        # NOTE(review): --path_list is declared with action='append', so each
        # element may itself be a list of paths -- confirm check_path and the
        # substring test below cope with that.
        for file in filepaths:
            print('in check_input_complete the file in filepath is:')
            # 3.1. check if paths in filepath-list exist
            if not check_path(file):
                sys.exit(f'AMPcombi interrupted: The path {file} does not exist. Please check the --path_list input.')
            # 3.2. check if paths contain sample-names from samplelist
            if not any(n in file for n in samplelist):
                sys.exit(f'AMPcombi interrupted: The path {file} does not contain any of the sample-names given in --sample_list')
    # 4. Head folder and sample-list are given
    elif samplelist:
        check_directory_tree(path, tools, samplelist)
1 change: 1 addition & 0 deletions ampcombi/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ dependencies:
- et_xmlfile=1.0.1
- icu=70.1
- idna=3.4
- jsonschema=4.16.0
- libblas=3.9.0
- libcblas=3.9.0
- libcxx=14.0.6
Expand Down
2 changes: 1 addition & 1 deletion ampcombi/print_header.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

def print_header():
print("""
$$$$$$\ $$\ $$\ $$$$$$$\ $$\ $$\|
$$$$$$\ $$\ $$\ $$$$$$$\ $$\ $$\|
$$ __$$\ $$$\ $$$ |$$ __$$\ $$ | \__|
$ / $$ |$$$$\ $$$$ |$$ | $$ | $$$$$$$\ $$$$$$\ $$$$$$\$$$$\ $$$$$$$\ $$\
$$$$$$$$ |$$\$$\$$ $$ |$$$$$$$ |$$ _____|$$ __$$\ $$ _$$ _$$\ $$ __$$\ $$ |
Expand Down
42 changes: 33 additions & 9 deletions ampcombi/reformat_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import pandas as pd
from Bio import SeqIO
import os

#########################################
# FUNCTION: KEEP ONLY LINES WITH KEYWORD
Expand Down Expand Up @@ -100,14 +101,37 @@ def neubi(path, p):
# AMP_hmmsearch
#########################################
def hmmsearch(path):
    """Read an hmmsearch text report and return its per-sequence hits.

    Banner and header lines are removed by keyword so that a varying number
    of header lines does not matter. The remaining text is parsed as a
    whitespace-separated table and cut off at the first row containing ``#``,
    which marks the start of the footer.

    Args:
        path: Path to the hmmsearch output file.

    Returns:
        DataFrame with the columns ``contig_id`` and ``evalue_hmmer``; it has
        no rows when the report states that no hits were detected.
    """
    import io  # local import: only needed for the in-memory buffer below

    # fragments that identify header rows to be removed
    key_words = ["# hmmsearch ::", "# HMMER ", "# Copyright (C) ", "# Freely distributed",
                 "# - - - ", "# query HMM file:", "# target sequence database:",
                 "# output directed to file:", "Query:", "Accession:",
                 "Description:", "Scores for complete sequences", "--- full sequence",
                 "# number of worker threads:", "inclusion threshold", "E-value", "-------"]
    no_hits = "[No hits detected that satisfy reporting thresholds]"
    hmmer_dict = {0:'evalue_hmmer', 1:'score_hmmer', 2:'bias', 3:'eval_domain', 4:'score_domain', 5:'bias_domain', 6:'exp_dom', 7:'N_dom', 8:'contig_id'}

    # Keep the filtered report in memory instead of writing a fixed-name
    # temporary file into the working directory.
    with open(path, "r") as fp:
        body = "".join(line for line in fp if not any(phrase in line for phrase in key_words))

    if no_hits in body:
        print('The hmmersearch-file did not contain any hits')
        hmmer_df = pd.DataFrame(columns=list(hmmer_dict.values()))
    else:
        # Assumes every hit line splits into 17 fields (9 score columns plus
        # 8 description fields); the drop below needs labels 9-16 to exist
        # -- TODO confirm against reports from other gene callers.
        hmmer_df = (
            pd.read_table(io.StringIO(body), sep=r"\s+", header=None)
            .rename(columns=hmmer_dict)
            .drop(columns=[9, 10, 11, 12, 13, 14, 15, 16])
            .dropna()
        )
        # identify the footer part of the file: index of first row with '#'
        footer_index = None
        for index, row in hmmer_df.iterrows():
            if row.astype(str).str.contains('#', regex=False).any():
                footer_index = index
                break
        # eliminate all rows with footer information; a report without a
        # footer row keeps every parsed row
        if footer_index is not None:
            hmmer_df = hmmer_df[hmmer_df.index < footer_index]
    return hmmer_df[['contig_id', 'evalue_hmmer']]

#########################################
Expand Down Expand Up @@ -158,7 +182,7 @@ def summary(df_list, samplename, faa_path):
#replace all NAs (where a tool did not identify the contig as AMP) with 0
merge_df = merge_df.fillna(0)
#add amino-acid sequences
faa_df = faa2table(faa_path+samplename+'.faa')
faa_df = faa2table(faa_path)
merge_df = merge_df.merge(faa_df, how='inner', on='contig_id')
# sort by sum of p-values over rows
merge_df = merge_df.set_index('contig_id')
Expand Down
2 changes: 1 addition & 1 deletion ampcombi/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = '0.1.4'
__version__ = '0.1.5'
Binary file removed dist/AMPcombi-0.1.4.tar.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name='AMPcombi',
version='0.1.4',
version='0.1.5',
author='Anan Ibrahim, Louisa Perelo',
author_email='[email protected], [email protected]',
packages=['ampcombi'],
Expand Down
105 changes: 105 additions & 0 deletions shinyapp/shinyapp_html_file.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#!/usr/bin/env Rscript

##############################
# Rscript to visualise the complete summary tables generated by AMPcombi ####
##############################
# Date ####
# October, 19 2022
##############################
# Authors ####
# Anan Ibrahim
# [email protected]
# @darcy220606
##############################
# Working_directory ####
##############################
#setwd("/home/aibrahim/github/testing_ampcombi_on_deepevo")
##############################
# Libraries used + arguments ####
library(dplyr)
library(DT)
library(shiny)
library(data.table)
library(ggplot2)
library("optparse")
library(htmlwidgets)

# Command-line interface: -f/--file is the input table, -o/--out an output name.
# NOTE(review): opt$out is parsed but not used anywhere else in this script.
option_list <- list(
  make_option(c("-f", "--file"), type = "character", default = NULL,
              help = "AMpcombi complete summary table", metavar = "character"),
  make_option(c("-o", "--out"), type = "character", default = "AMPcombi_summary.html",
              help = "Provide the name of the output file [default= %default]", metavar = "character")
)

opt_parser <- OptionParser(option_list = option_list)
opt <- parse_args(opt_parser)

# --file has no default: stop with the usage text instead of letting
# read_csv() fail on NULL with an unrelated error message.
if (is.null(opt$file)) {
  print_help(opt_parser)
  stop("Please provide the AMPcombi complete summary table with -f/--file", call. = FALSE)
}

##############################
# Generate the html file ####
##############################

# Load the summary table and drop duplicated rows
table <-
  readr::read_csv(opt$file) %>%
  unique()

## page 1: About (renders the README of the dev branch)
readme_url <- "https://raw.githubusercontent.com/Darcy220606/AMPcombi/dev/README.md"
about_page <- tabPanel(title = strong('About'),
                       br(),
                       includeMarkdown(readme_url))
## page 2: Summary table
summary_page <- tabPanel(
  title = strong('Summary table'),
  # Widths are expressed in units of the 12-column grid; 12 lets the table
  # span the full page width (20 is outside the grid).
  mainPanel(DTOutput('tbl'), width = 12)
)
## page 3: Plots and figures
## Placeholder layout: the sidebar and both tabs are still empty.
plots_page <- tabPanel(
  title = strong('Plots'),
  titlePanel("Analysis"),
  sidebarLayout(
    sidebarPanel(),
    mainPanel(
      tabsetPanel(
        tabPanel(title = "Plot"),
        # no trailing comma: it would pass an empty argument to tabPanel()
        tabPanel(title = "Statistics")
      )
    )
  )
)


## Shiny app
ui <- navbarPage(
  title = strong("AMPcombi"),
  about_page,
  summary_page,
  plots_page,
  # The positional arguments of navbarPage() are meant to be tab panels;
  # page-wide content such as this stylesheet belongs in `header`.
  header = tags$style(type = 'text/css', '.navbar { background-color: #a2d2ff;
font-family: Arial;
font-size: 15px;
color: #023047; }')
)
## Server logic: renders the summary table into the 'tbl' output.
server <- function(input, output) {
  output$tbl <- renderDT(
    table,
    ## `server` is an argument of renderDT(), not a DataTables option:
    ## with server-side processing only the rows on display are sent to the browser
    server = TRUE,
    class = 'cell-border stripe', ## add column border
    options = list(
      paging = TRUE,      ## paginate the output
      pageLength = 100,   ## number of rows to output for each page
      scrollX = TRUE,     ## enable scrolling on X axis
      scrollY = TRUE,     ## enable scrolling on Y axis
      autoWidth = TRUE,   ## use smart column width handling
      dom = 'Bfrtip',
      buttons = c('csv', 'excel'), ## the user can just download what is on display because server = TRUE
      columnDefs = list(list(targets = '_all', className = 'dt-center'),
                        list(targets = c(0, 8, 9), visible = TRUE))
    ),
    extensions = 'Buttons',
    selection = 'multiple', ## allow selecting several rows
    filter = 'top',         ## include column filters at the top
    rownames = FALSE        ## don't show row numbers/names
  )
}
shinyApp(ui = ui, server = server)

0 comments on commit 45f073f

Please sign in to comment.