Patch release PR: 2.5.4 #585

Merged: 16 commits, Feb 12, 2024
16 changes: 16 additions & 0 deletions CHANGELOG.md
@@ -3,6 +3,22 @@
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## 2.5.4 - [2024-02-12]

### `Added`

### `Changed`

- [#581](https://github.com/nf-core/mag/pull/581) - Added explicit licence text to headers of all custom scripts (reported by @FriederikeHanssen and @maxibor, fix by @jfy133)

### `Fixed`

- [#583](https://github.com/nf-core/mag/pull/583) - Fix GTDB database input when directory supplied (fix by @jfy133)

### `Dependencies`

### `Deprecated`

## 2.5.3 - [2024-02-05]

### `Added`
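For reference, the licence text mentioned in [#581](https://github.com/nf-core/mag/pull/581) is added as a short comment block at the top of each custom script; as seen in the script diffs below, the pattern is roughly the following, with the author placeholder filled in per script (the R script uses the same two lines with R comment syntax):

```python
#!/usr/bin/env python

## Originally written by <original author(s)> and released under the MIT license.
## See git repository (https://github.com/nf-core/mag) for full license text.
```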
4 changes: 2 additions & 2 deletions assets/multiqc_config.yml
@@ -1,7 +1,7 @@
report_comment: >
This report has been generated by the <a href="https://github.com/nf-core/mag/releases/tag/2.5.3" target="_blank">nf-core/mag</a>
This report has been generated by the <a href="https://github.com/nf-core/mag/releases/tag/2.5.4" target="_blank">nf-core/mag</a>
analysis pipeline. For information about how to interpret these results, please see the
<a href="https://nf-co.re/mag/2.5.3/docs/output" target="_blank">documentation</a>.
<a href="https://nf-co.re/mag/2.5.4/docs/output" target="_blank">documentation</a>.
report_section_order:
"nf-core-mag-methods-description":
order: -1000
82 changes: 67 additions & 15 deletions bin/combine_tables.py
@@ -1,5 +1,9 @@
#!/usr/bin/env python

## Originally written by Daniel Straub and Sabrina Krakau and released under the MIT license.
## See git repository (https://github.com/nf-core/mag) for full license text.


import sys
import argparse
import os.path
@@ -8,11 +12,25 @@

def parse_args(args=None):
parser = argparse.ArgumentParser()
parser.add_argument("-d", "--depths_summary", required=True, metavar="FILE", help="Bin depths summary file.")
parser.add_argument("-b", "--busco_summary", metavar="FILE", help="BUSCO summary file.")
parser.add_argument("-c", "--checkm_summary", metavar="FILE", help="CheckM summary file.")
parser.add_argument("-q", "--quast_summary", metavar="FILE", help="QUAST BINS summary file.")
parser.add_argument("-g", "--gtdbtk_summary", metavar="FILE", help="GTDB-Tk summary file.")
parser.add_argument(
"-d",
"--depths_summary",
required=True,
metavar="FILE",
help="Bin depths summary file.",
)
parser.add_argument(
"-b", "--busco_summary", metavar="FILE", help="BUSCO summary file."
)
parser.add_argument(
"-c", "--checkm_summary", metavar="FILE", help="CheckM summary file."
)
parser.add_argument(
"-q", "--quast_summary", metavar="FILE", help="QUAST BINS summary file."
)
parser.add_argument(
"-g", "--gtdbtk_summary", metavar="FILE", help="GTDB-Tk summary file."
)
parser.add_argument("-a", "--cat_summary", metavar="FILE", help="CAT table file.")
parser.add_argument(
"-o",
@@ -45,7 +63,14 @@ def parse_cat_table(cat_table):
for line in f:
maxcol = max(maxcol, len(line.split("\t")))

header = ["bin", "classification", "reason", "lineage", "lineage scores", "full lineage names"]
header = [
"bin",
"classification",
"reason",
"lineage",
"lineage scores",
"full lineage names",
]

df = pd.read_table(
cat_table,
@@ -55,7 +80,11 @@
skiprows=1,
)
# merge all rank columns into a single column
df["CAT_rank"] = df.filter(regex="rank_\d+").apply(lambda x: ";".join(x.dropna()), axis=1).str.lstrip()
df["CAT_rank"] = (
df.filter(regex="rank_\d+")
.apply(lambda x: ";".join(x.dropna()), axis=1)
.str.lstrip()
)
# remove rank_* columns
df.drop(df.filter(regex="rank_\d+").columns, axis=1, inplace=True)

@@ -65,21 +94,34 @@
def main(args=None):
args = parse_args(args)

if not args.busco_summary and not args.checkm_summary and not args.quast_summary and not args.gtdbtk_summary:
sys.exit("No summary specified! Please specify at least BUSCO, CheckM or QUAST summary.")
if (
not args.busco_summary
and not args.checkm_summary
and not args.quast_summary
and not args.gtdbtk_summary
):
sys.exit(
"No summary specified! Please specify at least BUSCO, CheckM or QUAST summary."
)

# GTDB-Tk can only be run in combination with BUSCO or CheckM
if args.gtdbtk_summary and not (args.busco_summary or args.checkm_summary):
sys.exit("Invalid parameter combination: GTDB-TK summary specified, but no BUSCO or CheckM summary!")
sys.exit(
"Invalid parameter combination: GTDB-TK summary specified, but no BUSCO or CheckM summary!"
)

# handle bin depths
results = pd.read_csv(args.depths_summary, sep="\t")
results.columns = ["Depth " + str(col) if col != "bin" else col for col in results.columns]
results.columns = [
"Depth " + str(col) if col != "bin" else col for col in results.columns
]
bins = results["bin"].sort_values().reset_index(drop=True)

if args.busco_summary:
busco_results = pd.read_csv(args.busco_summary, sep="\t")
if not bins.equals(busco_results["GenomeBin"].sort_values().reset_index(drop=True)):
if not bins.equals(
busco_results["GenomeBin"].sort_values().reset_index(drop=True)
):
sys.exit("Bins in BUSCO summary do not match bins in bin depths summary!")
results = pd.merge(
results, busco_results, left_on="bin", right_on="GenomeBin", how="outer"
@@ -107,7 +149,9 @@ def main(args=None):
]
checkm_results = pd.read_csv(args.checkm_summary, usecols=use_columns, sep="\t")
checkm_results["Bin Id"] = checkm_results["Bin Id"] + ".fa"
if not bins.equals(checkm_results["Bin Id"].sort_values().reset_index(drop=True)):
if not bins.equals(
checkm_results["Bin Id"].sort_values().reset_index(drop=True)
):
sys.exit("Bins in CheckM summary do not match bins in bin depths summary!")
results = pd.merge(
results, checkm_results, left_on="bin", right_on="Bin Id", how="outer"
@@ -116,7 +160,9 @@

if args.quast_summary:
quast_results = pd.read_csv(args.quast_summary, sep="\t")
if not bins.equals(quast_results["Assembly"].sort_values().reset_index(drop=True)):
if not bins.equals(
quast_results["Assembly"].sort_values().reset_index(drop=True)
):
sys.exit("Bins in QUAST summary do not match bins in bin depths summary!")
results = pd.merge(
results, quast_results, left_on="bin", right_on="Assembly", how="outer"
@@ -134,7 +180,13 @@ def main(args=None):
cat_results = parse_cat_table(args.cat_summary)
if len(set(cat_results["bin"].to_list()).difference(set(bins))) > 0:
sys.exit("Bins in CAT summary do not match bins in bin depths summary!")
results = pd.merge(results, cat_results[["bin", "CAT_rank"]], left_on="bin", right_on="bin", how="outer")
results = pd.merge(
results,
cat_results[["bin", "CAT_rank"]],
left_on="bin",
right_on="bin",
how="outer",
)

results.to_csv(args.out, sep="\t")

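For illustration only, here is a minimal sketch of driving the reformatted `combine_tables.py` directly from Python, assuming `bin/` is on the Python path and the script has a `__main__` guard; the flag names come from `parse_args()` in the diff above, all file names are invented, and the pipeline itself invokes the script through its Nextflow module, which is not part of this change set:

```python
# Hypothetical stand-alone call of combine_tables.py; flag names are taken from
# parse_args() in the diff above, and all file names are illustrative only.
from combine_tables import main  # assumes bin/ is on sys.path

main([
    "-d", "bin_depths_summary.tsv",  # required bin depths summary
    "-q", "quast_summary.tsv",       # at least one of BUSCO/CheckM/QUAST must be given
    "-o", "bin_summary.tsv",         # combined output table
])
```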
4 changes: 2 additions & 2 deletions bin/domain_classification.R
@@ -1,7 +1,7 @@
#!/usr/bin/env Rscript

# Written by Jim Downie and released under the MIT license.
# See git repository (https://github.com/nf-core/mag) for full license text.
## Written by Jim Downie and released under the MIT license.
## See git repository (https://github.com/nf-core/mag) for full license text.

library(optparse)
library(tidyverse)
15 changes: 12 additions & 3 deletions bin/filter_ssu.py
@@ -1,5 +1,8 @@
#!/usr/bin/env python

## Originally written by Hadrien Gourlé and released under the MIT license.
## See git repository (https://github.com/nf-core/mag) for full license text.

from __future__ import print_function

import os
@@ -28,10 +31,16 @@ def filter(args):


def main():
parser = argparse.ArgumentParser(prog="filter_ssu.py", usage="filter ssu hits from refinem")
parser = argparse.ArgumentParser(
prog="filter_ssu.py", usage="filter ssu hits from refinem"
)
parser.add_argument("--evalue", help="evalue threshold")
parser.add_argument("ssu", metavar="ssu.tsv", help="ssu tsv file generated by refinem")
parser.add_argument("output", metavar="output.tsv", default="output.tsv", help="output file name")
parser.add_argument(
"ssu", metavar="ssu.tsv", help="ssu tsv file generated by refinem"
)
parser.add_argument(
"output", metavar="output.tsv", default="output.tsv", help="output file name"
)
parser.set_defaults(func=filter)
args = parser.parse_args()

35 changes: 28 additions & 7 deletions bin/get_mag_depths.py
@@ -1,5 +1,8 @@
#!/usr/bin/env python

## Originally written by Sabrina Krakau and released under the MIT license.
## See git repository (https://github.com/nf-core/mag) for full license text.

import sys
import argparse
import os.path
@@ -14,7 +17,12 @@
def parse_args(args=None):
parser = argparse.ArgumentParser()
parser.add_argument(
"-b", "--bins", required=True, nargs="+", metavar="FILE", help="Bins: FASTA containing all contigs."
"-b",
"--bins",
required=True,
nargs="+",
metavar="FILE",
help="Bins: FASTA containing all contigs.",
)
parser.add_argument(
"-d",
@@ -23,9 +31,15 @@ def parse_args(args=None):
metavar="FILE",
help="(Compressed) TSV file containing contig depths for each sample: contigName, contigLen, totalAvgDepth, sample1_avgDepth, sample1_var [, sample2_avgDepth, sample2_var, ...].",
)
parser.add_argument("-a", "--assembler", required=True, type=str, help="Assembler name.")
parser.add_argument("-i", "--id", required=True, type=str, help="Sample or group id.")
parser.add_argument("-m", "--binner", required=True, type=str, help="Binning method.")
parser.add_argument(
"-a", "--assembler", required=True, type=str, help="Assembler name."
)
parser.add_argument(
"-i", "--id", required=True, type=str, help="Sample or group id."
)
parser.add_argument(
"-m", "--binner", required=True, type=str, help="Binning method."
)
return parser.parse_args(args)


@@ -56,7 +70,9 @@ def main(args=None):

# Initialize output files
n_samples = len(sample_names)
with open(args.assembler + "-" + args.binner + "-" + args.id + "-binDepths.tsv", "w") as outfile:
with open(
args.assembler + "-" + args.binner + "-" + args.id + "-binDepths.tsv", "w"
) as outfile:
print("bin", "\t".join(sample_names), sep="\t", file=outfile)

# for each bin, access contig depths and compute mean bin depth (for all samples)
@@ -77,10 +93,15 @@ def main(args=None):
all_depths[sample].append(contig_depths[sample])

binname = os.path.basename(file)
with open(args.assembler + "-" + args.binner + "-" + args.id + "-binDepths.tsv", "a") as outfile:
with open(
args.assembler + "-" + args.binner + "-" + args.id + "-binDepths.tsv", "a"
) as outfile:
print(
binname,
"\t".join(str(statistics.median(sample_depths)) for sample_depths in all_depths),
"\t".join(
str(statistics.median(sample_depths))
for sample_depths in all_depths
),
sep="\t",
file=outfile,
)
3 changes: 3 additions & 0 deletions bin/get_mag_depths_summary.py
@@ -1,5 +1,8 @@
#!/usr/bin/env python

## Originally written by Sabrina Krakau and released under the MIT license.
## See git repository (https://github.com/nf-core/mag) for full license text.

import sys
import argparse
import pandas as pd
28 changes: 21 additions & 7 deletions bin/multiqc_to_custom_tsv.py
@@ -1,5 +1,8 @@
#!/usr/bin/env python
# copied from nf-core/viralrecon and adjusted

## Copied from nf-core/viralrecon and adjusted
## See git repository (https://github.com/nf-core/viralrecon) for full license text.


import os
import sys
@@ -9,9 +12,7 @@


def parse_args(args=None):
Description = (
"Create custom spreadsheet for pertinent MultiQC bowtie 2 metrics generated by the nf-core/mag pipeline."
)
Description = "Create custom spreadsheet for pertinent MultiQC bowtie 2 metrics generated by the nf-core/mag pipeline."
Epilog = "Example usage: python multiqc_to_custom_tsv.py"
parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
parser.add_argument(
@@ -86,7 +87,9 @@ def metrics_dict_to_file(FileFieldList, MultiQCDataDir, OutFile, se):
for yamlFile, mappingList in FileFieldList:
yamlFile = os.path.join(MultiQCDataDir, yamlFile)
if os.path.exists(yamlFile):
MetricsDict = yaml_fields_to_dict(YAMLFile=yamlFile, AppendDict=MetricsDict, FieldMappingList=mappingList)
MetricsDict = yaml_fields_to_dict(
YAMLFile=yamlFile, AppendDict=MetricsDict, FieldMappingList=mappingList
)
FieldList += [x[0] for x in mappingList]
else:
print("WARNING: File does not exist: {}".format(yamlFile))
@@ -96,7 +99,15 @@ def metrics_dict_to_file(FileFieldList, MultiQCDataDir, OutFile, se):
with open(OutFile, "w") as fout:
if se:
fout.write(
"{}\n".format("\t".join(["Sample", "SE reads not mapped (kept)", "SE reads mapped (discarded)"]))
"{}\n".format(
"\t".join(
[
"Sample",
"SE reads not mapped (kept)",
"SE reads mapped (discarded)",
]
)
)
)
else:
fout.write(
@@ -118,7 +129,10 @@ def metrics_dict_to_file(FileFieldList, MultiQCDataDir, OutFile, se):
[
k,
str(MetricsDict[k][FieldList[0]]),
str(MetricsDict[k][FieldList[1]] + MetricsDict[k][FieldList[2]]),
str(
MetricsDict[k][FieldList[1]]
+ MetricsDict[k][FieldList[2]]
),
]
)
)