-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
9f8bed0
commit a9b3b7d
Showing
3 changed files
with
159 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
{ | ||
"samples": [ | ||
"longreads" | ||
], | ||
"tests": [ | ||
{ | ||
"id": "reference assembly", | ||
"test": "all exists", | ||
"regex": true, | ||
"directory": "bcftools", | ||
"match": ".*\\.consensus.fa$" | ||
}, | ||
{ | ||
"id": "abundance", | ||
"test": "ge", | ||
"value": 10, | ||
"regex": true, | ||
"directory": "top", | ||
"match": "*_mqc.tsv" | ||
}, | ||
{ | ||
"id": "meandepth", | ||
"test": "ge", | ||
"regex": true, | ||
"directory": "samtools", | ||
"value": 0.02, | ||
"column": "meandepth", | ||
"match": ".*\\.txt$" | ||
}, | ||
{ | ||
"id": "coverage", | ||
"test": "ge", | ||
"directory": "samtools", | ||
"value": 0.06, | ||
"regex": true, | ||
"coverage": "coverage", | ||
"match": ".*\\.txt$" | ||
|
||
} | ||
|
||
] | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,103 @@ | ||
import argparse | ||
import os | ||
import json | ||
import glob | ||
import re | ||
import pandas as pd | ||
# Command-line interface of the validation script. No explicit ``dest`` is
# given, so the parsed namespace exposes the values as ``i``, ``t``, ``c``
# and ``w`` (the single-letter flag names).
parser = argparse.ArgumentParser()
parser.add_argument(
    "-i",
    required=True,
    help="Input Samplesheet filename",
)
parser.add_argument(
    "-t",
    type=str,
    required=False,
    default=None,
    help="Where is the top hits report? It is a directory of files with top_report.tsv with samplename in the front",
)
parser.add_argument(
    "-c",
    type=str,
    required=True,
    help="Where is the configuration file? it is a json that contains the publish directories and filenames for each test",
)
parser.add_argument(
    "-w",
    type=str,
    required=True,
    help="Where is the working directory where your outputs are?",
)
# This script works at the species level to call tests on each species/taxid associated with a samplesheet list of samples. | ||
# It requires a samplesheet and a taxtriage working outdir with identifiable structures according to the -c argument (json). | ||
# See assets/pass.json for an example format of the json file on the simulated test data used in taxtriage's test profile. | ||
|
||
|
||
|
||
# Parse the command line once at module level; val_test() and __main__() read this module-level `args`. | ||
args = parser.parse_args() | ||
|
||
# Samplesheet path from the -i argument; __main__() reads it as a comma-separated table. | ||
file = args.i | ||
# tests = args.t | ||
|
||
|
||
|
||
def val_test(config, working_dir=None):
    """Check whether a sample's report has a species row meeting a threshold.

    The report is looked up under ``<working_dir>/<directory>/``, read as a
    tab-separated table, restricted to rows whose ``rank`` is exactly ``S``,
    and tested for at least one ``abundance`` value greater than or equal to
    ``value``.

    Args:
        config: One test entry from the JSON configuration with the sample
            name added under ``sample``. Keys read here: ``match``,
            ``sample``, ``directory``, ``value`` and, optionally, ``regex``.
            The entry's ``test`` field is not consulted; the comparison is
            always ``>=``.
        working_dir: Root directory of the outputs. When omitted, the ``-w``
            command-line argument (module-level ``args.w``) is used.

    Returns:
        ``True`` when at least one qualifying row exists; ``False`` when
        there is none or when the report file cannot be found.
    """
    base = args.w if working_dir is None else working_dir
    match = config['match']
    sample = config['sample']
    directory = config['directory']
    value = config['value']

    if config.get('regex', False):
        # NOTE: despite the key name, ``match`` is expanded as a glob pattern
        # appended to the sample name, not as a regular expression.
        # The sample name is escaped so characters such as "[" in it are
        # matched literally, and coerced to str in case the samplesheet
        # yielded a numeric sample id.
        filename = os.path.join(base, directory, glob.escape(str(sample)) + match)
        # Sort so the chosen file is deterministic when several match.
        filenames = sorted(glob.glob(filename))
        if not filenames:
            print("File not found, " + filename)
            return False
        filename = filenames[0]
    else:
        filename = os.path.join(base, directory, match)
        # Mirror the glob branch: a missing file is a failed test, not a crash.
        if not os.path.isfile(filename):
            print("File not found, " + filename)
            return False

    df = pd.read_csv(filename, sep="\t")
    # Keep only species-level rows. ``eq`` treats missing ranks as
    # non-matching, where a ``.str.contains`` mask would contain NaN and
    # fail when used for boolean indexing.
    species = df[df['rank'].eq("S")]
    return bool((species['abundance'] >= value).any())
|
||
# top_hits = os.path.join(top, sample + ".top_report.tsv") | ||
# # read top_hits into a dataframe | ||
# df_top = pd.read_csv(top_hits, sep="\t") | ||
# # sort the dataframe by the abundance column and only select those with rank contains S and an optional number like S, S1, S4, etc. | ||
# df_top = df_top[df_top['rank'].str.contains("^S$")] | ||
# # sort on abundance | ||
# df_top = df_top.sort_values(by=['abundance'], ascending=False) | ||
# # For each sample loop through the taxids and check each of the tests defined as functions. Tests are defined in the config dict imported | ||
|
||
|
||
|
||
def __main__():
    """Run the ``abundance`` test from the JSON config for every sample.

    Reads the samplesheet (module-level ``file``, i.e. the ``-i`` argument)
    as a comma-separated table, loads the JSON configuration named by ``-c``,
    and calls ``val_test`` once per samplesheet row with the ``abundance``
    test entry plus that row's ``sample`` value.

    Raises:
        ValueError: If the configuration contains no test whose ``id`` is
            ``abundance``.
    """
    if not args.t:
        # NOTE: the -t location is not consumed by any test yet; the message
        # is kept so the script's output stays the same.
        print("No top hits file provided, opting for default location at working directory / top / samplename.tsv")

    samplesheet = pd.read_csv(file, sep=",")

    # Import the -c argument as a dict.
    with open(args.c) as f:
        config = json.load(f)

    # The test entry is the same for every sample, so look it up once and
    # fail with a clear message instead of a bare StopIteration.
    abundance_test = next(
        (test for test in config['tests'] if test['id'] == 'abundance'),
        None,
    )
    if abundance_test is None:
        raise ValueError("No test with id 'abundance' found in " + str(args.c))

    for sample in samplesheet['sample']:
        # Pass a copy so the loaded configuration is not mutated per sample.
        val_test(dict(abundance_test, sample=sample))
# Run the validation only when this file is executed as a script.
if __name__ == "__main__":
    __main__()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
reports: | ||
multiqc_report.html: | ||
display: "MultiQC HTML report" | ||
"**/convert/*merged.tsv": | ||
display: "Confidence Table" | ||
"**/krona/*.html": | ||
display: "Krona Plots" | ||
"**/fastqc/*_fastqc.html": | ||
display: "FASTQC output (Illumina Only)" | ||
"**/nanoplot/*_NanoPlot-report.html": | ||
display: "Nanoplot output (Nanopore Only)" | ||
"**/fastp/*.fastp.html": | ||
display: "fastp output (QC Trimming)" |