Skip to content

Commit

Permalink
adding tower yml file
Browse files Browse the repository at this point in the history
  • Loading branch information
Merritt-Brian committed Nov 2, 2023
1 parent 9f8bed0 commit a9b3b7d
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 0 deletions.
43 changes: 43 additions & 0 deletions assets/pass.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
{
"samples": [
"longreads"
],
"tests": [
{
"id": "reference assembly",
"test": "all exists",
"regex": true,
"directory": "bcftools",
"match": ".*\\.consensus.fa$"
},
{
"id": "abundance",
"test": "ge",
"value": 10,
"regex": true,
"directory": "top",
"match": "*_mqc.tsv"
},
{
"id": "meandepth",
"test": "ge",
"regex": true,
"directory": "samtools",
"value": 0.02,
"column": "meandepth",
"match": ".*\\.txt$"
},
{
"id": "coverage",
"test": "ge",
"directory": "samtools",
"value": 0.06,
"regex": true,
"coverage": "coverage",
"match": ".*\\.txt$"

}

]

}
103 changes: 103 additions & 0 deletions bin/make_metadata_file.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
import argparse
import os
import json
import glob
import re
import pandas as pd
# Command-line interface: every option is a single-letter flag.
parser = argparse.ArgumentParser()

# -i: samplesheet to read (mandatory).
parser.add_argument(
    "-i",
    help="Input Samplesheet filename",
    required=True,
)

# -t: optional location of the per-sample top-hits reports.
parser.add_argument(
    "-t",
    help="Where is the top hits report? It is a directory of files with top_report.tsv with samplename in the front",
    default=None,
    required=False,
    type=str,
)

# -c: JSON file describing the tests to run (mandatory).
parser.add_argument(
    "-c",
    help="Where is the configuration file? it is a json that contains the publish directories and filenames for each test",
    required=True,
    type=str,
)

# -w: pipeline output directory that the tests inspect (mandatory).
parser.add_argument(
    "-w",
    help="Where is the workding directory where your outputs are? ",
    required=True,
    type=str,
)
# This script works at the species level to call tests on each species/taxid associated with a samplesheet's list of samples.
# It requires a samplesheet and a taxtriage working outdir with identifiable structures according to the -c argument (json).
# See assets/pass.json for an example format of the json file on the simulated test data used in taxtriage's test profile.



# Parse the command line once, at module import time. `args` is read as a
# module-level global by the functions defined further down in this file.
args = parser.parse_args()

# Path of the input samplesheet given with -i.
file = args.i
# tests = args.t



def val_test(config, working_dir=None):
    """Check whether a sample's report has a species row meeting the threshold.

    The report is located under ``working_dir / config['directory']``, read
    as a tab-separated table, reduced to the rows whose ``rank`` is exactly
    ``"S"``, and then to the rows whose ``abundance`` is greater than or
    equal to ``config['value']``.

    Args:
        config: One test entry from the JSON configuration, with a ``sample``
            key added by the caller. Keys read: ``match``, ``sample``,
            ``regex``, ``directory`` and ``value``.
        working_dir: Directory that contains ``config['directory']``. When
            omitted, the ``-w`` command-line argument is used.

    Returns:
        True if at least one species-rank row meets the threshold. False if
        no row qualifies, or if ``config['regex']`` is truthy and no file
        matches the pattern.

    Note:
        The comparison is always ``>=``; ``config['test']`` is not consulted.
        Despite the key name ``regex``, the pattern is expanded with
        ``glob.glob``, so it must be a shell-style wildcard, not a regular
        expression.
    """
    if working_dir is None:
        # Fall back to the module-level CLI arguments.
        working_dir = args.w

    match = config['match']
    sample = config['sample']
    directory = config['directory']
    value = config['value']

    if config['regex']:
        # The sample name is prefixed to the pattern, e.g. "<sample>*_mqc.tsv".
        pattern = os.path.join(working_dir, directory, sample + match)
        # glob returns matches in arbitrary order; sort so that the file
        # picked is the same on every run.
        candidates = sorted(glob.glob(pattern))
        if not candidates:
            print("File not found, " + pattern)
            return False
        filename = candidates[0]
    else:
        filename = os.path.join(working_dir, directory, match)

    df = pd.read_csv(filename, sep="\t")
    # Keep species-level rows only; na=False treats a missing rank as
    # "not species" instead of producing an invalid boolean mask.
    species = df[df['rank'].str.contains("^S$", na=False)]
    passing = species[species['abundance'] >= value]
    return not passing.empty

# top_hits = os.path.join(top, sample + ".top_report.tsv")
# # read top_hits into a dataframe
# df_top = pd.read_csv(top_hits, sep="\t")
# # sort the dataframe by the abundance column and only select those with rank contains S and an optional number like S, S1, S4, etc.
# df_top = df_top[df_top['rank'].str.contains("^S$")]
# # sort on abundance
# df_top = df_top.sort_values(by=['abundance'], ascending=False)
# # For each sample loop through the taxids and check each of the tests defined as functions. Tests are defined in the config dict imported



def __main__():
    """Run the abundance test for every sample listed in the samplesheet.

    Reads the comma-separated samplesheet given with ``-i``, loads the JSON
    test configuration given with ``-c``, and calls ``val_test`` once per
    samplesheet row using the configuration entry whose ``id`` is
    ``"abundance"``.

    Raises:
        KeyError: if the samplesheet has no ``sample`` column.
        ValueError: if the configuration has no test with id ``"abundance"``.
    """
    if not args.t:
        # Informational only: nothing below reads a top-hits path; the
        # report location comes from the test's own "directory" entry.
        print("No top hits file provided, opting for default location at working directory / top / samplename.tsv")

    df = pd.read_csv(file, sep=",")
    samples = df['sample']

    # Import the -c argument as a dict.
    with open(args.c, encoding="utf-8") as f:
        config = json.load(f)

    # The lookup does not depend on the sample, so do it once up front and
    # fail with a clear message instead of a bare StopIteration.
    abundance_conf = next(
        (test for test in config['tests'] if test['id'] == 'abundance'),
        None,
    )
    if abundance_conf is None:
        raise ValueError("No test with id 'abundance' found in " + str(args.c))

    for sample in samples:
        # Hand each call its own copy so the loaded configuration is not
        # mutated from one sample to the next.
        val_test({**abundance_conf, 'sample': sample})


if __name__ == "__main__":
    __main__()
13 changes: 13 additions & 0 deletions tower.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Each entry under `reports` pairs a file name or glob pattern with a
# human-readable `display` label.
# NOTE(review): presumably consumed by Nextflow Tower / Seqera Platform to
# list pipeline output reports — confirm.
reports:
multiqc_report.html:
display: "MultiQC HTML report"
"**/convert/*merged.tsv":
display: "Confidence Table"
"**/krona/*.html":
display: "Krona Plots"
"**/fastqc/*_fastqc.html":
display: "FASTQC output (Illumina Only)"
"**/nanoplot/*_NanoPlot-report.html":
display: "Nanoplot output (Nanopore Only)"
"**/fastp/*.fastp.html":
display: "fastp output (QC Trimming)"

0 comments on commit a9b3b7d

Please sign in to comment.