diff --git a/packages/2021_CarlhoffNature/2021_CarlhoffNature.config b/packages/2021_CarlhoffNature/2021_CarlhoffNature.config index 010675a..57c4b92 100644 --- a/packages/2021_CarlhoffNature/2021_CarlhoffNature.config +++ b/packages/2021_CarlhoffNature/2021_CarlhoffNature.config @@ -1,4 +1,41 @@ -// Config template version: 0.1.0dev +// Keep track of config versions +minotaur_release='0.2.1' // The release tag of the poseidon-eager repository used for processing and config file retrieval +config_template_version='0.3.0dev' +package_config_version='0.3.0dev' +minotaur_config_base="https://raw.githubusercontent.com/poseidon-framework/poseidon-eager/${minotaur_release}/conf" -// Load in main poseidon configuration file with default params and which loads other poseidon-specific profiles -includeConfig '../../conf/Poseidon.config' +// This configuration file is designed to be used with the nf-core/eager pipeline. +// Instead of having to specify all other configurations for the Minotaur pipeline +// at runtime, they are all contained in this file and loaded automatically upon +// specifying this config file during runtime. Additionally, any parameters that +// need to be altered from the defaults can be specified here. +// +// The intention is to make it easy for users to understand and reproduce the output +// of processing with the Minotaur workflow from the contents of a +// single file. + +// Load configuration profiles. They are loaded from the minotaur_config_base URL, at the minotaur_release tag. +includeConfig "${minotaur_config_base}/EVA_cluster.config" // Cluster-specific configurations for nf-core/eager execution at MPI-EVA +includeConfig "${minotaur_config_base}/Minotaur.config" // Default nf-core/eager parameters for Minotaur processing. + +// The following config file specifies BED files for on-target endogenous DNA calculation and mean coverage as well as pseudohaploid genotyping. 
+// TODO: Select the appropriate config for the CaptureType of the package. +includeConfig "${minotaur_config_base}/CaptureType_profiles/1240K.config" + +params { + // Keep track of config file versions used when processing + config_profile_description = "${config_profile_description}\n - config_template_version: ${config_template_version}\n - package_config_version: ${package_config_version}" + config_profile_contact = "Thiseas C. Lamnidis (@TCLamnidis)" + + /* + TODO: If you need to change any of the default processing parameters for this package + you can specify these parameters below. + Any parameters not specified in any of the config files default to their nf-core/eager default values. + + For information on all available parameters and their default values see: + https://nf-co.re/eager/2.4.6/parameters + + You can see the default values for parameters within poseidon-eager at: + https://github.com/poseidon-framework/poseidon-eager/blob/main/conf/Minotaur.config + */ +} diff --git a/packages/2021_CarlhoffNature/2021_CarlhoffNature.ssf b/packages/2021_CarlhoffNature/2021_CarlhoffNature.ssf index 2a38131..28e680a 100644 --- a/packages/2021_CarlhoffNature/2021_CarlhoffNature.ssf +++ b/packages/2021_CarlhoffNature/2021_CarlhoffNature.ssf @@ -2,10 +2,10 @@ sample_accession study_accession run_accession sample_alias poseidon_IDs udg lib SAMEA8270508 PRJEB43715 ERR5490520 GUP001 GUP001 half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/000/ERR5490520/ERR5490520.fastq.gz 31485654 842ec55a3a0aff4979dea1b5dc529e7c ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/000/ERR5490520/ERR5490520.fastq.gz 945210 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490520/GUP001.A0101.MT1.1.fastq.truncated.gz SAMEA8270508 PRJEB43715 ERR5490521 GUP001 GUP001 half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture 
fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/001/ERR5490521/ERR5490521.fastq.gz 32643939 e4c9ae3aed81094a383c2c2003dd961f ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/001/ERR5490521/ERR5490521.fastq.gz 1040533 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490521/GUP001.A0101.MT1.2.fastq.truncated.gz SAMEA8270508 PRJEB43715 ERR5490522 GUP001 GUP001 half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/002/ERR5490522/ERR5490522.fastq.gz 192243801 a4add9bc886418c913420eab0391297e ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/002/ERR5490522/ERR5490522.fastq.gz 6189559 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490522/GUP001.A0101.MT1.3.fastq.truncated.gz -SAMEA8270508 PRJEB43715 ERR5490523 GUP001 GUP001;GUP001_SG half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 WGS fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/003/ERR5490523/ERR5490523.fastq.gz 95526960 29a36d22db98919f96d32af631a9422c ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/003/ERR5490523/ERR5490523.fastq.gz 2888875 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490523/GUP001.A0101.SG1.1.fastq.truncated.gz -SAMEA8270508 PRJEB43715 ERR5490524 GUP001 GUP001;GUP001_TF half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/004/ERR5490524/ERR5490524.fastq.gz 562634908 ee58126a3880e12ac25ff3c1adf97aec ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/004/ERR5490524/ERR5490524.fastq.gz 17847250 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490524/GUP001.A0101.TF1.1.fastq.truncated.gz -SAMEA8270508 PRJEB43715 ERR5490525 GUP001 GUP001;GUP001_TF half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/005/ERR5490525/ERR5490525.fastq.gz 1541567827 640a45bc4956b647f5578a66763813d0 ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/005/ERR5490525/ERR5490525.fastq.gz 50108944 
ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490525/GUP001.A0101.TF1.2.fastq.truncated.gz -SAMEA8270508 PRJEB43715 ERR5490526 GUP001 GUP001;GUP001_TF half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/006/ERR5490526/ERR5490526.fastq.gz 674880719 e58345ddc808d879d255f8d68065050d ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/006/ERR5490526/ERR5490526.fastq.gz 23288459 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490526/GUP001.A0101.TF1.3.fastq.truncated.gz +SAMEA8270508 PRJEB43715 ERR5490523 GUP001 GUP001 half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 WGS fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/003/ERR5490523/ERR5490523.fastq.gz 95526960 29a36d22db98919f96d32af631a9422c ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/003/ERR5490523/ERR5490523.fastq.gz 2888875 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490523/GUP001.A0101.SG1.1.fastq.truncated.gz +SAMEA8270508 PRJEB43715 ERR5490524 GUP001 GUP001 half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/004/ERR5490524/ERR5490524.fastq.gz 562634908 ee58126a3880e12ac25ff3c1adf97aec ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/004/ERR5490524/ERR5490524.fastq.gz 17847250 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490524/GUP001.A0101.TF1.1.fastq.truncated.gz +SAMEA8270508 PRJEB43715 ERR5490525 GUP001 GUP001 half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/005/ERR5490525/ERR5490525.fastq.gz 1541567827 640a45bc4956b647f5578a66763813d0 ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/005/ERR5490525/ERR5490525.fastq.gz 50108944 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490525/GUP001.A0101.TF1.2.fastq.truncated.gz +SAMEA8270508 PRJEB43715 ERR5490526 GUP001 GUP001 half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE 
METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/006/ERR5490526/ERR5490526.fastq.gz 674880719 e58345ddc808d879d255f8d68065050d ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/006/ERR5490526/ERR5490526.fastq.gz 23288459 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490526/GUP001.A0101.TF1.3.fastq.truncated.gz SAMEA8270508 PRJEB43715 ERR5490527 GUP001 GUP001 minus ss ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0102 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/007/ERR5490527/ERR5490527.fastq.gz 656969967 d16a8f18feef912878f0b56218581717 ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/007/ERR5490527/ERR5490527.fastq.gz 21959304 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490527/GUP001.A0102.AA1.1.fastq.truncated.gz -SAMEA8270508 PRJEB43715 ERR5490528 GUP001 GUP001;GUP001_SG minus ss ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0102 WGS fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/008/ERR5490528/ERR5490528.fastq.gz 145599926 e2be513bf56c01591ecdb7dfbe55c5cb ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/008/ERR5490528/ERR5490528.fastq.gz 5086983 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490528/GUP001.A0102.SG1.1.fastq.truncated.gz -SAMEA8270508 PRJEB43715 ERR5490529 GUP001 GUP001;GUP001_TF minus ss ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0102 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/009/ERR5490529/ERR5490529.fastq.gz 674885210 189d55160fdfec278ddc8fba5296ad18 ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/009/ERR5490529/ERR5490529.fastq.gz 23288459 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490529/GUP001.A0102.TF1.1.fastq.truncated.gz +SAMEA8270508 PRJEB43715 ERR5490528 GUP001 GUP001 minus ss ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0102 WGS fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/008/ERR5490528/ERR5490528.fastq.gz 145599926 e2be513bf56c01591ecdb7dfbe55c5cb 
ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/008/ERR5490528/ERR5490528.fastq.gz 5086983 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490528/GUP001.A0102.SG1.1.fastq.truncated.gz +SAMEA8270508 PRJEB43715 ERR5490529 GUP001 GUP001 minus ss ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0102 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/009/ERR5490529/ERR5490529.fastq.gz 674885210 189d55160fdfec278ddc8fba5296ad18 ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/009/ERR5490529/ERR5490529.fastq.gz 23288459 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490529/GUP001.A0102.TF1.1.fastq.truncated.gz diff --git a/packages/2021_CarlhoffNature/2021_CarlhoffNature.tsv b/packages/2021_CarlhoffNature/2021_CarlhoffNature.tsv index 19fa9dc..c73e2e3 100644 --- a/packages/2021_CarlhoffNature/2021_CarlhoffNature.tsv +++ b/packages/2021_CarlhoffNature/2021_CarlhoffNature.tsv @@ -1,17 +1,11 @@ -Sample_Name Library_ID Lane Colour_Chemistry SeqType Organism Strandedness UDG_Treatment R1 R2 BAM -GUP001 GUP001_GUP001.A0101 1 4 SE Homo sapiens (modern human) double half /GUP001_GUP001.A0101_L1_R1.fastq.gz NA NA -GUP001 GUP001_GUP001.A0101 2 4 SE Homo sapiens (modern human) double half /GUP001_GUP001.A0101_L2_R1.fastq.gz NA NA -GUP001 GUP001_GUP001.A0101 3 4 SE Homo sapiens (modern human) double half /GUP001_GUP001.A0101_L3_R1.fastq.gz NA NA -GUP001 GUP001_GUP001.A0101 4 4 SE Homo sapiens (modern human) double half /GUP001_GUP001.A0101_L4_R1.fastq.gz NA NA -GUP001_SG GUP001_SG_GUP001.A0101 1 4 SE Homo sapiens (modern human) double half /GUP001_SG_GUP001.A0101_L1_R1.fastq.gz NA NA -GUP001 GUP001_GUP001.A0101 5 4 SE Homo sapiens (modern human) double half /GUP001_GUP001.A0101_L5_R1.fastq.gz NA NA -GUP001_TF GUP001_TF_GUP001.A0101 1 4 SE Homo sapiens (modern human) double half /GUP001_TF_GUP001.A0101_L1_R1.fastq.gz NA NA -GUP001 GUP001_GUP001.A0101 6 4 SE Homo sapiens (modern human) double half /GUP001_GUP001.A0101_L6_R1.fastq.gz NA NA -GUP001_TF GUP001_TF_GUP001.A0101 2 4 SE Homo 
sapiens (modern human) double half /GUP001_TF_GUP001.A0101_L2_R1.fastq.gz NA NA -GUP001 GUP001_GUP001.A0101 7 4 SE Homo sapiens (modern human) double half /GUP001_GUP001.A0101_L7_R1.fastq.gz NA NA -GUP001_TF GUP001_TF_GUP001.A0101 3 4 SE Homo sapiens (modern human) double half /GUP001_TF_GUP001.A0101_L3_R1.fastq.gz NA NA -GUP001_ss GUP001_ss_GUP001.A0102_ss 1 4 SE Homo sapiens (modern human) single none /GUP001_ss_GUP001.A0102_ss_L1_R1.fastq.gz NA NA -GUP001_ss GUP001_ss_GUP001.A0102_ss 2 4 SE Homo sapiens (modern human) single none /GUP001_ss_GUP001.A0102_ss_L2_R1.fastq.gz NA NA -GUP001_SG_ss GUP001_SG_ss_GUP001.A0102_ss 1 4 SE Homo sapiens (modern human) single none /GUP001_SG_ss_GUP001.A0102_ss_L1_R1.fastq.gz NA NA -GUP001_ss GUP001_ss_GUP001.A0102_ss 3 4 SE Homo sapiens (modern human) single none /GUP001_ss_GUP001.A0102_ss_L3_R1.fastq.gz NA NA -GUP001_TF_ss GUP001_TF_ss_GUP001.A0102_ss 1 4 SE Homo sapiens (modern human) single none /GUP001_TF_ss_GUP001.A0102_ss_L1_R1.fastq.gz NA NA +Sample_Name Library_ID Lane Colour_Chemistry SeqType Organism Strandedness UDG_Treatment R1 R2 BAM R1_target_file R2_target_file +GUP001 GUP001_GUP001.A0101 1 4 SE Homo sapiens (modern human) double half /GUP001_GUP001.A0101_L1_R1.fastq.gz NA NA ERR5490520.fastq.gz NA +GUP001 GUP001_GUP001.A0101 2 4 SE Homo sapiens (modern human) double half /GUP001_GUP001.A0101_L2_R1.fastq.gz NA NA ERR5490521.fastq.gz NA +GUP001 GUP001_GUP001.A0101 3 4 SE Homo sapiens (modern human) double half /GUP001_GUP001.A0101_L3_R1.fastq.gz NA NA ERR5490522.fastq.gz NA +GUP001 GUP001_GUP001.A0101 4 4 SE Homo sapiens (modern human) double half /GUP001_GUP001.A0101_L4_R1.fastq.gz NA NA ERR5490523.fastq.gz NA +GUP001 GUP001_GUP001.A0101 5 4 SE Homo sapiens (modern human) double half /GUP001_GUP001.A0101_L5_R1.fastq.gz NA NA ERR5490524.fastq.gz NA +GUP001 GUP001_GUP001.A0101 6 4 SE Homo sapiens (modern human) double half /GUP001_GUP001.A0101_L6_R1.fastq.gz NA NA ERR5490525.fastq.gz NA +GUP001 GUP001_GUP001.A0101 
7 4 SE Homo sapiens (modern human) double half /GUP001_GUP001.A0101_L7_R1.fastq.gz NA NA ERR5490526.fastq.gz NA +GUP001_ss GUP001_ss_GUP001.A0102_ss 1 4 SE Homo sapiens (modern human) single none /GUP001_ss_GUP001.A0102_ss_L1_R1.fastq.gz NA NA ERR5490527.fastq.gz NA +GUP001_ss GUP001_ss_GUP001.A0102_ss 2 4 SE Homo sapiens (modern human) single none /GUP001_ss_GUP001.A0102_ss_L2_R1.fastq.gz NA NA ERR5490528.fastq.gz NA +GUP001_ss GUP001_ss_GUP001.A0102_ss 3 4 SE Homo sapiens (modern human) single none /GUP001_ss_GUP001.A0102_ss_L3_R1.fastq.gz NA NA ERR5490529.fastq.gz NA diff --git a/packages/2021_CarlhoffNature/2021_CarlhoffNature.tsv_patch.sh b/packages/2021_CarlhoffNature/2021_CarlhoffNature.tsv_patch.sh old mode 100644 new mode 100755 index 2fbb539..422e836 --- a/packages/2021_CarlhoffNature/2021_CarlhoffNature.tsv_patch.sh +++ b/packages/2021_CarlhoffNature/2021_CarlhoffNature.tsv_patch.sh @@ -1,7 +1,8 @@ #!/usr/bin/env bash +set -uo pipefail ## Pipefail, complain on new unassigned variables. 
## Track the version of the TSV_patch template used -VERSION='0.1.0dev' +VERSION='0.2.0dev' ## This script is applied to the eager input TSV file locally to edit the dummy ## path to the fastQ files added by `create_eager_input.sh` to a real local @@ -14,8 +15,31 @@ VERSION='0.1.0dev' local_data_dir="$(readlink -f ${1})" input_tsv="$(readlink -f ${2})" output_tsv="$(dirname ${local_data_dir})/$(basename -s ".tsv" ${input_tsv}).finalised.tsv" +columns_to_keep=("Sample_Name" "Library_ID" "Lane" "Colour_Chemistry" "SeqType" "Organism" "Strandedness" "UDG_Treatment" "R1" "R2" "BAM") +source $(dirname ${2})/../../scripts/source_me.sh ## Load helper functions -sed -e "s||${local_data_dir}|g" ${input_tsv} > ${output_tsv} +## Index non-proliferated columns and exclude them from the finalised TSV +cut_selector='' +tsv_header=($(head -n1 ${input_tsv})) +for col_name in ${columns_to_keep[@]}; do + let idx=$(get_index_of ${col_name} "${columns_to_keep[@]}")+1 ## cut uses 1-based field numbering. NOTE(review): the lookup searches columns_to_keep, not tsv_header (which is otherwise unused here); confirm this is intended + if [[ ! ${idx} -eq -1 ]]; then + cut_selector+="${idx}," + fi +done + +## Remove added columns. NOTE(review): cut emits fields in input order, so it cannot reorder columns +cut -f ${cut_selector%,} ${input_tsv} > ${output_tsv} +sed -i -e "s||${local_data_dir}|g" ${output_tsv} ## Any further commands to edit the file before finalisation should be added below as shown # sed -ie 's/replace_this/with_this/g' ${output_tsv} + +## Keep track of versions +version_file="$(dirname ${input_tsv})/script_versions.txt" +## Remove versions from an older run, if present +grep -v -F -e "$(basename ${0})" -e "source_me.sh for final TSV" ${version_file} >${version_file}.new +## Then add new versions +echo -e "$(basename ${0}):\t${VERSION}" >> ${version_file}.new +echo -e "source_me.sh for final TSV:\t${HELPER_FUNCTION_VERSION}" >>${version_file}.new +mv ${version_file}.new ${version_file} diff --git a/packages/2021_CarlhoffNature/script_versions.txt b/packages/2021_CarlhoffNature/script_versions.txt index 465b755..be6857b 100644 --- 
a/packages/2021_CarlhoffNature/script_versions.txt +++ b/packages/2021_CarlhoffNature/script_versions.txt @@ -1 +1,2 @@ -create_eager_input.sh: 0.1.0dev +create_eager_input.sh: 0.2.1dev +source_me.sh for initial TSV: 0.2.1dev