Skip to content

Commit

Permalink
Merge pull request #8 from poseidon-framework/update_Carlhoff2021
Browse files Browse the repository at this point in the history
Update: 2021_CarlhoffNature
  • Loading branch information
TCLamnidis authored Jan 24, 2024
2 parents eb54a5d + 06cbf10 commit 27fd639
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 29 deletions.
43 changes: 40 additions & 3 deletions packages/2021_CarlhoffNature/2021_CarlhoffNature.config
Original file line number Diff line number Diff line change
@@ -1,4 +1,41 @@
// Config template version: 0.1.0dev
// Keep track of config versions
minotaur_release='0.2.1' // The release tag of the poseidon-eager repository used for processing and config file retrieval
config_template_version='0.3.0dev'
package_config_version='0.3.0dev'
minotaur_config_base="https://raw.githubusercontent.com/poseidon-framework/poseidon-eager/${minotaur_release}/conf"

// Load in main poseidon configuration file with default params and which loads other poseidon-specific profiles
includeConfig '../../conf/Poseidon.config'
// This configuration file is designed to be used with the nf-core/eager pipeline.
// Instead of having to specify all other configurations for the Minotaur pipeline
// at runtime, they are all contained in this file and loaded automatically upon
// specifying this config file at runtime. Additionally, any parameters that
// need to be altered from the defaults can be specified here.
//
// The intention is to make it easy for users to understand and reproduce the output
// of processing with the Minotaur workflow from the contents of a
// single file.

// Load configuration profiles. They are loaded from the minotaur_config_base URL, which is pinned to the minotaur_release tag.
includeConfig "${minotaur_config_base}/EVA_cluster.config" // Cluster-specific configurations for nf-core/eager execution at MPI-EVA
includeConfig "${minotaur_config_base}/Minotaur.config" // Default nf-core/eager parameters for Minotaur processing.

// The following config file specifies BED files for on-target endogenous DNA calculation and mean coverage as well as pseudohaploid genotyping.
// TODO: Select the appropriate config for the CaptureType of the package.
includeConfig "${minotaur_config_base}/CaptureType_profiles/1240K.config"

params {
// Keep track of config file versions used when processing
config_profile_description = "${config_profile_description}\n - config_template_version: ${config_template_version}\n - package_config_version: ${package_config_version}"
config_profile_contact = "Thiseas C. Lamnidis (@TCLamnidis)"

/*
TODO: If you need to change any of the default processing parameters for this package
you can specify these parameters below.
Any parameters not specified in any of the config files default to their nf-core/eager default values.

For information on all available parameters and their default values see:
https://nf-co.re/eager/2.4.6/parameters

You can see the default values for parameters within poseidon-eager at:
https://github.com/poseidon-framework/poseidon-eager/blob/main/conf/Minotaur.config
*/
}
12 changes: 6 additions & 6 deletions packages/2021_CarlhoffNature/2021_CarlhoffNature.ssf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ sample_accession study_accession run_accession sample_alias poseidon_IDs udg lib
SAMEA8270508 PRJEB43715 ERR5490520 GUP001 GUP001 half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/000/ERR5490520/ERR5490520.fastq.gz 31485654 842ec55a3a0aff4979dea1b5dc529e7c ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/000/ERR5490520/ERR5490520.fastq.gz 945210 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490520/GUP001.A0101.MT1.1.fastq.truncated.gz
SAMEA8270508 PRJEB43715 ERR5490521 GUP001 GUP001 half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/001/ERR5490521/ERR5490521.fastq.gz 32643939 e4c9ae3aed81094a383c2c2003dd961f ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/001/ERR5490521/ERR5490521.fastq.gz 1040533 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490521/GUP001.A0101.MT1.2.fastq.truncated.gz
SAMEA8270508 PRJEB43715 ERR5490522 GUP001 GUP001 half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/002/ERR5490522/ERR5490522.fastq.gz 192243801 a4add9bc886418c913420eab0391297e ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/002/ERR5490522/ERR5490522.fastq.gz 6189559 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490522/GUP001.A0101.MT1.3.fastq.truncated.gz
SAMEA8270508 PRJEB43715 ERR5490523 GUP001 GUP001;GUP001_SG half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 WGS fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/003/ERR5490523/ERR5490523.fastq.gz 95526960 29a36d22db98919f96d32af631a9422c ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/003/ERR5490523/ERR5490523.fastq.gz 2888875 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490523/GUP001.A0101.SG1.1.fastq.truncated.gz
SAMEA8270508 PRJEB43715 ERR5490524 GUP001 GUP001;GUP001_TF half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/004/ERR5490524/ERR5490524.fastq.gz 562634908 ee58126a3880e12ac25ff3c1adf97aec ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/004/ERR5490524/ERR5490524.fastq.gz 17847250 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490524/GUP001.A0101.TF1.1.fastq.truncated.gz
SAMEA8270508 PRJEB43715 ERR5490525 GUP001 GUP001;GUP001_TF half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/005/ERR5490525/ERR5490525.fastq.gz 1541567827 640a45bc4956b647f5578a66763813d0 ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/005/ERR5490525/ERR5490525.fastq.gz 50108944 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490525/GUP001.A0101.TF1.2.fastq.truncated.gz
SAMEA8270508 PRJEB43715 ERR5490526 GUP001 GUP001;GUP001_TF half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/006/ERR5490526/ERR5490526.fastq.gz 674880719 e58345ddc808d879d255f8d68065050d ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/006/ERR5490526/ERR5490526.fastq.gz 23288459 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490526/GUP001.A0101.TF1.3.fastq.truncated.gz
SAMEA8270508 PRJEB43715 ERR5490523 GUP001 GUP001 half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 WGS fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/003/ERR5490523/ERR5490523.fastq.gz 95526960 29a36d22db98919f96d32af631a9422c ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/003/ERR5490523/ERR5490523.fastq.gz 2888875 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490523/GUP001.A0101.SG1.1.fastq.truncated.gz
SAMEA8270508 PRJEB43715 ERR5490524 GUP001 GUP001 half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/004/ERR5490524/ERR5490524.fastq.gz 562634908 ee58126a3880e12ac25ff3c1adf97aec ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/004/ERR5490524/ERR5490524.fastq.gz 17847250 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490524/GUP001.A0101.TF1.1.fastq.truncated.gz
SAMEA8270508 PRJEB43715 ERR5490525 GUP001 GUP001 half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/005/ERR5490525/ERR5490525.fastq.gz 1541567827 640a45bc4956b647f5578a66763813d0 ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/005/ERR5490525/ERR5490525.fastq.gz 50108944 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490525/GUP001.A0101.TF1.2.fastq.truncated.gz
SAMEA8270508 PRJEB43715 ERR5490526 GUP001 GUP001 half ds ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0101 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/006/ERR5490526/ERR5490526.fastq.gz 674880719 e58345ddc808d879d255f8d68065050d ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/006/ERR5490526/ERR5490526.fastq.gz 23288459 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490526/GUP001.A0101.TF1.3.fastq.truncated.gz
SAMEA8270508 PRJEB43715 ERR5490527 GUP001 GUP001 minus ss ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0102 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/007/ERR5490527/ERR5490527.fastq.gz 656969967 d16a8f18feef912878f0b56218581717 ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/007/ERR5490527/ERR5490527.fastq.gz 21959304 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490527/GUP001.A0102.AA1.1.fastq.truncated.gz
SAMEA8270508 PRJEB43715 ERR5490528 GUP001 GUP001;GUP001_SG minus ss ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0102 WGS fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/008/ERR5490528/ERR5490528.fastq.gz 145599926 e2be513bf56c01591ecdb7dfbe55c5cb ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/008/ERR5490528/ERR5490528.fastq.gz 5086983 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490528/GUP001.A0102.SG1.1.fastq.truncated.gz
SAMEA8270508 PRJEB43715 ERR5490529 GUP001 GUP001;GUP001_TF minus ss ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0102 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/009/ERR5490529/ERR5490529.fastq.gz 674885210 189d55160fdfec278ddc8fba5296ad18 ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/009/ERR5490529/ERR5490529.fastq.gz 23288459 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490529/GUP001.A0102.TF1.1.fastq.truncated.gz
SAMEA8270508 PRJEB43715 ERR5490528 GUP001 GUP001 minus ss ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0102 WGS fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/008/ERR5490528/ERR5490528.fastq.gz 145599926 e2be513bf56c01591ecdb7dfbe55c5cb ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/008/ERR5490528/ERR5490528.fastq.gz 5086983 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490528/GUP001.A0102.SG1.1.fastq.truncated.gz
SAMEA8270508 PRJEB43715 ERR5490529 GUP001 GUP001 minus ss ERS5956814 2021-06-18 2021-06-18 Illumina HiSeq 4000 SINGLE METAGENOMIC ILLUMINA GUP001.A0102 Targeted-Capture fasp.sra.ebi.ac.uk:/vol1/fastq/ERR549/009/ERR5490529/ERR5490529.fastq.gz 674885210 189d55160fdfec278ddc8fba5296ad18 ftp.sra.ebi.ac.uk/vol1/fastq/ERR549/009/ERR5490529/ERR5490529.fastq.gz 23288459 ftp.sra.ebi.ac.uk/vol1/run/ERR549/ERR5490529/GUP001.A0102.TF1.1.fastq.truncated.gz
28 changes: 11 additions & 17 deletions packages/2021_CarlhoffNature/2021_CarlhoffNature.tsv
Original file line number Diff line number Diff line change
@@ -1,17 +1,11 @@
Sample_Name Library_ID Lane Colour_Chemistry SeqType Organism Strandedness UDG_Treatment R1 R2 BAM
GUP001 GUP001_GUP001.A0101 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_GUP001.A0101_L1_R1.fastq.gz NA NA
GUP001 GUP001_GUP001.A0101 2 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_GUP001.A0101_L2_R1.fastq.gz NA NA
GUP001 GUP001_GUP001.A0101 3 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_GUP001.A0101_L3_R1.fastq.gz NA NA
GUP001 GUP001_GUP001.A0101 4 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_GUP001.A0101_L4_R1.fastq.gz NA NA
GUP001_SG GUP001_SG_GUP001.A0101 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_SG_GUP001.A0101_L1_R1.fastq.gz NA NA
GUP001 GUP001_GUP001.A0101 5 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_GUP001.A0101_L5_R1.fastq.gz NA NA
GUP001_TF GUP001_TF_GUP001.A0101 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_TF_GUP001.A0101_L1_R1.fastq.gz NA NA
GUP001 GUP001_GUP001.A0101 6 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_GUP001.A0101_L6_R1.fastq.gz NA NA
GUP001_TF GUP001_TF_GUP001.A0101 2 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_TF_GUP001.A0101_L2_R1.fastq.gz NA NA
GUP001 GUP001_GUP001.A0101 7 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_GUP001.A0101_L7_R1.fastq.gz NA NA
GUP001_TF GUP001_TF_GUP001.A0101 3 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_TF_GUP001.A0101_L3_R1.fastq.gz NA NA
GUP001_ss GUP001_ss_GUP001.A0102_ss 1 4 SE Homo sapiens (modern human) single none <PATH_TO_DATA>/GUP001_ss_GUP001.A0102_ss_L1_R1.fastq.gz NA NA
GUP001_ss GUP001_ss_GUP001.A0102_ss 2 4 SE Homo sapiens (modern human) single none <PATH_TO_DATA>/GUP001_ss_GUP001.A0102_ss_L2_R1.fastq.gz NA NA
GUP001_SG_ss GUP001_SG_ss_GUP001.A0102_ss 1 4 SE Homo sapiens (modern human) single none <PATH_TO_DATA>/GUP001_SG_ss_GUP001.A0102_ss_L1_R1.fastq.gz NA NA
GUP001_ss GUP001_ss_GUP001.A0102_ss 3 4 SE Homo sapiens (modern human) single none <PATH_TO_DATA>/GUP001_ss_GUP001.A0102_ss_L3_R1.fastq.gz NA NA
GUP001_TF_ss GUP001_TF_ss_GUP001.A0102_ss 1 4 SE Homo sapiens (modern human) single none <PATH_TO_DATA>/GUP001_TF_ss_GUP001.A0102_ss_L1_R1.fastq.gz NA NA
Sample_Name Library_ID Lane Colour_Chemistry SeqType Organism Strandedness UDG_Treatment R1 R2 BAM R1_target_file R2_target_file
GUP001 GUP001_GUP001.A0101 1 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_GUP001.A0101_L1_R1.fastq.gz NA NA ERR5490520.fastq.gz NA
GUP001 GUP001_GUP001.A0101 2 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_GUP001.A0101_L2_R1.fastq.gz NA NA ERR5490521.fastq.gz NA
GUP001 GUP001_GUP001.A0101 3 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_GUP001.A0101_L3_R1.fastq.gz NA NA ERR5490522.fastq.gz NA
GUP001 GUP001_GUP001.A0101 4 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_GUP001.A0101_L4_R1.fastq.gz NA NA ERR5490523.fastq.gz NA
GUP001 GUP001_GUP001.A0101 5 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_GUP001.A0101_L5_R1.fastq.gz NA NA ERR5490524.fastq.gz NA
GUP001 GUP001_GUP001.A0101 6 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_GUP001.A0101_L6_R1.fastq.gz NA NA ERR5490525.fastq.gz NA
GUP001 GUP001_GUP001.A0101 7 4 SE Homo sapiens (modern human) double half <PATH_TO_DATA>/GUP001_GUP001.A0101_L7_R1.fastq.gz NA NA ERR5490526.fastq.gz NA
GUP001_ss GUP001_ss_GUP001.A0102_ss 1 4 SE Homo sapiens (modern human) single none <PATH_TO_DATA>/GUP001_ss_GUP001.A0102_ss_L1_R1.fastq.gz NA NA ERR5490527.fastq.gz NA
GUP001_ss GUP001_ss_GUP001.A0102_ss 2 4 SE Homo sapiens (modern human) single none <PATH_TO_DATA>/GUP001_ss_GUP001.A0102_ss_L2_R1.fastq.gz NA NA ERR5490528.fastq.gz NA
GUP001_ss GUP001_ss_GUP001.A0102_ss 3 4 SE Homo sapiens (modern human) single none <PATH_TO_DATA>/GUP001_ss_GUP001.A0102_ss_L3_R1.fastq.gz NA NA ERR5490529.fastq.gz NA
28 changes: 26 additions & 2 deletions packages/2021_CarlhoffNature/2021_CarlhoffNature.tsv_patch.sh
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#!/usr/bin/env bash
set -uo pipefail ## Pipefail, complain on new unassigned variables.

## Track the version of the TSV_patch template used
VERSION='0.1.0dev'
VERSION='0.2.0dev'

## This script is applied to the eager input TSV file locally to edit the dummy
## path to the fastQ files added by `create_eager_input.sh` to a real local
Expand All @@ -14,8 +15,31 @@ VERSION='0.1.0dev'
## Resolve the two positional arguments to absolute paths:
##   $1 - local data directory that replaces the <PATH_TO_DATA> placeholder
##   $2 - eager input TSV to finalise
## All expansions are quoted so paths containing whitespace stay intact.
local_data_dir="$(readlink -f "${1}")"
input_tsv="$(readlink -f "${2}")"
## The finalised TSV is written into the parent directory of the data directory,
## named after the input TSV with ".tsv" swapped for ".finalised.tsv".
output_tsv="$(dirname "${local_data_dir}")/$(basename -s ".tsv" "${input_tsv}").finalised.tsv"
## Columns retained in the finalised TSV; any other column is dropped.
columns_to_keep=("Sample_Name" "Library_ID" "Lane" "Colour_Chemistry" "SeqType" "Organism" "Strandedness" "UDG_Treatment" "R1" "R2" "BAM")
## Load helper functions. The path is relative to the TSV location as given on
## the command line (not the symlink-resolved one).
source "$(dirname "${2}")/../../scripts/source_me.sh"

sed -e "s|<PATH_TO_DATA>|${local_data_dir}|g" ${input_tsv} > ${output_tsv}
## Index non-proliferated columns and exclude them from the finalised TSV.
## Each wanted column name is looked up in the header of the input TSV itself,
## so additional or reordered columns in the input cannot shift the selection.
## NOTE(review): assumes get_index_of (from source_me.sh) prints the 0-based
## position of its first argument among the remaining arguments, and -1 when it
## is absent - confirm against the helper.
cut_selector=''
## Intentional word-splitting: one array element per header field.
tsv_header=($(head -n1 "${input_tsv}"))
for col_name in "${columns_to_keep[@]}"; do
  idx="$(get_index_of "${col_name}" "${tsv_header[@]}")"
  ## Test for the "not found" value before converting to 1-based numbering;
  ## after adding 1 the value could never equal -1.
  if [[ "${idx}" -ne -1 ]]; then
    cut_selector+="$((idx + 1))," ## cut numbers fields starting from 1
  fi
done

## Remove added columns. cut always emits the selected fields in the order they
## appear in the input, whatever order the selector lists them in.
cut -f "${cut_selector%,}" "${input_tsv}" > "${output_tsv}"
## Replace the dummy path with the real local data directory, in place.
sed -i -e "s|<PATH_TO_DATA>|${local_data_dir}|g" "${output_tsv}"

## Any further commands to edit the file before finalisation should be added below as shown
## (keep -i and -e separate: GNU sed reads "-ie" as "-i" with backup suffix "e").
# sed -i -e 's/replace_this/with_this/g' "${output_tsv}"

## Keep track of versions. The versions file sits next to the input TSV.
version_file="$(dirname "${input_tsv}")/script_versions.txt"
## Remove versions from older run if there: drop every line mentioning this
## script's file name or the final-TSV helper entry (fixed-string match).
grep -v -F -e "$(basename "${0}")" -e "source_me.sh for final TSV" "${version_file}" > "${version_file}.new"
## Then add new versions. printf is used so that backslashes in the values are
## written literally rather than interpreted as escapes.
printf '%s:\t%s\n' "$(basename "${0}")" "${VERSION}" >> "${version_file}.new"
printf 'source_me.sh for final TSV:\t%s\n' "${HELPER_FUNCTION_VERSION}" >> "${version_file}.new"
## Swap the rebuilt file into place.
mv "${version_file}.new" "${version_file}"
3 changes: 2 additions & 1 deletion packages/2021_CarlhoffNature/script_versions.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
create_eager_input.sh: 0.1.0dev
create_eager_input.sh: 0.2.1dev
source_me.sh for initial TSV: 0.2.1dev

0 comments on commit 27fd639

Please sign in to comment.