diff --git a/.gitignore b/.gitignore index 7ec8546e42..2c63d79039 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,13 @@ -data/chr17_testdata -Annotation +Annotation/ +data/chr17_testdata/ Preprocessing/ -Reports/ References/ +Reports/ VariantCalling/ work/ .*swp .nextflow* +*.img +*.tar.gz timeline.html* trace.txt* -*.tar.gz diff --git a/.travis.yml b/.travis.yml index c28f739da0..77be1bb5c7 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,11 +11,11 @@ env: - NXF_VER=0.25.6 SGT_VER=2.3.1 PROFILE=singularity TEST=ANNOTATESNPEFF TOOL_INSTALL=all - NXF_VER=0.25.6 PROFILE=docker TEST=ANNOTATESNPEFF TOOL_INSTALL=nextflow - NXF_VER=0.25.6 PROFILE=docker TEST=ANNOTATEVEP TOOL_INSTALL=nextflow - - NXF_VER=0.25.6 PROFILE=docker TEST=BUILDCONTAINERS TOOL_INSTALL=nextflow - NXF_VER=0.25.6 SGT_VER=2.3.1 PROFILE=singularity TEST=RECALIBRATE TOOL_INSTALL=all - NXF_VER=0.25.6 PROFILE=docker TEST=RECALIBRATE TOOL_INSTALL=nextflow - NXF_VER=0.25.6 SGT_VER=2.3.1 PROFILE=singularity TEST=REALIGN TOOL_INSTALL=all - NXF_VER=0.25.6 PROFILE=docker TEST=REALIGN TOOL_INSTALL=nextflow + - NXF_VER=0.25.6 PROFILE=docker TEST=BUILDCONTAINERS TOOL_INSTALL=nextflow - NXF_VER=0.25.6 SGT_VER=2.3.1 PROFILE=singularity TEST=MAPPING TOOL_INSTALL=all - NXF_VER=0.25.6 PROFILE=docker TEST=MAPPING TOOL_INSTALL=nextflow diff --git a/README.md b/README.md index 3198ed7bab..22d449cfdf 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ [![caw version][version-badge]][version-link] [![License][license-badge]][license-link] [![nextflow version][nextflow-badge]][nextflow-link] [![Join the chat at https://gitter.im/SciLifeLab/CAW][gitter-badge]][gitter-link] [![Travis status][travis-badge]][travis-link] - CAW is a complete open source pipeline to detect somatic variants from WGS data developed at the [National Genomics Infastructure][ngi-link] at [SciLifeLab Stockholm][scilifelab-stockholm-link], Sweden and [National Bioinformatics Infastructure Sweden][nbis-link] at [SciLifeLab][scilifelab-link]. 
The pipeline uses [Nextflow][nextflow-link], a bioinformatics domain specific language for workflow building and [Singularity](http://singularity.lbl.gov/), a container technology specific for high-performance computing. diff --git a/buildContainers.nf b/buildContainers.nf index 2c19dac666..77871bb3f6 100644 --- a/buildContainers.nf +++ b/buildContainers.nf @@ -36,7 +36,7 @@ kate: syntax groovy; space-indent on; indent-width 2; ================================================================================ */ -version = '1.2.2' +version = '1.2.3' // Check that Nextflow version is up to date enough // try / throw / catch works for NF versions < 0.25 when this was implemented @@ -58,6 +58,22 @@ if (params.version) exit 0, versionMessage() if (!isAllowedParams(params)) exit 1, "params is unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " +// Default params: +// Such params are overridden by command line or configuration definitions + +// containerPath is empty +params.containerPath = '' +// all containers to be built +params.containers = 'all' +// Docker will not be used +params.docker = false +// Containers will not be pushed to DockerHub +params.push = false +// DockerHub repository is maxulysse +params.repository = 'maxulysse' +// Singularity will not be used +params.singularity = false + verbose = params.verbose containersList = defineContainersList() containers = params.containers.split(',').collect {it.trim()} @@ -67,7 +83,9 @@ push = params.docker && params.push ? true : false repository = params.repository tag = params.tag ? params.tag : version singularity = params.singularity ? true : false -singularityPublishDir = params.singularity && params.singularityPublishDir ? params.singularityPublishDir : "." +containerPath = params.singularity && params.containerPath ? params.containerPath : "."
+ +if (!docker && !singularity) exit 1, 'No builder chosen, specify --docker or --singularity, see --help for more information' if (!checkContainers(containers,containersList)) exit 1, 'Unknown container(s), see --help for more information' @@ -106,7 +124,7 @@ if (verbose) dockerContainersBuilt = dockerContainersBuilt.view { process PullSingularityContainers { tag {repository + "/" + container + ":" + tag} - publishDir singularityPublishDir, mode: 'move' + publishDir containerPath, mode: 'move' input: val container from singularityContainers @@ -188,6 +206,8 @@ def checkParams(it) { 'callName', 'contact-mail', 'contactMail', + 'container-path', + 'containerPath', 'containers', 'docker', 'genome', @@ -205,9 +225,7 @@ def checkParams(it) { 'sampleDir', 'single-CPUMem', 'singleCPUMem', - 'singularity-publish-dir', 'singularity', - 'singularityPublishDir', 'step', 'tag', 'test', @@ -241,7 +259,6 @@ def defineContainersList(){ 'snpeff', 'snpeffgrch37', 'snpeffgrch38', - 'vep', 'vepgrch37', 'vepgrch38' ] @@ -258,7 +275,7 @@ def helpMessage() { log.info " Usage:" log.info " nextflow run SciLifeLab/buildContainers.nf [--docker] [--push]" log.info " [--containers ] [--singularity]" - log.info " [--singularityPublishDir ]" + log.info " [--containerPath ]" log.info " [--tag ] [--repository ]" log.info " Example:" log.info " nextflow run .
--docker --containers multiqc,fastqc" @@ -275,7 +292,7 @@ def helpMessage() { log.info " --repository: Build containers under given repository" log.info " Default: maxulysse" log.info " --singularity: Build containers using Singularity" - log.info " --singularityPublishDir: Select where to download containers" + log.info " --containerPath: Select where to download containers" log.info " Default: $PWD" log.info " --tag`: Build containers using given tag" log.info " Default (version number): " + version diff --git a/buildReferences.nf b/buildReferences.nf index 950f9059c0..b2e3092d01 100644 --- a/buildReferences.nf +++ b/buildReferences.nf @@ -39,7 +39,7 @@ kate: syntax groovy; space-indent on; indent-width 2; ================================================================================ */ -version = '1.2.2' +version = '1.2.3' // Check that Nextflow version is up to date enough // try / throw / catch works for NF versions < 0.25 when this was implemented @@ -61,8 +61,14 @@ if (params.version) exit 0, versionMessage() if (!isAllowedParams(params)) exit 1, "params is unknown, see --help for more information" if (!checkUppmaxProject()) exit 1, "No UPPMAX project ID found! Use --project " +// Default params: +// Such params are overridden by command line or configuration definitions + +// No download params.download = false -params.refDir = "" +// refDir is empty +params.refDir = '' + verbose = params.verbose download = params.download ? 
true : false @@ -309,6 +315,8 @@ def checkParams(it) { 'callName', 'contact-mail', 'contactMail', + 'container-path', + 'containerPath', 'containers', 'docker', 'download', @@ -331,9 +339,7 @@ def checkParams(it) { 'sampleDir', 'single-CPUMem', 'singleCPUMem', - 'singularity-publish-dir', 'singularity', - 'singularityPublishDir', 'step', 'tag', 'test', diff --git a/configuration/containers.config b/configuration/containers.config new file mode 100644 index 0000000000..d8015cca13 --- /dev/null +++ b/configuration/containers.config @@ -0,0 +1,44 @@ +/* +vim: syntax=groovy +-*- mode: groovy;-*- + * ------------------------------------------------- + * Nextflow config file for CAW project + * ------------------------------------------------- + * Images for every process + * All images will be pulled automatically + * ------------------------------------------------- + */ + +process { + $BuildBWAindexes.container = "${params.repository}/caw:${params.tag}" + $BuildPicardIndex.container = "${params.repository}/picard:${params.tag}" + $BuildSAMToolsIndex.container = "${params.repository}/caw:${params.tag}" + $BuildVCFIndex.container = "${params.repository}/igvtools:${params.tag}" + $ConcatVCF.container = "${params.repository}/caw:${params.tag}" + $CreateRecalibrationTable.container = "${params.repository}/gatk:${params.tag}" + $IndelRealigner.container = "${params.repository}/gatk:${params.tag}" + $MapReads.container = "${params.repository}/caw:${params.tag}" + $MarkDuplicates.container = "${params.repository}/picard:${params.tag}" + $MergeBams.container = "${params.repository}/caw:${params.tag}" + $RealignerTargetCreator.container = "${params.repository}/gatk:${params.tag}" + $RecalibrateBam.container = "${params.repository}/gatk:${params.tag}" + $RunAlleleCount.container = "${params.repository}/runallelecount:${params.tag}" + $RunAscat.container = "${params.repository}/r-base:${params.tag}" + $RunBamQC.container = "${params.repository}/qualimap:${params.tag}" + 
$RunBcftoolsStats.container = "${params.repository}/caw:${params.tag}" + $RunConvertAlleleCounts.container = "${params.repository}/r-base:${params.tag}" + $RunFastQC.container = "${params.repository}/fastqc:${params.tag}" + $RunFreeBayes.container = "${params.repository}/freebayes:${params.tag}" + $RunGenotypeGVCFs.container = "${params.repository}/gatk:${params.tag}" + $RunHaplotypecaller.container = "${params.repository}/gatk:${params.tag}" + $RunManta.container = "${params.repository}/caw:${params.tag}" + $RunMultiQC.container = "${params.repository}/multiqc:${params.tag}" + $RunMutect1.container = "${params.repository}/mutect1:${params.tag}" + $RunMutect2.container = "${params.repository}/gatk:${params.tag}" + $RunSamtoolsStats.container = "${params.repository}/caw:${params.tag}" + $RunSingleManta.container = "${params.repository}/caw:${params.tag}" + $RunSingleStrelka.container = "${params.repository}/caw:${params.tag}" + $RunSnpeff.container = {params.genome == "GRCh38" ? "${params.repository}/snpeffgrch38:${params.tag}" : "${params.repository}/snpeffgrch37:${params.tag}"} + $RunStrelka.container = "${params.repository}/caw:${params.tag}" + $RunVEP.container = {params.genome == "GRCh38" ? 
"${params.repository}/vepgrch38:${params.tag}" : "${params.repository}/vepgrch37:${params.tag}"} +} diff --git a/configuration/docker.config b/configuration/docker.config index 368e6132fd..69711042ad 100644 --- a/configuration/docker.config +++ b/configuration/docker.config @@ -14,36 +14,5 @@ docker { fixOwnership = true } -process { - $BuildBWAindexes.container = 'maxulysse/caw:1.2.1' - $BuildPicardIndex.container = 'maxulysse/picard:1.2.1' - $BuildSAMToolsIndex.container = 'maxulysse/caw:1.2.1' - $BuildVCFIndex.container = 'maxulysse/igvtools:1.2.1' - $ConcatVCF.container = 'maxulysse/caw:1.2.1' - $CreateRecalibrationTable.container = 'maxulysse/gatk:1.2.1' - $IndelRealigner.container = 'maxulysse/gatk:1.2.1' - $MapReads.container = 'maxulysse/caw:1.2.1' - $MarkDuplicates.container = 'maxulysse/picard:1.2.1' - $MergeBams.container = 'maxulysse/caw:1.2.1' - $RealignerTargetCreator.container = 'maxulysse/gatk:1.2.1' - $RecalibrateBam.container = 'maxulysse/gatk:1.2.1' - $RunAlleleCount.container = 'maxulysse/runallelecount:1.2.1' - $RunAscat.container = 'maxulysse/r-base:1.2.1' - $RunBamQC.container = 'maxulysse/qualimap:1.2.1' - $RunBcftoolsStats.container = 'maxulysse/caw:1.2.1' - $RunConvertAlleleCounts.container = 'maxulysse/r-base:1.2.1' - $RunFastQC.container = 'maxulysse/fastqc:1.2.1' - $RunFreeBayes.container = 'maxulysse/freebayes:1.2.1' - $RunGenotypeGVCFs.container = 'maxulysse/gatk:1.2.1' - $RunHaplotypecaller.container = 'maxulysse/gatk:1.2.1' - $RunManta.container = 'maxulysse/caw:1.2.1' - $RunMultiQC.container = 'maxulysse/multiqc:1.2.1' - $RunMutect1.container = 'maxulysse/mutect1:1.2.1' - $RunMutect2.container = 'maxulysse/gatk:1.2.1' - $RunSamtoolsStats.container = 'maxulysse/caw:1.2.1' - $RunSingleManta.container = 'maxulysse/caw:1.2.1' - $RunSingleStrelka.container = 'maxulysse/caw:1.2.1' - $RunSnpeff.container = {params.genome == "GRCh38" ? 
'maxulysse/snpeffgrch38:1.2.1' : 'maxulysse/snpeffgrch37:1.2.1'} - $RunStrelka.container = 'maxulysse/caw:1.2.1' - $RunVEP.container = {params.genome == "GRCh38" ? 'maxulysse/vepgrch38:1.2.1' : 'maxulysse/vepgrch37:1.2.1'} -} +params.repository='maxulysse' +params.tag='1.2.3' diff --git a/configuration/singularity-path.config b/configuration/singularity-path.config index d1b05de5e0..4fb21e0ff2 100644 --- a/configuration/singularity-path.config +++ b/configuration/singularity-path.config @@ -12,39 +12,43 @@ vim: syntax=groovy singularity { enabled = true + runOptions = "--bind /scratch" } +params.containerPath='containers' +params.tag='1.2.3' + process { - $BuildBWAindexes.container = 'containers/caw-1.2.1.img' - $BuildPicardIndex.container = 'containers/picard-1.2.1.img' - $BuildSAMToolsIndex.container = 'containers/caw-1.2.1.img' - $BuildVCFIndex.container = 'containers/igvtools-1.2.1.img' - $ConcatVCF.container = 'containers/caw-1.2.1.img' - $CreateRecalibrationTable.container = 'containers/gatk-1.2.1.img' - $GenerateMultiQCconfig.container = 'containers/multiqc-1.2.1.img' - $IndelRealigner.container = 'containers/gatk-1.2.1.img' - $MapReads.container = 'containers/caw-1.2.1.img' - $MarkDuplicates.container = 'containers/picard-1.2.1.img' - $MergeBams.container = 'containers/caw-1.2.1.img' - $RealignerTargetCreator.container = 'containers/gatk-1.2.1.img' - $RecalibrateBam.container = 'containers/gatk-1.2.1.img' - $RunAlleleCount.container = 'containers/runallelecount-1.2.1.img' - $RunAscat.container = 'containers/r-base-1.2.1.img' - $RunBamQC.container = 'containers/qualimap-1.2.1.img' - $RunBcftoolsStats.container = 'containers/caw-1.2.1.img' - $RunConvertAlleleCounts.container = 'containers/r-base-1.2.1.img' - $RunFastQC.container = 'containers/fastqc-1.2.1.img' - $RunFreeBayes.container = 'containers/freebayes-1.2.1.img' - $RunGenotypeGVCFs.container = 'containers/gatk-1.2.1.img' - $RunHaplotypecaller.container = 'containers/gatk-1.2.1.img' - 
$RunManta.container = 'containers/caw-1.2.1.img' - $RunMultiQC.container = 'containers/multiqc-1.2.1.img' - $RunMutect1.container = 'containers/mutect1-1.2.1.img' - $RunMutect2.container = 'containers/gatk-1.2.1.img' - $RunSamtoolsStats.container = 'containers/caw-1.2.1.img' - $RunSingleManta.container = 'containers/caw-1.2.1.img' - $RunSingleStrelka.container = 'containers/caw-1.2.1.img' - $RunSnpeff.container = {params.genome == "GRCh38" ? 'containers/snpeffgrch38-1.2.1.img' : 'containers/snpeffgrch37-1.2.1.img'} - $RunStrelka.container = 'containers/caw-1.2.1.img' - $RunVEP.container = {params.genome == "GRCh38" ? 'containers/vepgrch38-1.2.1.img' : 'containers/vepgrch37-1.2.1.img'} + $BuildBWAindexes.container = "${params.containerPath}/caw-${params.tag}.img" + $BuildPicardIndex.container = "${params.containerPath}/picard-${params.tag}.img" + $BuildSAMToolsIndex.container = "${params.containerPath}/caw-${params.tag}.img" + $BuildVCFIndex.container = "${params.containerPath}/igvtools-${params.tag}.img" + $ConcatVCF.container = "${params.containerPath}/caw-${params.tag}.img" + $CreateRecalibrationTable.container = "${params.containerPath}/gatk-${params.tag}.img" + $GenerateMultiQCconfig.container = "${params.containerPath}/multiqc-${params.tag}.img" + $IndelRealigner.container = "${params.containerPath}/gatk-${params.tag}.img" + $MapReads.container = "${params.containerPath}/caw-${params.tag}.img" + $MarkDuplicates.container = "${params.containerPath}/picard-${params.tag}.img" + $MergeBams.container = "${params.containerPath}/caw-${params.tag}.img" + $RealignerTargetCreator.container = "${params.containerPath}/gatk-${params.tag}.img" + $RecalibrateBam.container = "${params.containerPath}/gatk-${params.tag}.img" + $RunAlleleCount.container = "${params.containerPath}/runallelecount-${params.tag}.img" + $RunAscat.container = "${params.containerPath}/r-base-${params.tag}.img" + $RunBamQC.container = "${params.containerPath}/qualimap-${params.tag}.img" +
$RunBcftoolsStats.container = "${params.containerPath}/caw-${params.tag}.img" + $RunConvertAlleleCounts.container = "${params.containerPath}/r-base-${params.tag}.img" + $RunFastQC.container = "${params.containerPath}/fastqc-${params.tag}.img" + $RunFreeBayes.container = "${params.containerPath}/freebayes-${params.tag}.img" + $RunGenotypeGVCFs.container = "${params.containerPath}/gatk-${params.tag}.img" + $RunHaplotypecaller.container = "${params.containerPath}/gatk-${params.tag}.img" + $RunManta.container = "${params.containerPath}/caw-${params.tag}.img" + $RunMultiQC.container = "${params.containerPath}/multiqc-${params.tag}.img" + $RunMutect1.container = "${params.containerPath}/mutect1-${params.tag}.img" + $RunMutect2.container = "${params.containerPath}/gatk-${params.tag}.img" + $RunSamtoolsStats.container = "${params.containerPath}/caw-${params.tag}.img" + $RunSingleManta.container = "${params.containerPath}/caw-${params.tag}.img" + $RunSingleStrelka.container = "${params.containerPath}/caw-${params.tag}.img" + $RunSnpeff.container = {params.genome == "GRCh38" ? "${params.containerPath}/snpeffgrch38-${params.tag}.img" : "${params.containerPath}/snpeffgrch37-${params.tag}.img"} + $RunStrelka.container = "${params.containerPath}/caw-${params.tag}.img" + $RunVEP.container = {params.genome == "GRCh38" ?
"${params.containerPath}/vepgrch38-${params.tag}.img" : "${params.containerPath}/vepgrch37-${params.tag}.img"} } diff --git a/configuration/singularity.config b/configuration/singularity.config index 496d8fb7c9..198b45a9e2 100644 --- a/configuration/singularity.config +++ b/configuration/singularity.config @@ -13,36 +13,5 @@ singularity { enabled = true } -process { - $BuildBWAindexes.container = 'docker://maxulysse/caw:1.2.1' - $BuildPicardIndex.container = 'docker://maxulysse/picard:1.2.1' - $BuildSAMToolsIndex.container = 'docker://maxulysse/caw:1.2.1' - $BuildVCFIndex.container = 'docker://maxulysse/igvtools:1.2.1' - $ConcatVCF.container = 'docker://maxulysse/caw:1.2.1' - $CreateRecalibrationTable.container = 'docker://maxulysse/gatk:1.2.1' - $IndelRealigner.container = 'docker://maxulysse/gatk:1.2.1' - $MapReads.container = 'docker://maxulysse/caw:1.2.1' - $MarkDuplicates.container = 'docker://maxulysse/picard:1.2.1' - $MergeBams.container = 'docker://maxulysse/caw:1.2.1' - $RealignerTargetCreator.container = 'docker://maxulysse/gatk:1.2.1' - $RecalibrateBam.container = 'docker://maxulysse/gatk:1.2.1' - $RunAlleleCount.container = 'docker://maxulysse/runallelecount:1.2.1' - $RunAscat.container = 'docker://maxulysse/r-base:1.2.1' - $RunBamQC.container = 'docker://maxulysse/qualimap:1.2.1' - $RunBcftoolsStats.container = 'docker://maxulysse/caw:1.2.1' - $RunConvertAlleleCounts.container = 'docker://maxulysse/r-base:1.2.1' - $RunFastQC.container = 'docker://maxulysse/fastqc:1.2.1' - $RunFreeBayes.container = 'docker://maxulysse/freebayes:1.2.1' - $RunGenotypeGVCFs.container = 'docker://maxulysse/gatk:1.2.1' - $RunHaplotypecaller.container = 'docker://maxulysse/gatk:1.2.1' - $RunManta.container = 'docker://maxulysse/caw:1.2.1' - $RunMultiQC.container = 'docker://maxulysse/multiqc:1.2.1' - $RunMutect1.container = 'docker://maxulysse/mutect1:1.2.1' - $RunMutect2.container = 'docker://maxulysse/gatk:1.2.1' - $RunSamtoolsStats.container = 
'docker://maxulysse/caw:1.2.1' - $RunSingleManta.container = 'docker://maxulysse/caw:1.2.1' - $RunSingleStrelka.container = 'docker://maxulysse/caw:1.2.1' - $RunSnpeff.container = {params.genome == "GRCh38" ? 'docker://maxulysse/snpeffgrch38:1.2.1' : 'docker://maxulysse/snpeffgrch37:1.2.1'} - $RunStrelka.container = 'docker://maxulysse/caw:1.2.1' - $RunVEP.container = {params.genome == "GRCh38" ? 'docker://maxulysse/vepgrch38:1.2.1' : 'docker://maxulysse/vepgrch37:1.2.1'} -} +params.repository='docker://maxulysse' +params.tag='1.2.3' diff --git a/configuration/uppmax-localhost.config b/configuration/uppmax-localhost.config index d18f507b6e..94df4eb7d3 100644 --- a/configuration/uppmax-localhost.config +++ b/configuration/uppmax-localhost.config @@ -18,6 +18,11 @@ params { totalMemory = 104.GB // change to 240 on irma } +executor { + name = 'local' + cpus = 16 +} + process { // Default process resources diff --git a/containers/fastqc/Dockerfile b/containers/fastqc/Dockerfile index ad746292fd..65e2fa37b4 100644 --- a/containers/fastqc/Dockerfile +++ b/containers/fastqc/Dockerfile @@ -8,10 +8,8 @@ LABEL \ # Install libraries RUN \ apt-get update && apt-get install -y --no-install-recommends \ - cpanminus \ wget \ - && rm -rf /var/lib/apt/lists/* \ - && cpanm FindBin + && rm -rf /var/lib/apt/lists/* # Setup ENV variables ENV FASTQC_VERSION=0.11.5 diff --git a/containers/freebayes/Dockerfile b/containers/freebayes/Dockerfile index 5f4ac0be36..afe2bf0313 100644 --- a/containers/freebayes/Dockerfile +++ b/containers/freebayes/Dockerfile @@ -17,7 +17,7 @@ RUN \ && rm -rf /var/lib/apt/lists/* # Setup ENV variables -ENV FREEBAYES_VERSION="1.1.0" +ENV FREEBAYES_VERSION=1.1.0 # Install BCFTools RUN \ diff --git a/containers/gatk/Dockerfile b/containers/gatk/Dockerfile index 97864cd0cf..1cdfaf5dc2 100644 --- a/containers/gatk/Dockerfile +++ b/containers/gatk/Dockerfile @@ -5,8 +5,7 @@ LABEL \ description="GATK image for use in CAW" \ maintainer="maxime.garcia@scilifelab.se" 
-ENV \ -GATK_HOME=/usr +ENV GATK_HOME=/usr # Create UPPMAX directories RUN mkdir /pica /proj /scratch /sw diff --git a/containers/mutect1/Dockerfile b/containers/mutect1/Dockerfile index ae0f3ca4dd..7c0d2a3f6e 100644 --- a/containers/mutect1/Dockerfile +++ b/containers/mutect1/Dockerfile @@ -8,20 +8,20 @@ LABEL \ # Install libraries RUN \ apt-get update && apt-get install -y --no-install-recommends \ - wget \ + wget \ && rm -rf /var/lib/apt/lists/* # Setup ENV variables ENV \ - MUTECT_HOME="/opt/mutect-1.1.5" \ - MUTECT_VERSION="1.1.5" + MUTECT_HOME=/opt/mutect-1.1.5 \ + MUTECT_VERSION=1.1.5 # Install MuTect1 RUN \ wget --quiet -O muTect-${MUTECT_VERSION}-bin.zip \ https://github.com/broadinstitute/mutect/releases/download/${MUTECT_VERSION}/muTect-${MUTECT_VERSION}-bin.zip \ - && unzip muTect-${MUTECT_VERSION}-bin.zip -d $MUTECT_HOME \ + && unzip muTect-${MUTECT_VERSION}-bin.zip -d ${MUTECT_HOME} \ && rm muTect-${MUTECT_VERSION}-bin.zip \ -&& mv $MUTECT_HOME/muTect-$MUTECT_VERSION.jar $MUTECT_HOME/muTect.jar +&& mv ${MUTECT_HOME}/muTect-${MUTECT_VERSION}.jar ${MUTECT_HOME}/muTect.jar # Create UPPMAX directories RUN mkdir /pica /proj /scratch /sw diff --git a/containers/qualimap/Dockerfile b/containers/qualimap/Dockerfile index 1ed69957b9..cf0021840f 100644 --- a/containers/qualimap/Dockerfile +++ b/containers/qualimap/Dockerfile @@ -8,7 +8,7 @@ LABEL \ # Install libraries RUN \ apt-get update && apt-get install -y --no-install-recommends \ - wget \ + wget \ && rm -rf /var/lib/apt/lists/* # Setup ENV variables diff --git a/containers/runallelecount/Dockerfile b/containers/runallelecount/Dockerfile index e9d45a9506..3b6ff53b0a 100644 --- a/containers/runallelecount/Dockerfile +++ b/containers/runallelecount/Dockerfile @@ -14,14 +14,15 @@ ENV \ # Install libraries RUN \ apt-get update && apt-get install -y --no-install-recommends \ - build-essential \ - ca-certificates \ - libbz2-dev \ - liblzma-dev \ - libncurses5-dev \ - libncursesw5-dev \ - wget \ - zlib1g-dev + 
build-essential \ + ca-certificates \ + libbz2-dev \ + liblzma-dev \ + libncurses5-dev \ + libncursesw5-dev \ + wget \ + zlib1g-dev \ + && rm -rf /var/lib/apt/lists/* # Install alleleCount RUN \ diff --git a/containers/snpeff/Dockerfile b/containers/snpeff/Dockerfile index 3ab0c6d197..8d47a14090 100644 --- a/containers/snpeff/Dockerfile +++ b/containers/snpeff/Dockerfile @@ -18,15 +18,15 @@ RUN \ # Setup ENV variables ENV \ - SNPEFF_BIN="snpEff_v4_3r_core.zip" \ - SNPEFF_HOME="/opt/snpEff" + SNPEFF_VERSION=4_3r \ + SNPEFF_HOME=/opt/snpEff # Install snpEff RUN \ - wget --quiet -O $SNPEFF_BIN \ - http://downloads.sourceforge.net/project/snpeff/$SNPEFF_BIN \ - && unzip $SNPEFF_BIN -d /opt/ \ - && rm $SNPEFF_BIN + wget --quiet -O snpEff_v${SNPEFF_VERSION}_core.zip \ + http://downloads.sourceforge.net/project/snpeff/snpEff_v${SNPEFF_VERSION}_core.zip \ + && unzip snpEff_v${SNPEFF_VERSION}_core.zip -d /opt/ \ + && rm snpEff_v${SNPEFF_VERSION}_core.zip # Create UPPMAX directories RUN mkdir /pica /proj /scratch /sw diff --git a/containers/vepgrch37/Dockerfile b/containers/vepgrch37/Dockerfile index 906fb8e21c..9ba17e873c 100644 --- a/containers/vepgrch37/Dockerfile +++ b/containers/vepgrch37/Dockerfile @@ -1,4 +1,4 @@ -FROM willmclaren/ensembl-vep:release_90.5 +FROM willmclaren/ensembl-vep:release_90.6 LABEL \ author="Maxime Garcia" \ @@ -22,4 +22,3 @@ RUN \ # Create UPPMAX directories USER root RUN mkdir /pica /proj /scratch /sw -USER vep diff --git a/containers/vepgrch38/Dockerfile b/containers/vepgrch38/Dockerfile index 238e17d4eb..2ae9170e85 100644 --- a/containers/vepgrch38/Dockerfile +++ b/containers/vepgrch38/Dockerfile @@ -1,4 +1,4 @@ -FROM willmclaren/ensembl-vep:release_90.5 +FROM willmclaren/ensembl-vep:release_90.6 LABEL \ author="Maxime Garcia" \ @@ -22,4 +22,3 @@ RUN \ # Create UPPMAX directories USER root RUN mkdir /pica /proj /scratch /sw -USER vep diff --git a/data/tiny/tiny/normal/tiny_normal_L001_R1_TEST.fastq.gz 
b/data/tiny/tiny/normal/tiny_normal_L001_R1_TEST.fastq.gz new file mode 100644 index 0000000000..b25487aa91 Binary files /dev/null and b/data/tiny/tiny/normal/tiny_normal_L001_R1_TEST.fastq.gz differ diff --git a/data/tiny/tiny/normal/tiny_normal_L001_R2_TEST.fastq.gz b/data/tiny/tiny/normal/tiny_normal_L001_R2_TEST.fastq.gz new file mode 100644 index 0000000000..b917c80cef Binary files /dev/null and b/data/tiny/tiny/normal/tiny_normal_L001_R2_TEST.fastq.gz differ diff --git a/data/tiny/tiny/normal/tiny_normal_L002_R1_TEST.fastq.gz b/data/tiny/tiny/normal/tiny_normal_L002_R1_TEST.fastq.gz new file mode 100644 index 0000000000..509295fdf7 Binary files /dev/null and b/data/tiny/tiny/normal/tiny_normal_L002_R1_TEST.fastq.gz differ diff --git a/data/tiny/tiny/normal/tiny_normal_L002_R2_TEST.fastq.gz b/data/tiny/tiny/normal/tiny_normal_L002_R2_TEST.fastq.gz new file mode 100644 index 0000000000..ae0e1e6ee7 Binary files /dev/null and b/data/tiny/tiny/normal/tiny_normal_L002_R2_TEST.fastq.gz differ diff --git a/data/tiny/tiny/normal/tiny_normal_L004_R1_TEST.fastq.gz b/data/tiny/tiny/normal/tiny_normal_L004_R1_TEST.fastq.gz new file mode 100644 index 0000000000..29e4a3bd15 Binary files /dev/null and b/data/tiny/tiny/normal/tiny_normal_L004_R1_TEST.fastq.gz differ diff --git a/data/tiny/tiny/normal/tiny_normal_L004_R2_TEST.fastq.gz b/data/tiny/tiny/normal/tiny_normal_L004_R2_TEST.fastq.gz new file mode 100644 index 0000000000..f645870a8f Binary files /dev/null and b/data/tiny/tiny/normal/tiny_normal_L004_R2_TEST.fastq.gz differ diff --git a/data/tiny/tiny/normal/tiny_normal_L007_R1_TEST.fastq.gz b/data/tiny/tiny/normal/tiny_normal_L007_R1_TEST.fastq.gz new file mode 100644 index 0000000000..8e3c3d295a Binary files /dev/null and b/data/tiny/tiny/normal/tiny_normal_L007_R1_TEST.fastq.gz differ diff --git a/data/tiny/tiny/normal/tiny_normal_L007_R2_TEST.fastq.gz b/data/tiny/tiny/normal/tiny_normal_L007_R2_TEST.fastq.gz new file mode 100644 index 0000000000..d03e2369c0 Binary 
files /dev/null and b/data/tiny/tiny/normal/tiny_normal_L007_R2_TEST.fastq.gz differ diff --git a/data/tiny/tiny/normal/tiny_normal_L008_R1_TEST.fastq.gz b/data/tiny/tiny/normal/tiny_normal_L008_R1_TEST.fastq.gz new file mode 100644 index 0000000000..ec28edfb5d Binary files /dev/null and b/data/tiny/tiny/normal/tiny_normal_L008_R1_TEST.fastq.gz differ diff --git a/data/tiny/tiny/normal/tiny_normal_L008_R2_TEST.fastq.gz b/data/tiny/tiny/normal/tiny_normal_L008_R2_TEST.fastq.gz new file mode 100644 index 0000000000..f02dc49a05 Binary files /dev/null and b/data/tiny/tiny/normal/tiny_normal_L008_R2_TEST.fastq.gz differ diff --git a/doc/BUILD.md b/doc/BUILD.md index 30d75b1f69..22a6638498 100644 --- a/doc/BUILD.md +++ b/doc/BUILD.md @@ -8,11 +8,12 @@ All the containers have built in UPPMAX directories, so there is no need to add ## Usage ```bash -nextflow run . [--docker] [--singularity] [--singularityPublishDir ] [--push] [--containers ] [--repository ] [--tag tag] +nextflow run . [--docker] [--singularity] [--containerPath ] [--push] [--containers ] [--repository ] [--tag tag] ``` - `--containers`: Choose which containers to build. Default: `all`. Possible values (to separate by commas): - `all` - Build all available containers. + - `caw` - `fastqc` - `freebayes` - `gatk` @@ -21,13 +22,11 @@ nextflow run . [--docker] [--singularity] [--singularityPublishDir ] [--pu - `mutect1` - `picard` - `qualimap` + - `r-base` - `runallelecount` - - `runascat` - - `runconvertallelecounts` - `snpeff` this container serves as a base for `snpeffgrch37` and `snpeffgrch38` - `snpeffgrch37` - `snpeffgrch38` - - `vep` this container serves as a base for `vepgrch37` and `vepgrch38` - `vepgrch37` - `vepgrch38` @@ -35,7 +34,7 @@ nextflow run . [--docker] [--singularity] [--singularityPublishDir ] [--pu - `--push`: Push containers to `DockerHub` - `--repository`: Build containers under given repository. Default: `maxulysse` - `--singularity`: Build containers using `Singularity`. 
-- `--singularityPublishDir`: Select where to download containers. Default: `$PWD` +- `--containerPath`: Select where to download containers. Default: `$PWD` - `--tag`: Build containers using given tag. Default is version number. ## Example diff --git a/doc/CONFIG.md b/doc/CONFIG.md index 4f44acad20..a8830a7a0f 100644 --- a/doc/CONFIG.md +++ b/doc/CONFIG.md @@ -10,6 +10,12 @@ We provides several configuration files and profiles for CAW. The standard ones Every configuration file can be modified for your own use. If you want you can specify the use of a config file using `-c ` +### [`containers.config`](../configuration/containers.config) + +Contain images for all process. +Images will be pulled automatically. +Use in your own profile if needed. + ### [`docker.config`](../configuration/docker.config) Contain Docker images for all process. diff --git a/doc/CONTAINERS.md b/doc/CONTAINERS.md index 5c58c6196e..ff7943f63c 100644 --- a/doc/CONTAINERS.md +++ b/doc/CONTAINERS.md @@ -1,5 +1,29 @@ # Containers +Subsets of all containers can be downloaded: + +For normal-only processing + Reports + HaplotypeCaller, Manta and Strelka: + - [caw](#caw-) + - [fastqc](#fastqc-) + - [gatk](#gatk-) + - [multiqc](#multiqc-) + - [picard](#picard-) + - [qualimap](#qualimap-) + +For the rest of the variant callers, you will need also: + - [freebayes](#freebayes-) + - [mutect1](#mutect1-) + - [r-base](#r-base-) + - [runallelecount](#runallelecount-) + +For annotation for GRCh37, you will need: + - [snpeffgrch37](#snpeffgrch37-) + - [vepgrch37](#vepgrch37-) + +For annotation for GRCh38, you will need: + - [snpeffgrch38](#snpeffgrch38-) + - [vepgrch38](#vepgrch38-) + A container named after the process is made for each process. If a container can be reused, it will be named after the tool used. ## caw [![caw-docker status][caw-docker-badge]][caw-docker-link] @@ -85,13 +109,13 @@ A container named after the process is made for each process.
If a container can ## vepgrch37 [![vepgrch37-docker status][vepgrch37-docker-badge]][vepgrch37-docker-link] -- Based on `willmclaren/ensembl-vep:release_90.5` +- Based on `willmclaren/ensembl-vep:release_90.6` - Contain **[VEP][vep-link]** 90.5 - Contain GRCh37 ## vepgrch38 [![vepgrch38-docker status][vepgrch38-docker-badge]][vepgrch38-docker-link] -- Based on `willmclaren/ensembl-vep:release_90.5` +- Based on `willmclaren/ensembl-vep:release_90.6` - Contain **[VEP][vep-link]** 90.5 - Contain GRCh38 diff --git a/doc/USAGE.md b/doc/USAGE.md index bc7cd80c76..316693ec04 100644 --- a/doc/USAGE.md +++ b/doc/USAGE.md @@ -86,9 +86,47 @@ Display more information about files being processed. Display version number and information. +## Containers + +### --containerPath `Path to the singularity containers (default=containers/)` + +### --repository `Docker-hub repository (default=maxulysse)` + +### --tag `tag of the containers to use (default=current version)` + +## References + +If needed, you can specify each reference file by command line. + +### --acLoci `acLoci file` + +### --bwaIndex `bwaIndex file` + +### --cosmic `cosmic file` + +### --cosmicIndex `cosmicIndex file` + +### --dbsnp `dbsnp file` + +### --dbsnpIndex `dbsnpIndex file` + +### --genomeDict `genomeDict file` + +### --genomeFile `genomeFile file` + +### --genomeIndex `genomeIndex file` + +### --intervals `intervals file` + +### --knownIndels `knownIndels file` + +### --knownIndelsIndex `knownIndelsIndex file` + +### --snpeffDb `snpeffDb file` + ## Parameters -Simpler to specify in the config file. +Simpler to specify in the configuration files, but it's still possible to specify every thing in the command line. 
### --runTime `time` diff --git a/main.nf b/main.nf index 385129008d..cf4a002bda 100644 --- a/main.nf +++ b/main.nf @@ -41,7 +41,6 @@ kate: syntax groovy; space-indent on; indent-width 2; - CreateIntervalBeds - Create and sort intervals into bed files - RunHaplotypecaller - Run HaplotypeCaller for Germline Variant Calling (Parallelized processes) - RunGenotypeGVCFs - Run HaplotypeCaller for Germline Variant Calling (Parallelized processes) - - RunBcftoolsStats - Run BCFTools stats on vcf before annotation - RunMutect1 - Run MuTect1 for Variant Calling (Parallelized processes) - RunMutect2 - Run MuTect2 for Variant Calling (Parallelized processes) - RunFreeBayes - Run FreeBayes for Variant Calling (Parallelized processes) @@ -53,9 +52,9 @@ kate: syntax groovy; space-indent on; indent-width 2; - RunAlleleCount - Run AlleleCount to prepare for ASCAT - RunConvertAlleleCounts - Run convertAlleleCounts to prepare for ASCAT - RunAscat - Run ASCAT for CNV + - RunBcftoolsStats - Run BCFTools stats on vcf before annotation - RunSnpeff - Run snpEff for annotation of vcf files - RunVEP - Run VEP for annotation of vcf files - - RunBcftoolsStats - Run BCFTools stats on vcf files - GenerateMultiQCconfig - Generate a config file for MultiQC - RunMultiQC - Run MultiQC for report and QC ================================================================================ @@ -63,7 +62,7 @@ kate: syntax groovy; space-indent on; indent-width 2; ================================================================================ */ -version = '1.2.2' +version = '1.2.3' // Check that Nextflow version is up to date enough // try / throw / catch works for NF versions < 0.25 when this was implemented @@ -73,13 +72,39 @@ try { throw GroovyException('Nextflow version too old') } } catch (all) { - log.error "====================================================\n" + + log.error "============================================================\n" + " Nextflow version $nf_required_version required! 
You are running v$workflow.nextflow.version.\n" + " Pipeline execution will continue, but things may break.\n" + " Please update Nextflow.\n" + "============================================================" } +// Default params: +// Such params are overridden by command line or configuration definitions + +// No tools to annotate +params.annotateTools = '' +// No vcf to annotate +params.annotateVCF = '' +// For MultiQC reports +params.callName = '' +// For MultiQC reports +params.contactMail = '' +// GVCF are generated +params.noGVCF = false +// Reports are generated +params.noReports = false +// No sample is defined +params.sample = '' +// No sampleDir is defined +params.sampleDir = '' +// Step is mapping +params.step = 'mapping' +// Not testing +params.test = '' +// No tools to be used +params.tools = '' + if (params.help) exit 0, helpMessage() if (params.version) exit 0, versionMessage() if (!isAllowedParams(params)) exit 1, "params is unknown, see --help for more information" @@ -124,9 +149,9 @@ else explicitBqsrNeeded = tools.intersect(['manta', 'mutect1', 'mutect2', 'vardi tsvPath = '' if (params.sample) tsvPath = params.sample +// No need for tsv file for step annotate if (!params.sample && !params.sampleDir) { tsvPaths = [ - 'annotate': "$workflow.launchDir/$directoryMap.recalibrated/recalibrated.tsv", 'mapping': "$workflow.projectDir/data/tsv/tiny.tsv", 'realign': "$workflow.launchDir/$directoryMap.nonRealigned/nonRealigned.tsv", 'recalibrate': "$workflow.launchDir/$directoryMap.nonRecalibrated/nonRecalibrated.tsv", @@ -136,12 +161,12 @@ if (!params.sample && !params.sampleDir) { } // Set up the fastqFiles and bamFiles channels. 
One of them remains empty +// Except for step annotate, in which both stay empty fastqFiles = Channel.empty() bamFiles = Channel.empty() if (tsvPath) { tsvFile = file(tsvPath) switch (step) { - case 'annotate': bamFiles = extractBams(tsvFile); break case 'mapping': fastqFiles = extractFastq(tsvFile); break case 'realign': bamFiles = extractBams(tsvFile); break case 'recalibrate': bamFiles = extractRecal(tsvFile); break @@ -152,7 +177,7 @@ if (tsvPath) { if (step != 'mapping') exit 1, '--sampleDir does not support steps other than "mapping"' fastqFiles = extractFastqFromDir(params.sampleDir) tsvFile = params.sampleDir // used in the reports -} else exit 1, 'No sample were defined, see --help' +} else if (step != 'annotate') exit 1, 'No sample were defined, see --help' if (step == 'mapping') { (patientGenders, fastqFiles) = extractGenders(fastqFiles) @@ -633,7 +658,11 @@ process RunBamQC { script: """ - qualimap --java-mem-size=${task.memory.toGiga()}G bamqc -bam $bam -outdir $idSample -outformat HTML + qualimap --java-mem-size=${task.memory.toGiga()}G \ + bamqc \ + -bam $bam \ + -outdir $idSample \ + -outformat HTML """ } @@ -1383,7 +1412,7 @@ if (step == 'annotate' && annotateVCF == []) { vcfNotToAnnotate.close() -(vcfForBCF, vcfForSnpeff, vcfForVep) = vcfToAnnotate.into(3) +(vcfForBCFtools, vcfForSnpeff, vcfForVep) = vcfToAnnotate.into(3) process RunBcftoolsStats { tag {vcf} @@ -1391,7 +1420,7 @@ process RunBcftoolsStats { publishDir directoryMap.bcftoolsStats, mode: 'copy' input: - set variantCaller, file(vcf) from vcfForBCF + set variantCaller, file(vcf) from vcfForBCFtools output: file ("${vcf.baseName}.bcf.tools.stats.out") into bcfReport @@ -1419,7 +1448,7 @@ process RunSnpeff { val snpeffDb from Channel.value(params.genomes[params.genome].snpeffDb) output: - set file("${vcf.baseName}.ann.vcf"), file("${vcf.baseName}_snpEff_genes.txt"), file("${vcf.baseName}_snpEff.csv"), file("${vcf.baseName}_snpEff_summary.html") into snpeffReport + set 
file("${vcf.baseName}.snpEff.ann.vcf"), file("${vcf.baseName}.snpEff.genes.txt"), file("${vcf.baseName}.snpEff.csv"), file("${vcf.baseName}.snpEff.summary.html") into snpeffReport when: 'snpeff' in tools @@ -1428,13 +1457,14 @@ process RunSnpeff { java -Xmx${task.memory.toGiga()}g \ -jar \$SNPEFF_HOME/snpEff.jar \ $snpeffDb \ - -csvStats ${vcf.baseName}_snpEff.csv \ - -v -cancer \ + -csvStats ${vcf.baseName}.snpEff.csv \ + -nodownload \ + -cancer \ + -v \ ${vcf} \ - > ${vcf.baseName}.ann.vcf + > ${vcf.baseName}.snpEff.ann.vcf - mv snpEff_summary.html ${vcf.baseName}_snpEff_summary.html - mv ${vcf.baseName}_snpEff.genes.txt ${vcf.baseName}_snpEff_genes.txt + mv snpEff_summary.html ${vcf.baseName}.snpEff.summary.html """ } @@ -1452,45 +1482,25 @@ process RunVEP { set variantCaller, file(vcf) from vcfForVep output: - set file("${vcf.baseName}.ann.vcf"), file("${vcf.baseName}*summary*") into vepReport + set file("${vcf.baseName}.vep.ann.vcf"), file("${vcf.baseName}.vep.summary.html") into vepReport when: 'vep' in tools script: genome = params.genome == 'smallGRCh37' ? 
'GRCh37' : params.genome - if (!workflow.container.isEmpty()) // test whether running in docker """ vep \ -i $vcf \ + -o ${vcf.baseName}.vep.ann.vcf \ + --stats_file ${vcf.baseName}.vep.summary.html \ + --cache \ + --everything \ + --filter_common \ --format vcf \ - --sift b \ - --polyphen b \ - --symbol \ - --numbers \ - --biotype \ - --total_length \ - -o ${vcf.baseName}.ann.vcf \ - --vcf \ - -offline \ - --fields Consequence,Codons,Amino_acids,Gene,SYMBOL,Feature,EXON,PolyPhen,SIFT,Protein_position,BIOTYPE - """ - else - """ - variant_effect_predictor.pl \ - -i $vcf \ - --vcf \ - --format vcf \ - --sift b \ - --polyphen b \ - --symbol \ - --numbers \ - --biotype \ + --offline \ + --pick \ --total_length \ - -o ${vcf.baseName}.ann.vcf \ - --cache --dir_cache /sw/data/uppnex/vep/89 \ - --assembly $genome \ - --fields Consequence,Codons,Amino_acids,Gene,SYMBOL,Feature,EXON,PolyPhen,SIFT,Protein_position,BIOTYPE \ - -offline + --vcf """ } @@ -1510,7 +1520,9 @@ process GenerateMultiQCconfig { when: reports script: - annotateString = annotateTools ? "- Annotate on : ${annotateTools.join(", ")}" : '' + annotateToolString = annotateTools ? "- Annotate on : ${annotateTools.join(", ")}" : '' + annotateVCFstring = annotateVCF ? "- Annotate on : ${annotateVCF.join(", ")}" : '' + tsvString = step != 'annotate' ? 
"- TSV file: ${tsvFile}" : '' """ touch multiqc_config.yaml echo "custom_logo: $baseDir/doc/images/CAW_logo.png" >> multiqc_config.yaml @@ -1518,14 +1530,16 @@ process GenerateMultiQCconfig { echo "custom_logo_title: 'Cancer Analysis Workflow'" >> multiqc_config.yaml echo "report_header_info:" >> multiqc_config.yaml echo "- CAW version: $version" >> multiqc_config.yaml + echo "- Contact Name: ${params.callName}" >> multiqc_config.yaml echo "- Contact E-mail: ${params.contactMail}" >> multiqc_config.yaml echo "- Command Line: ${workflow.commandLine}" >> multiqc_config.yaml echo "- Directory: ${workflow.launchDir}" >> multiqc_config.yaml - echo "- TSV file: ${tsvFile}" >> multiqc_config.yaml - echo "- Genome: "${params.genome} >> multiqc_config.yaml + echo ${tsvString} >> multiqc_config.yaml + echo "- Genome: "${params.genome} >> multiqc_config.yaml echo "- Step: "${step} >> multiqc_config.yaml echo "- Tools: "${tools.join(", ")} >> multiqc_config.yaml - echo ${annotateString} >> multiqc_config.yaml + echo ${annotateToolString} >> multiqc_config.yaml + echo ${annotateVCFstring} >> multiqc_config.yaml echo " acLoci : $referenceMap.acLoci" >> multiqc_config.yaml echo " bwaIndex : "${referenceMap.bwaIndex.join(", ")} >> multiqc_config.yaml echo " cosmic : $referenceMap.cosmic" >> multiqc_config.yaml @@ -1644,6 +1658,8 @@ def checkParams(it) { 'callName', 'contact-mail', 'contactMail', + 'container-path', + 'containerPath', 'containers', 'cosmic-index', 'cosmic', @@ -1679,9 +1695,7 @@ def checkParams(it) { 'sampleDir', 'single-CPUMem', 'singleCPUMem', - 'singularity-publish-dir', 'singularity', - 'singularityPublishDir', 'step', 'tag', 'test', @@ -2046,11 +2060,12 @@ def minimalInformationMessage() { log.info "Project Dir : $workflow.projectDir" log.info "Launch Dir : $workflow.launchDir" log.info "Work Dir : $workflow.workDir" - log.info "TSV file : $tsvFile" + if (step != 'annotate') log.info "TSV file : $tsvFile" log.info "Genome : " + params.genome log.info "Step : " 
+ step - if (tools) {log.info "Tools : " + tools.join(', ')} - if (annotateTools) {log.info "Annotate on : " + annotateTools.join(', ')} + if (tools) log.info "Tools : " + tools.join(', ') + if (annotateTools) log.info "Annotate on : " + annotateTools.join(', ') + if (annotateVCF) log.info "VCF files : " +annotateVCF.join(',\n ') log.info "Reference files used:" log.info " acLoci : $referenceMap.acLoci" log.info " bwaIndex : " + referenceMap.bwaIndex.join(',\n ') diff --git a/nextflow.config b/nextflow.config index 1758f219d0..0144204ccf 100644 --- a/nextflow.config +++ b/nextflow.config @@ -12,28 +12,9 @@ manifest { } params { // default value for params - annotateTools = '' - annotateVCF = '' - build = false - callName = '' // For MultiQC reports - contactMail = '' // For MultiQC reports - containers = 'all' - docker = false genome = 'GRCh38' help = false - noGVCF = false - noReports = false project = '' - push = false - repository = 'maxulysse' - sample = '' - sampleDir = '' - singularity = false - singularityPublishDir = false - step = 'mapping' - tag = false - test = false - tools = '' vcflist ='' verbose = false version = false @@ -72,11 +53,13 @@ profiles { docker { // For small testing testing with Docker includeConfig 'configuration/travis.config' includeConfig 'configuration/docker.config' + includeConfig 'configuration/containers.config' } // Singularity images will be pulled automatically singularity { // For small testing includeConfig 'configuration/travis.config' includeConfig 'configuration/singularity.config' + includeConfig 'configuration/containers.config' } } diff --git a/scripts/do_all.sh b/scripts/do_all.sh index 82a2c5caba..6da2761064 100755 --- a/scripts/do_all.sh +++ b/scripts/do_all.sh @@ -1,38 +1,38 @@ #!/bin/bash set -xeuo pipefail -PROFILE="singularity" -PUSH="" -REPOSITORY="--repository maxulysse" -TAG="1.2.1" -TOOL="docker" +PROFILE=singularity +PUSH='' +REPOSITORY=maxulysse +TAG=1.2.3 +TOOL=docker while [[ $# -gt 0 ]] do - key="$1" + 
key=$1 case $key in - -r|--repository) - REPOSITORY="--repository $2" - shift # past argument - shift # past value - ;; - -t|--tag) - TAG="--tag $2" + -p|--profile) + PROFILE=$2 shift # past argument shift # past value ;; - -p|--profile) - PROFILE="$2" + --pull) + TOOL=singularity shift # past argument - shift # past value ;; --push) PUSH=--push shift # past argument ;; - --pull) - TOOL=singularity + -r|--repository) + REPOSITORY=$2 shift # past argument + shift # past value + ;; + -t|--tag) + TAG=$2 + shift # past argument + shift # past value ;; *) # unknown option shift # past argument @@ -42,8 +42,8 @@ done if [ $TOOL = docker ] then - nextflow run buildContainers.nf -profile ${PROFILE} --verbose --docker ${PUSH} ${REPOSITORY} ${TAG} --containers caw,fastqc,freebayes,gatk,igvtools,multiqc,mutect1,picard,qualimap,r-base,runallelecount,snpeff - nextflow run buildContainers.nf -profile ${PROFILE} --verbose --docker ${PUSH} ${REPOSITORY} ${TAG} --containers snpeffgrch37,snpeffgrch38,vepgrch37,vepgrch38 + nextflow run buildContainers.nf -profile ${PROFILE} --verbose --docker ${PUSH} --repository ${REPOSITORY} --tag ${TAG} --containers caw,fastqc,freebayes,gatk,igvtools,multiqc,mutect1,picard,qualimap,r-base,runallelecount,snpeff + nextflow run buildContainers.nf -profile ${PROFILE} --verbose --docker ${PUSH} --repository ${REPOSITORY} --tag ${TAG} --containers snpeffgrch37,snpeffgrch38,vepgrch37,vepgrch38 else - nextflow run buildContainers.nf -profile ${PROFILE} --verbose --singularity ${REPOSITORY} ${TAG} --singularityPublishDir containers/ --containers caw,fastqc,freebayes,gatk,igvtools,multiqc,mutect1,picard,qualimap,r-base,runallelecount,snpeffgrch37,snpeffgrch38,vepgrch37,vepgrch38 + nextflow run buildContainers.nf -profile ${PROFILE} --verbose --singularity --repository ${REPOSITORY} --tag ${TAG} --containerPath containers/ --containers 
caw,fastqc,freebayes,gatk,igvtools,multiqc,mutect1,picard,qualimap,r-base,runallelecount,snpeffgrch37,snpeffgrch38,vepgrch37,vepgrch38 fi diff --git a/scripts/skeleton_batch.sh b/scripts/skeleton_batch.sh index 8468b1651e..8b03efe15c 100644 --- a/scripts/skeleton_batch.sh +++ b/scripts/skeleton_batch.sh @@ -1,31 +1,99 @@ #!/bin/bash -#SBATCH -A sens2016004 #SBATCH -p node #SBATCH -t 168:00:00 + set -xeuo pipefail -# skeleton script to launch nextflow/singularity jobs with slurm on bianca -# save as whatever, and launch like: -# sbatch -J sample-MuTect2 ./whatever.sh sample.tsv MuTect2 -# -# PARAMETERS: -# $1 is the sample TSV file -# $2 the tool to run -# -# use the default deployed CAW version. "testing" versions should be at /castor/project/proj_nobackup/CAW/testing -CAW=/castor/project/proj_nobackup/CAW/default + +# skeleton script to launch CAW jobs on scratch with sbatch on bianca + +GENOME=GRCh38 +SAMPLE='' +STEP='' +TOOLS=false + +while [[ $# -gt 0 ]] +do + key=$1 + case $key in + -g|--genome) + GENOME=$2 + shift # past argument + shift # past value + ;; + -s|--sample) + SAMPLE=$2 + shift # past argument + shift # past value + ;; + --step) + STEP=$2 + shift # past argument + shift # past value + ;; + -t|--tools) + TOOLS=$2 + shift # past argument + shift # past value + ;; + *) # unknown option + shift # past argument + ;; + esac +done + +# Make a specific prefix for logs (trace + timeline); TOOLS stays at the literal string false when --tools is not given, so compare against it instead of testing for non-empty DATE=`date +%Y-%b-%d-%H%M` -PREFIX=${2}_${DATE} -ln -fs /castor/project/proj_nobackup/CAW/containers containers +if [[ $TOOLS != false ]] +then + PREFIX=${TOOLS}_${DATE} +else + PREFIX=${DATE} +fi + +# Use the default deployed CAW version. 
+# Other versions (including testing) are at /castor/project/proj_nobackup/CAW/ +CAW=/castor/project/proj_nobackup/CAW/default -# nextflow specific stuff to save everything on /scratch +# Link containers +ln -fs /castor/project/proj_nobackup/CAW/containers containers + +# Configure Nextflow to save everything on /scratch export NXF_TEMP=/scratch export NXF_LAUNCHBASE=/scratch export NXF_WORK=/scratch export NXF_HOME=/castor/project/proj_nobackup/nextflow export PATH=${NXF_HOME}/bin:${PATH} -nextflow run ${CAW}/main.nf --step skipPreprocessing --tools $2 --sample $1 -with-timeline ${PREFIX}.timeline.html -with-trace ${PREFIX}.trace.txt + +# save as whatever, and launch like (options are parsed by name; positional arguments are ignored): +# sbatch -A [project name] -J sample-MuTect2 ./whatever.sh --step VARIANTCALLING --sample sample.tsv --tools MuTect2 + +function run_caw() { + nextflow run ${CAW}/main.nf "$@" -with-timeline ${PREFIX}.timeline.html -with-trace ${PREFIX}.trace.txt +} + +# MAPPING +if [[ $STEP == MAPPING ]] +then + run_caw --sample ${SAMPLE} --step mapping +fi + +# RUN A SPECIFIC TOOL: +# -s or --sample the sample TSV file +# -t or --tools the tool to run +# Don't forget to specify the right SNIC project +# or add as the second line: +#SBATCH -A [project name] + +if [[ $STEP == VARIANTCALLING ]] +then + run_caw --sample ${SAMPLE} --step variantcalling --tools ${TOOLS} +fi # for annotation run -# sbatch -J sample-Ann ./whatever.sh result.vcf[.gz] snpEff -# nextflow run ${CAW}/main.nf --step annotate --tools $2 --annotateVCF $1 --sample Preprocessing/Recalibrated/recalibrated.tsv -with-timeline ${PREFIX}.timeline.html -with-trace ${PREFIX}.trace.txt +# sbatch -A [project name] -J sample-Ann ./whatever.sh --step ANNOTATE --sample result.vcf[.gz] --tools snpEff +# nextflow run ${CAW}/main.nf --step annotate --tools $2 --annotateVCF $1 --noReports --sample Preprocessing/Recalibrated/recalibrated.tsv -with-timeline ${PREFIX}.timeline.html -with-trace ${PREFIX}.trace.txt + +if [[ $STEP == ANNOTATE ]] +then + run_caw --step annotate --tools ${TOOLS} --annotateVCF ${SAMPLE} +fi 
diff --git a/scripts/test.sh b/scripts/test.sh index 44d8cb023b..ddbba94fd7 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -1,39 +1,39 @@ #!/bin/bash set -xeuo pipefail -GENOME="smallGRCh37" -PROFILE="singularity" -SAMPLE="data/tsv/tiny.tsv" -TAG="1.2.1" -TEST="ALL" +GENOME=smallGRCh37 +PROFILE=singularity +SAMPLE=data/tsv/tiny.tsv +TAG=1.2.3 +TEST=ALL TRAVIS=${TRAVIS:-false} while [[ $# -gt 0 ]] do - key="$1" + key=$1 case $key in -g|--genome) - GENOME="$2" + GENOME=$2 shift # past argument shift # past value ;; -p|--profile) - PROFILE="$2" + PROFILE=$2 shift # past argument shift # past value ;; -s|--sample) - SAMPLE="$2" + SAMPLE=$2 shift # past argument shift # past value ;; -t|--test) - TEST="$2" + TEST=$2 shift # past argument shift # past value ;; --tag) - TAG="$2" + TAG=$2 shift # past argument shift # past value ;; @@ -45,84 +45,74 @@ done function nf_test() { echo "$(tput setaf 1)nextflow run $@ -profile $PROFILE --genome $GENOME -resume --verbose$(tput sgr0)" - nextflow run "$@" -profile "$PROFILE" --genome $GENOME -resume --verbose + nextflow run $@ -profile $PROFILE --genome $GENOME -resume --verbose } # Build references only for smallGRCh37 -if [[ "$GENOME" == "smallGRCh37" ]] && [[ "$TEST" != "BUILDCONTAINERS" ]] +if [[ $GENOME == smallGRCh37 ]] && [[ $TEST != BUILDCONTAINERS ]] then nf_test buildReferences.nf --download # Remove images only on TRAVIS - if [[ "$PROFILE" == "docker" ]] && [[ "$TRAVIS" == true ]] + if [[ $PROFILE == docker ]] && [[ $TRAVIS == true ]] then docker rmi -f maxulysse/igvtools:${TAG} - elif [[ "$PROFILE" == singularity ]] && [[ "$TRAVIS" == true ]] + elif [[ $PROFILE == singularity ]] && [[ $TRAVIS == true ]] then rm -rf work/singularity/igvtools-${TAG}.img fi fi -if [[ "$TEST" = "MAPPING" ]] +if [[ ALL,MAPPING,REALIGN,RECALIBRATE =~ $TEST ]] then + nf_test . --step mapping --sampleDir data/tiny/tiny/normal nf_test . 
--step mapping --sample $SAMPLE fi -if [[ "$TEST" = "REALIGN" ]] || [[ "$TEST" = "ALL" ]] +if [[ ALL,REALIGN =~ $TEST ]] then - nf_test . --step mapping --sample $SAMPLE nf_test . --step realign --noReports nf_test . --step realign --tools HaplotypeCaller nf_test . --step realign --tools HaplotypeCaller --noReports --noGVCF fi -if [[ "$TEST" = "RECALIBRATE" ]] || [[ "$TEST" = "ALL" ]] +if [[ ALL,RECALIBRATE =~ $TEST ]] then - nf_test . --step mapping --sample $SAMPLE nf_test . --step recalibrate --noReports nf_test . --step recalibrate --tools FreeBayes,HaplotypeCaller,MuTect1,MuTect2,Strelka # Test whether restarting from an already recalibrated BAM works nf_test . --step variantCalling --tools Strelka --noReports fi -if [[ "$TEST" = "ANNOTATEVEP" ]] || [[ "$TEST" = "ALL" ]] +if [[ ALL,ANNOTATESNPEFF,ANNOTATEVEP =~ $TEST ]] then nf_test . --step mapping --sample data/tsv/tiny-single-manta.tsv --tools Manta,Strelka nf_test . --step mapping --sample data/tsv/tiny-manta.tsv --tools Manta,Strelka nf_test . --step mapping --sample $SAMPLE --tools MuTect2 # Remove images only on TRAVIS - if [[ "$PROFILE" == "docker" ]] && [[ "$TRAVIS" == true ]] + if [[ $PROFILE == docker ]] && [[ $TRAVIS == true ]] then docker rmi -f maxulysse/caw:${TAG} maxulysse/fastqc:${TAG} maxulysse/gatk:${TAG} maxulysse/picard:${TAG} - elif [[ "$PROFILE" == "singularity" ]] && [[ "$TRAVIS" == true ]] + elif [[ $PROFILE == singularity ]] && [[ $TRAVIS == true ]] then rm -rf work/singularity/caw-${TAG}.img work/singularity/fastqc-${TAG}.img work/singularity/gatk-${TAG}.img work/singularity/picard-${TAG}.img fi - nf_test . --step annotate --tools VEP --annotateTools Manta,Strelka - nf_test . --step annotate --tools VEP --annotateVCF VariantCalling/Manta/Manta_9876T_vs_1234N.diploidSV.vcf.gz,VariantCalling/Manta/Manta_9876T_vs_1234N.somaticSV.vcf.gz --noReports - nf_test . 
--step annotate --tools VEP --annotateVCF VariantCalling/Manta/Manta_9876T_vs_1234N.diploidSV.vcf.gz --noReports -fi - -if [[ "$TEST" = "ANNOTATESNPEFF" ]] || [[ "$TEST" = "ALL" ]] -then - nf_test . --step mapping --sample data/tsv/tiny-single-manta.tsv --tools Manta,Strelka - nf_test . --step mapping --sample data/tsv/tiny-manta.tsv --tools Manta,Strelka - nf_test . --step mapping --sample $SAMPLE --tools MuTect2 - - # Remove images only on TRAVIS - if [[ "$PROFILE" == "docker" ]] && [[ "$TRAVIS" == true ]] + if [[ $TEST = ANNOTATESNPEFF ]] then - docker rmi -f maxulysse/caw:${TAG} maxulysse/fastqc:${TAG} maxulysse/gatk:${TAG} maxulysse/picard:${TAG} - elif [[ "$PROFILE" == "singularity" ]] && [[ "$TRAVIS" == true ]] + ANNOTATOR=snpEFF + elif [[ $TEST = ANNOTATEVEP ]] then - rm -rf work/singularity/caw-${TAG}.img work/singularity/fastqc-${TAG}.img work/singularity/gatk-${TAG}.img work/singularity/picard-${TAG}.img + ANNOTATOR=VEP + elif [[ $TEST = ALL ]] + then + ANNOTATOR=snpEFF,VEP fi - nf_test . --step annotate --tools snpEff --annotateTools Manta,Strelka - nf_test . --step annotate --tools snpEff --annotateVCF VariantCalling/Manta/Manta_9876T_vs_1234N.diploidSV.vcf.gz,VariantCalling/Manta/Manta_9876T_vs_1234N.somaticSV.vcf.gz --noReports - nf_test . --step annotate --tools snpEff --annotateVCF VariantCalling/Manta/Manta_9876T_vs_1234N.diploidSV.vcf.gz --noReports + nf_test . --step annotate --tools ${ANNOTATOR} --annotateTools Manta,Strelka + nf_test . --step annotate --tools ${ANNOTATOR} --annotateVCF VariantCalling/Manta/Manta_9876T_vs_1234N.diploidSV.vcf.gz,VariantCalling/Manta/Manta_9876T_vs_1234N.somaticSV.vcf.gz --noReports + nf_test . 
--step annotate --tools ${ANNOTATOR} --annotateVCF VariantCalling/Manta/Manta_9876T_vs_1234N.diploidSV.vcf.gz --noReports fi -if [[ "$TEST" = "BUILDCONTAINERS" ]] || [[ "$TEST" = "ALL" ]] +if [[ ALL,BUILDCONTAINERS =~ $TEST ]] then nf_test buildContainers.nf --docker --containers caw,fastqc,gatk,igvtools,multiqc,mutect1,picard,qualimap,runallelecount,r-base,snpeff fi