From 593ffdcbd19c5542301ea56936e54ec1521bb0ac Mon Sep 17 00:00:00 2001 From: sicHiRsch Date: Fri, 3 Nov 2017 10:20:04 +0800 Subject: [PATCH 1/2] bunch of hacks for control flow The problem is that if one of the two processes (recentLTRs or olderLTRs) does not find any hits, a bunch of empty files is created. Those empty files then trip up other processes downstream and crash the whole pipeline. This should probably be solved using native Nextflow control structures, but I worked around that by creating empty or dummy files wherever necessary. Now at least the pipeline runs through, but I am not sure whether or how these dummy files influence other programs in the pipeline. --- main.nf | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/main.nf b/main.nf index f36ce03..10a8e64 100755 --- a/main.nf +++ b/main.nf @@ -194,6 +194,9 @@ process RepeatMasker1 { set age, 'seqfile.outinner.out', 'seqfile.outinner.masked' into repeatMasker1Unclean """ +// only run RepeatMasker if the previous processes have found anything, i.e. there is something in their output file +if [[ `wc -l repeats_to_mask_LTR.fasta | cut -c 1` != "0" ]]; then + RepeatMasker \ -lib repeats_to_mask_LTR.fasta \ -nolow \ @@ -203,6 +206,10 @@ RepeatMasker \ if [ ! -f seqfile.outinner.masked ]; then cp seqfile.outinner seqfile.outinner.masked +fi + +//otherwise create empty dummy files in order for following processes not to crash +else touch seqfile.outinner.masked seqfile.outinner.out; fi """ } @@ -251,6 +258,12 @@ outinner_blastx_parse.pl \ if [ ! 
-s passed_outinner_sequence.fasta ]; then echo -e '>dummy empty sequence\nACTACTAC' > passed_outinner_sequence.fasta +fi +/*this follows on from above, if 'old' or 'new' did not find anything, a dummy file needs to be created +in order for following processes not to crash*/ + +if [[ `wc -l passed_outinner_sequence.fasta | cut -c 1` == "0" ]]; then + echo -e '>dummy empty sequence\nACTACTAC' > passed_outinner_sequence.fasta fi """ } @@ -273,6 +286,9 @@ process buildExemplars { set age, 'LTR.lib' into exemplars """ +//Only run the scripts if previous processes found something, otherwise don't bother and create a dummy file +if [[ `wc -l CRL_Step3_Passed_Elements.fasta | cut -c 1` != "0" ]]; then + CRL_Step4.pl \ --step3 CRL_Step3_Passed_Elements.fasta \ --resultfile seqfile.result \ @@ -297,6 +313,12 @@ CRL_Step5.pl \ --final LTR.lib \ --pcoverage 90 \ --pidentity 80 + +/*This file needs to have at least one entry, hence the dummy entry, otherwise the pipeline crashes. +I am however not sure if this dummy sequence will influence anything else downstream and it may have +to be dealt with later */ +else echo -e '>dummy empty sequence\nACTACTAC' > LTR.lib; +fi """ } From 20966a1b46c038abed5b0ebb450d1820f51111a0 Mon Sep 17 00:00:00 2001 From: sicHiRsch Date: Wed, 14 Feb 2018 16:39:30 +0800 Subject: [PATCH 2/2] patches to control flow I used the wrong comment characters in my earlier patch. These added dummy files are still necessary in the latest iteration of nf-repeatmasking since the pipeline does crash otherwise. --- main.nf | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/main.nf b/main.nf index 10a8e64..dca2e62 100755 --- a/main.nf +++ b/main.nf @@ -194,7 +194,7 @@ process RepeatMasker1 { set age, 'seqfile.outinner.out', 'seqfile.outinner.masked' into repeatMasker1Unclean """ -// only run RepeatMasker if the previous processes have found anything, i.e. 
there is something in their output file +# only run RepeatMasker if the previous processes have found anything, i.e. there is something in their output file if [[ `wc -l repeats_to_mask_LTR.fasta | cut -c 1` != "0" ]]; then RepeatMasker \ @@ -208,7 +208,7 @@ if [ ! -f seqfile.outinner.masked ]; then cp seqfile.outinner seqfile.outinner.masked fi -//otherwise create empty dummy files in order for following processes not to crash +#otherwise create empty dummy files in order for following processes not to crash else touch seqfile.outinner.masked seqfile.outinner.out; fi """ @@ -259,8 +259,8 @@ outinner_blastx_parse.pl \ if [ ! -s passed_outinner_sequence.fasta ]; then echo -e '>dummy empty sequence\nACTACTAC' > passed_outinner_sequence.fasta fi -/*this follows on from above, if 'old' or 'new' did not find anything, a dummy file needs to be created -in order for following processes not to crash*/ +#this follows on from above, if 'old' or 'new' did not find anything, a dummy file needs to be created +#in order for following processes not to crash if [[ `wc -l passed_outinner_sequence.fasta | cut -c 1` == "0" ]]; then echo -e '>dummy empty sequence\nACTACTAC' > passed_outinner_sequence.fasta @@ -286,7 +286,7 @@ process buildExemplars { set age, 'LTR.lib' into exemplars """ -//Only run the scripts if previous processes found something, otherwise don't bother and create a dummy file +#Only run the scripts if previous processes found something, otherwise don't bother and create a dummy file if [[ `wc -l CRL_Step3_Passed_Elements.fasta | cut -c 1` != "0" ]]; then CRL_Step4.pl \ @@ -314,9 +314,9 @@ CRL_Step5.pl \ --pcoverage 90 \ --pidentity 80 -/*This file needs to have at least one entry, hence the dummy entry, otherwise the pipeline crashes. -I am however not sure if this dummy sequence will influence anything else downstream and it may have -to be dealt with later */ +#This file needs to have at least one entry, hence the dummy entry, otherwise the pipeline crashes. 
+#I am however not sure if this dummy sequence will influence anything else downstream and it may have +#to be dealt with later else echo -e '>dummy empty sequence\nACTACTAC' > LTR.lib; fi """