From 966f3e3e4e09167dd82ac1dc5bd30eedd04f7f8f Mon Sep 17 00:00:00 2001 From: FelixKrueger Date: Mon, 25 Jul 2016 15:58:41 +0100 Subject: [PATCH] Another bug fix for ambiguous alignments --- RELEASE_NOTES.txt | 19 ++++++++++++++++ bam2nuc | 2 +- bismark | 43 +++++++++++++++++------------------ bismark2bedGraph | 2 +- bismark2report | 2 +- bismark2summary | 2 +- bismark_genome_preparation | 2 +- bismark_methylation_extractor | 2 +- coverage2cytosine | 2 +- deduplicate_bismark | 2 +- 10 files changed, 48 insertions(+), 30 deletions(-) diff --git a/RELEASE_NOTES.txt b/RELEASE_NOTES.txt index c6b4feb..ee2e0c0 100644 --- a/RELEASE_NOTES.txt +++ b/RELEASE_NOTES.txt @@ -1,3 +1,22 @@ +RELEASE NOTES FOR Bismark v0.16.3 (25 07 2016) +------------------------------------------------- + +Bismark +======= + +Fixed another bug where a subset of ambiguous Bowtie 2 alignments where considered unique even though +they had been ambiguous in a different thread before, e.g.: +Read 1: AS:i:0 XS:i:0 +Read 2: AS:i:0 +In such cases the 'ambiguous within thread' variable is now only reset if the second alignment is truly better. This +also affects the 'ambig.bam' output. + +Added support for large Bowtie (1) index files ending in .ebwtl which had been added in Bowtie v1.1.0. + + + + + RELEASE NOTES FOR Bismark v0.16.2 (19 07 2016) ------------------------------------------------- diff --git a/bam2nuc b/bam2nuc index 2ce6168..510dd40 100755 --- a/bam2nuc +++ b/bam2nuc @@ -31,7 +31,7 @@ my %freqs; # keeping a record of which chromosomes have been processed my %genomic_freqs; my %processed; -my $bam2nuc_version = 'v0.16.2'; +my $bam2nuc_version = 'v0.16.3'; my ($output_dir,$genome_folder,$parent_dir,$samtools_path,$genome_freq_only) = process_commandline(); diff --git a/bismark b/bismark index fd52c97..4851feb 100755 --- a/bismark +++ b/bismark @@ -25,7 +25,7 @@ use lib "$Bin/../lib"; my $parent_dir = getcwd; -my $bismark_version = 'v0.16.2_dev'; +my $bismark_version = 'v0.16.3'; my $command_line = join (" ",@ARGV); @@ -3041,17 +3041,16 @@ sub check_bowtie_results_single_end_bowtie2{ $overwrite++; # warn "Found better or equal alignment score ($alignment_score), setting \$best_AS_so_far to $best_AS_so_far\n"; - # resetting the ambiguous within thread memory (if applicable at all) only if the current alignment is really better than the previous one. - # 22 07 2016: ambiguous score within same read only resets if the current alignment is really better than the previous one + # 22 07 2016: resetting the ambiguous score within same thread only if the current alignment is really better than the previous one if ($alignment_score > $best_AS_so_far){ # warn "Resetting amb within thread value to 0\n"; $amb_same_thread = 0; - } - - if ($ambig_bam){ # also setting a new first_ambig_alignment - $first_ambig_alignment = $fhs[$index]->{last_line}; - $first_ambig_alignment =~ s/_(CT|GA)_converted//; - # warn "$first_ambig_alignment\n"; sleep(1); + + if ($ambig_bam){ # also setting a new first_ambig_alignment + $first_ambig_alignment = $fhs[$index]->{last_line}; + $first_ambig_alignment =~ s/_(CT|GA)_converted//; + # warn "$first_ambig_alignment\n"; sleep(1); + } } } else{ @@ -3985,23 +3984,23 @@ sub check_bowtie_results_paired_ends_bowtie2{ # 19 07 2016 Changed to >= so that equally good alignments to different positions get added as well. Ambiguous alignments are identified and removed later. $best_AS_so_far = $sum_of_alignment_scores_1; $overwrite = 1; - + # warn "Found better or equal sum of alignment scores ($sum_of_alignment_scores_1), setting \$best_AS_so_far to $best_AS_so_far\n"; # resetting the ambiguous within thread memory (if applicable at all) only if the current alignment is really better than the previous one. - # 22 07 2016: ambiguous score within same read only resets if the current alignment is really better than the previous one + # 22 07 2016: ambiguous score within same thread only resets if the current alignment is really better than the previous one if ($sum_of_alignment_scores_1 > $best_AS_so_far){ # warn "Resetting amb within thread value to 0\n"; $amb_same_thread = 0; - } - - if ($ambig_bam){ # also setting a new first_ambig_alignment - # Read 1 - $first_ambig_alignment_line1 = $fhs[$index]->{last_line_1}; - $first_ambig_alignment_line1 =~ s/_(CT|GA)_converted//; - # Read 2 - $first_ambig_alignment_line2 = $fhs[$index]->{last_line_2}; - $first_ambig_alignment_line2 =~ s/_(CT|GA)_converted//; - # warn "$first_ambig_alignment_line1\n$first_ambig_alignment_line2\n\n"; sleep(1); + + if ($ambig_bam){ # also setting a new first_ambig_alignment + # Read 1 + $first_ambig_alignment_line1 = $fhs[$index]->{last_line_1}; + $first_ambig_alignment_line1 =~ s/_(CT|GA)_converted//; + # Read 2 + $first_ambig_alignment_line2 = $fhs[$index]->{last_line_2}; + $first_ambig_alignment_line2 =~ s/_(CT|GA)_converted//; + # warn "$first_ambig_alignment_line1\n$first_ambig_alignment_line2\n\n"; sleep(1); + } } } else{ @@ -10216,6 +10215,6 @@ Bismark SAM OUTPUT (default): Each read of paired-end alignments is written out in a separate line in the above format. -Last edited on 22 July 2016 +Last edited on 25 July 2016 HOW_TO } diff --git a/bismark2bedGraph b/bismark2bedGraph index d367f98..fa98df5 100755 --- a/bismark2bedGraph +++ b/bismark2bedGraph @@ -21,7 +21,7 @@ use Carp; ## You should have received a copy of the GNU General Public License ## along with this program. If not, see . -my $bismark2bedGraph_version = 'v0.16.2'; +my $bismark2bedGraph_version = 'v0.16.3'; my @bedfiles; my @methylcalls = qw (0 0 0); # [0] = methylated, [1] = unmethylated, [2] = total diff --git a/bismark2report b/bismark2report index 49fd4af..900800f 100755 --- a/bismark2report +++ b/bismark2report @@ -20,7 +20,7 @@ use lib "$Bin/../lib"; ## You should have received a copy of the GNU General Public License ## along with this program. If not, see . -my $bismark2report_version = 'v0.16.2'; +my $bismark2report_version = 'v0.16.3'; my (@alignment_reports,@dedup_reports,@splitting_reports,@mbias_reports,@nuc_reports); my ($output_dir,$verbose,$manual_output_file) = process_commandline(); diff --git a/bismark2summary b/bismark2summary index 451a1e6..563709d 100755 --- a/bismark2summary +++ b/bismark2summary @@ -20,7 +20,7 @@ use lib "$Bin/../lib"; ## You should have received a copy of the GNU General Public License ## along with this program. If not, see . -my $bismark_version = '0.16.2'; +my $bismark_version = '0.16.3'; # Last modified 05 07 2016 diff --git a/bismark_genome_preparation b/bismark_genome_preparation index e186809..689ab79 100755 --- a/bismark_genome_preparation +++ b/bismark_genome_preparation @@ -36,7 +36,7 @@ my $genomic_composition; my %genomic_freqs; # storing the genomic sequence composition my %freqs; -my $bismark_version = 'v0.16.2'; +my $bismark_version = 'v0.16.3'; GetOptions ('verbose' => \$verbose, 'help' => \$help, diff --git a/bismark_methylation_extractor b/bismark_methylation_extractor index d54493e..63f43de 100755 --- a/bismark_methylation_extractor +++ b/bismark_methylation_extractor @@ -29,7 +29,7 @@ my $parent_dir = getcwd(); my %fhs; -my $version = 'v0.16.2'; +my $version = 'v0.16.3'; my ($ignore,$genomic_fasta,$single,$paired,$full,$report,$no_overlap,$merge_non_CpG,$vanilla,$output_dir,$no_header,$bedGraph,$remove,$coverage_threshold,$counts,$cytosine_report,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome,$sort_size,$samtools_path,$gzip,$ignore_r2,$mbias_off,$mbias_only,$gazillion,$ample_mem,$ignore_3prime,$ignore_3prime_r2,$multicore) = process_commandline(); diff --git a/coverage2cytosine b/coverage2cytosine index 1ae40d5..cc0189b 100755 --- a/coverage2cytosine +++ b/coverage2cytosine @@ -23,7 +23,7 @@ use Carp; my %chromosomes; # storing sequence information of all chromosomes/scaffolds my %processed; # keeping a record of which chromosomes have been processed -my $coverage2cytosine_version = 'v0.16.2'; +my $coverage2cytosine_version = 'v0.16.3'; my ($output_dir,$genome_folder,$zero,$CpG_only,$CX_context,$split_by_chromosome,$parent_dir,$coverage_infile,$cytosine_out,$merge_CpGs,$gc_context,$gzip,$tetra) = process_commandline(); diff --git a/deduplicate_bismark b/deduplicate_bismark index 7e3e7e6..eb17f0d 100755 --- a/deduplicate_bismark +++ b/deduplicate_bismark @@ -30,7 +30,7 @@ use Getopt::Long; ### print "--representative\twill browse through all sequences and print out the sequence with the most representative (as in most frequent) methylation call for any given position. Note that this is very likely the most highly amplified PCR product for a given sequence\n\n"; -my $dedup_version = 'v0.16.2'; +my $dedup_version = 'v0.16.3'; my $help; my $representative;