Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix createyml random image matching #221

Merged
merged 10 commits into from
Aug 26, 2024
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
# CHANGELOG

### 3.9.7
* Solved trio eklipse image being wrongly added to yaml
* removed outdated regex matches for genepanel, would remove important genepanels
* general clean-up of create_yml.pl

### 3.9.6
* Fix bug where wrong tuple value unpacked as group and sample id in `bqsr` when starting run from bam


### 3.9.5
* Fixed faulty if-condition for annotsv, would result in empty annotsv tsv every time

Expand Down
81 changes: 43 additions & 38 deletions bin/create_yml.pl
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@
if ($opt{assay}) {
my @a_a = split/,/,$opt{assay};
$assay = $a_a[0];
if ($a_a[1] ne 'false' && $a_a[1]) {
if ($a_a[1]) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

$analysis = $a_a[1];
}
elsif ($opt{d} eq 'ahus') { ## beginning of stinking mess, deadline for fix 2021-03-01
Expand All @@ -132,8 +132,20 @@
}

### Group ###
if (!defined $opt{g}) { print STDERR "need group name"; exit;}
my @g_c = split/,/,$opt{g};
### Proband ### Could differ from group, needed to select correct eklipse image
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great addition - early fail and clear user info when calling with wrong input. I like it

### Clarity-ID ###
my @g_c;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The meaning of this variable name is not clear to me (I realize this is not from this PR)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it's an abbreviated name: group_clarity. The --g flag dates from when it only held the group.

if (defined $opt{g}) {
@g_c = split/,/,$opt{g};
unless (scalar(@g_c) == 2) {
print STDERR "need group-id,clarity-id\n";
exit;
}
}
else {
print STDERR "need group-id,clarity-id\n";
exit;
}
my $group = $g_c[0];
my $clarity_id = $g_c[1];

Expand All @@ -142,42 +154,46 @@
open (INFO, $files) or die "Cannot open $files\n";
my %INFO;
my @bams;
# For mother affected, father affected also print files associated to these
my @inher_patterns;
while ( <INFO> ) {

my @tmp = split/\s+/,$_;
print STDERR $_,"\n";
if ($tmp[0] eq "BAM") {
$INFO{BAM}->{$tmp[1]} = $tmp[2];
my $category = $tmp[0];
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

👍

my $subcat = $tmp[1];
my $filepath = $tmp[2];
if ($category eq "BAM") {
$INFO{BAM}->{$subcat} = $filepath;
}
elsif ($tmp[0] eq "TISSUE") {
$INFO{TISSUE}->{$tmp[1]} = $tmp[2];
elsif ($category eq "TISSUE") {
$INFO{TISSUE}->{$subcat} = $filepath;
}
elsif ($tmp[0] eq "mtBAM") {
$INFO{mtBAM}->{$tmp[1]} = $tmp[2];
elsif ($category eq "mtBAM") {
$INFO{mtBAM}->{$subcat} = $filepath;
}
elsif ($tmp[0] eq "D4") {
$INFO{D4}->{$tmp[1]} = $tmp[2];
elsif ($category eq "D4") {
$INFO{D4}->{$subcat} = $filepath;
}
elsif ($tmp[0] eq "IMG") {
$INFO{IMG}->{$tmp[1]} = $tmp[2];
elsif ($category eq "IMG") {
$INFO{IMG}->{$subcat} = $filepath;
}
elsif ($tmp[0] eq "STR_IMG") {
$INFO{STR_IMG}->{$tmp[1]} = $tmp[2];
elsif ($category eq "STR_IMG") {
$INFO{STR_IMG}->{$subcat} = $filepath;
}
elsif ($tmp[0] eq "SV" or $tmp[0] eq "SVc" or $tmp[0] eq "SNV" or $tmp[0] eq "MADDE") {
if ($tmp[0] eq "SNV") {
push @inher_patterns,$tmp[1];
elsif ($category eq "SV" or $category eq "SVc" or $category eq "SNV" or $category eq "MADDE") {
if ($category eq "SNV") {
push @inher_patterns,$subcat;
}
$INFO{$tmp[0]}->{$tmp[1]} = $tmp[2];
$INFO{$category}->{$subcat} = $filepath;
}
else {
$INFO{$tmp[0]} = $tmp[1];
$INFO{$category} = $subcat;
}

}
close INFO;
print Dumper(%INFO);
#my $info_json = to_json(\%INFO, { pretty => 1, indent => 4 });
#print STDERR ($info_json);
####################################################

my $kit = "Intersected WGS"; ## placeholder, does not change for panels
Expand All @@ -197,15 +213,14 @@
my $institute = "klingen";
my $institute_owner = "klingen";
if ($opt{assay}) {
## if something added to wgs-hg38, i.e wgs-hg38-nu (no upload loqusdb)
if ($assay =~ /wgs-hg38/ ) {
$assay = "wgs-hg38";
}
$institute = $assays{$assay}{$analysis}{institute};
$institute_owner = $assays{$assay}{$analysis}{institute_owner};
$kit = $assays{$assay}{capture_kit};
if ($assays{$assay}{capture_kit}) {
$kit = $assays{$assay}{capture_kit};
}
}
my $gene_panels = get_genelist($institute);

####################################################


Expand All @@ -227,8 +242,6 @@
}
open (OUT,'>',$out);



print OUT "---\n";
### ASSAY DECIDE OWNER? ####
print OUT "owner: $institute_owner\n";
Expand Down Expand Up @@ -413,16 +426,8 @@ sub get_genelist {
while (<JSON>) {
$data = decode_json($_);
}
# print Dumper($data);
foreach my $key (@{$data}) {
if (ref $key->{institute} eq 'ARRAY') {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, tricky to follow here. Guessing you know this chunk well enough, so that no new issues are introduced here ...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is some really old legacy code. It's from the time we had a manually curated list of genepanels. Now it was just confusing to keep.

foreach my $inst (@{ $key->{institute} }) {
next if $key->{'display_name'} =~ /ERSATT|TEST|test|Test/;
push @ok_panels,$key->{panel_name} if $inst eq $institute;
}
}
elsif ($key->{institute} eq $institute) {
next if $key->{'display_name'} =~ /ERSATT|TEST|test|Test/;
if ($key->{institute} eq $institute) {
push @ok_panels,$key->{panel_name};
}
}
Expand Down
28 changes: 18 additions & 10 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ Channel
.fromPath(params.csv)
.splitCsv(header:true)
.map{ row-> tuple(row.group, row.id, row.sex, row.type) }
.into { meta_gatkcov; meta_exp; meta_svbed; meta_pod; meta_mutect2}
.into { meta_gatkcov; meta_exp; meta_svbed; meta_pod; meta_mutect2; meta_eklipse}


Channel
Expand Down Expand Up @@ -1705,14 +1705,18 @@ process run_eklipse {
publishDir "${OUTDIR}/plots/mito", mode: 'copy', overwrite: 'true', pattern: '*.png'

input:
set group, id, file(bam), file(bai) from eklipse_bam

set group, id, file(bam), file(bai), sex, type from eklipse_bam.join(meta_eklipse, by: [0,1])
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the [0, 1] due to the id and group flipping? We should really sort that out soon ... Maybe we can replace all the group + id with a meta object. Then we could also bunch the type and sex into that one. An issue for another day though

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe not so far away though. Seems like something to sort out before transitioning to DSL2

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not because of the flipping. It's just to say that it should match on both. The result is that the group and id channel does not become a weird list.
if I only match on group it will be:
group, id1, bam, bai, id1/2/3(random), sex, type.
if I only match on id it will be:
[group,group], id, bam, bai, sex, type

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see!

output:
set file("*.png"), file("${id}.hetplasmid_frequency.txt")
set group, file("${id}_eklipse.INFO") into eklipse_INFO
set group, file("${id}_eklipse.INFO") optional true into eklipse_INFO
ViktorHy marked this conversation as resolved.
Show resolved Hide resolved
set group, file("*versions.yml") into ch_run_eklipse_versions

script:
yml_info_command = ""
if (type == "proband") {
yml_info_command = "echo 'IMG eklipse ${params.accessdir}/plots/mito/${id}_eklipse.png' > ${id}_eklipse.INFO"
}
"""
source activate htslib10
echo "${bam}\tsample" > infile.txt
Expand All @@ -1724,17 +1728,21 @@ process run_eklipse {
mv eKLIPse_*/eKLIPse_sample.png ./${id}_eklipse.png
hetplasmid_frequency_eKLIPse.pl --bam ${bam} --in ${id}_deletions.csv
mv hetplasmid_frequency.txt ${id}.hetplasmid_frequency.txt
echo "IMG eklipse ${params.accessdir}/plots/mito/${id}_eklipse.png" > ${id}_eklipse.INFO
$yml_info_command

${run_eklipse_version(task)}
"""

stub:
yml_info_command = ""
if (type == "proband") {
yml_info_command = "echo 'IMG eklipse ${params.accessdir}/plots/mito/${id}_eklipse.png' > ${id}_eklipse.INFO"
}
"""
source activate htslib10
touch "${id}.hetplasmid_frequency.txt"
touch "${id}_eklipse.INFO"
touch "${id}.png"
$yml_info_command

${run_eklipse_version(task)}
"""
Expand Down Expand Up @@ -2217,7 +2225,7 @@ process inher_models {
container = '/fs1/resources/containers/genmod.sif'

input:
set group, file(vcf), type, file(ped) from base_vcf.mix(ma_vcf, fa_vcf).join(ped_inher.mix(ped_inher_ma,ped_inher_fa)).view()
ViktorHy marked this conversation as resolved.
Show resolved Hide resolved
set group, file(vcf), type, file(ped) from base_vcf.mix(ma_vcf, fa_vcf).join(ped_inher.mix(ped_inher_ma,ped_inher_fa))

output:
set group, type, file("${group}.models.vcf") into inhermod
Expand Down Expand Up @@ -3122,7 +3130,7 @@ process cnvkit_panel {
params.sv && params.antype == "panel"

input:
set group, id, file(bam), file(bai), file(vcf), file(multi), val(INS_SIZE), val(MEAN_DEPTH), val(COV_DEV) from bam_cnvkit_panel.mix(bam_cnvkitpanel_choice).join(vcf_cnvkit, by:[0,1]).join(qc_cnvkit_val, by:[0,1]).view()
set group, id, file(bam), file(bai), file(vcf), file(multi), val(INS_SIZE), val(MEAN_DEPTH), val(COV_DEV) from bam_cnvkit_panel.mix(bam_cnvkitpanel_choice).join(vcf_cnvkit, by:[0,1]).join(qc_cnvkit_val, by:[0,1])
//set id, val(INS_SIZE), val(MEAN_DEPTH), val(COV_DEV) from qc_cnvkit_val.view()
//set group, id, file(vcf) from vcf_cnvkit.view()

Expand Down Expand Up @@ -3178,7 +3186,7 @@ process svdb_merge_panel {
input:
//set group, id, file(mantaV), file(dellyV), file(melt), file(cnvkitV) \
// from called_manta_panel.join(called_delly_panel, by:[0,1]).join(melt_vcf, by:[0,1]).join(called_cnvkit_panel, by:[0,1])
set group, id, file(vcfs), id, file(melt) from called_manta_panel.mix(called_delly_panel,called_cnvkit_panel,merged_gatk_panel).groupTuple().join(melt_vcf).view()
set group, id, file(vcfs), id, file(melt) from called_manta_panel.mix(called_delly_panel,called_cnvkit_panel,merged_gatk_panel).groupTuple().join(melt_vcf)

output:
set group, id, file("${group}.merged.filtered.melt.vcf") into vep_sv_panel, annotsv_panel
Expand Down Expand Up @@ -3383,7 +3391,7 @@ process add_to_loqusdb {
!params.noupload && !params.reanalyze

input:
set group, type, file(vcf), file(tbi), type, file(ped), file(svvcf) from vcf_loqus.join(ped_loqus).join(loqusdb_sv.mix(loqusdb_sv_panel)).view()
set group, type, file(vcf), file(tbi), type, file(ped), file(svvcf) from vcf_loqus.join(ped_loqus).join(loqusdb_sv.mix(loqusdb_sv_panel))

output:
file("${group}*.loqus") into loqusdb_done
Expand Down