Skip to content

Commit

Permalink
feat(wdl): Add Minimap2 whole genome alignment workflows
Browse files Browse the repository at this point in the history
  • Loading branch information
lrvdijk committed Jan 8, 2025
1 parent 1a18849 commit 83c57f2
Show file tree
Hide file tree
Showing 4 changed files with 140 additions and 8 deletions.
37 changes: 37 additions & 0 deletions wdl/pipelines/TechAgnostic/Minimap2RefIndex.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
version 1.0

import "../../tasks/alignment/minimap2.wdl" as minimap2

# Workflow wrapper around the IndexWithMinimap2 task: builds a Minimap2 index
# for a single reference FASTA and exposes the resulting index file.
workflow Minimap2RefIndex {
    input {
        File ref_fasta
        String preset = "asm5"
        String? extra_params
        String? out_prefix
    }

    meta {
        description: "Create a Minimap2 index of a reference genome"
    }

    parameter_meta {
        ref_fasta: "Reference genome in FASTA format (can be gzipped)."
        preset: "Minimap2 preset to use."
        extra_params: "Extra parameters to pass to minimap2"
        out_prefix: "Prefix for the output index file name."
    }

    # Use the caller-supplied prefix when present; otherwise fall back to the
    # file name of the reference FASTA (directory components stripped).
    String resolved_prefix = select_first([out_prefix, basename(ref_fasta)])

    call minimap2.IndexWithMinimap2 {
        input:
            ref_fasta = ref_fasta,
            preset = preset,
            extra_params = extra_params,
            out_prefix = resolved_prefix
    }

    output {
        # Index file produced by the indexing task.
        File mmi = IndexWithMinimap2.mmi
    }
}
36 changes: 36 additions & 0 deletions wdl/pipelines/TechAgnostic/Minimap2WGA.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
version 1.0

import "../../tasks/alignment/minimap2.wdl" as minimap2
import "../../tasks/alignment/samtools.wdl" as samtools

# Aligns each query FASTA against a pre-built Minimap2 index (one scatter shard
# per query file) and merges the per-query BAMs into a single output BAM.
workflow Minimap2WGA {
    meta {
        description: "Align reads to a reference genome using Minimap2, requiring an existing minimap2 index. If provided with multiple query fastas (e.g., multiple haplotypes), will merge resulting BAMs."
    }

    input {
        File ref_fasta_mmi
        Array[File] query_fasta
        String? extra_params
        String out_prefix = "aligned"
    }

    parameter_meta {
        ref_fasta_mmi: "Pre-built Minimap2 index of the reference genome."
        query_fasta: "Query FASTA files to align; each file is aligned separately and the results are merged."
        extra_params: "Extra parameters to pass to minimap2"
        out_prefix: "Prefix for the merged output BAM file name."
    }

    # One alignment call per query FASTA; the gathered outputs become an
    # Array[File] of BAMs outside the scatter.
    scatter (fasta in query_fasta) {
        call minimap2.Minimap2WithIx {
            input:
                ref_mmi = ref_fasta_mmi,
                query_fasta = fasta,
                # No trailing comma after the last input: a dangling comma is
                # not part of the documented WDL 1.0 call-input syntax and is
                # not accepted by every parser.
                extra_params = extra_params
        }
    }

    # Combine all per-query BAMs into a single file named "<out_prefix>.bam".
    call samtools.Merge as Merge {
        input:
            bams = Minimap2WithIx.bam,
            out_prefix = out_prefix
    }

    output {
        # Merged alignment of all query FASTAs.
        File bam = Merge.bam
    }
}
19 changes: 11 additions & 8 deletions wdl/tasks/alignment/minimap2.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,11 @@ task IndexWithMinimap2 {
String preset = "asm5"
String? extra_params
String? out_prefix

RuntimeAttr? runtime_attr_override
}
Int disk_size = 1 + 20*ceil(size(ref_fasta, "GB"))
RuntimeAttr default_attr = object {
cpu_cores: 4,
mem_gb: 32,
Expand All @@ -22,7 +25,6 @@ task IndexWithMinimap2 {
RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
Int num_cpu = select_first([runtime_attr.cpu_cores, 1])
Int disk_size = 1 + 20*ceil(size(ref_fasta, "GB"))
meta {
description: "Create a Minimap2 index of a reference genome"
Expand Down Expand Up @@ -51,21 +53,23 @@ task IndexWithMinimap2 {
bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb])
preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
docker: select_first([runtime_attr.docker, default_attr.docker]),
docker: select_first([runtime_attr.docker, default_attr.docker])
}
}
task Minimap2WithIx {
input {
File ref_mm2_ix
File ref_mmi
File query_fasta
String? extra_params
String? out_prefix = "aligned"
String out_prefix = "aligned"

RuntimeAttr? runtime_attr_override
}
Int disk_size = 1 + 10*2*ceil(size(query_fasta, "GB") + size(ref_mmi, "GB"))
RuntimeAttr default_attr = object {
cpu_cores: 4,
mem_gb: 32,
Expand All @@ -77,8 +81,7 @@ task Minimap2WithIx {
}
RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
Int num_cpu = select_first([runtime_attr.cpu_cores, 1])
Int disk_size = 1 + 10*2*ceil(size(query_fasta, "GB") + size(ref_mm2_ix, "GB") + size(ref_fasta, "GB"))
Int num_cpu = select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
meta {
description: "Align a query genome to a reference genome using Minimap2, using a pre-built index"
Expand All @@ -94,7 +97,7 @@ task Minimap2WithIx {
command <<<
set -euxo pipefail
minimap2 -t ~{num_cpu - 1} ~{extra_params} -d ~{ref_mm2_ix} ~{query_fasta} \
minimap2 -t ~{num_cpu - 1} ~{extra_params} -d ~{ref_mmi} ~{query_fasta} \
| samtools sort -Obam -o ~{out_prefix}.bam
>>>
Expand All @@ -109,6 +112,6 @@ task Minimap2WithIx {
bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb])
preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
docker: select_first([runtime_attr.docker, default_attr.docker]),
docker: select_first([runtime_attr.docker, default_attr.docker])
}
}
56 changes: 56 additions & 0 deletions wdl/tasks/alignment/samtools.wdl
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
version 1.0

import "../../structs/Structs.wdl"

# Merges a list of BAM files into a single BAM using `samtools merge`.
task Merge {
    input {
        Array[File] bams
        String out_prefix = "merged"

        RuntimeAttr? runtime_attr_override
    }

    # Disk request: 1 GB plus ten times the combined (rounded-up) size of all
    # input BAMs.
    Int disk_size = 1 + 10*ceil(size(bams, "GB"))

    RuntimeAttr default_attr = object {
        cpu_cores: 2,
        mem_gb: 16,
        disk_gb: disk_size,
        boot_disk_gb: 20,
        preemptible_tries: 3,
        max_retries: 2,
        docker: "us-central1-docker.pkg.dev/broad-dsp-lrma/pangenome-pipelines/minimap2:latest"
    }

    # Take the override struct if one was given; any field it leaves unset is
    # filled from default_attr by the per-field select_first calls below.
    RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr])
    Int num_cpu = select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])

    meta {
        description: "Merge multiple BAM files into a single BAM file"
    }

    parameter_meta {
        bams: "List of BAM files to merge"
        out_prefix: "Prefix for the output BAM file name"
        # Keyed by the actual input name (runtime_attr_override), not the
        # private runtime_attr declaration derived from it.
        runtime_attr_override: "Runtime attributes for the task"
    }

    command <<<
        set -euxo pipefail
        samtools merge -@ ~{num_cpu} -o ~{out_prefix}.bam ~{sep=" " bams}
    >>>

    output {
        File bam = "~{out_prefix}.bam"
    }

    runtime {
        cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores])
        memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB"
        disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD"
        bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb])
        preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries])
        maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries])
        docker: select_first([runtime_attr.docker, default_attr.docker])
    }
}

0 comments on commit 83c57f2

Please sign in to comment.