Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dorado #277

Open
wants to merge 16 commits into
base: dev
Choose a base branch
from
10 changes: 6 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,10 +49,12 @@ jobs:
strategy:
matrix:
profiles:
- "test_nodx_vc"
- "test_nodx_stringtie"
- "test_nodx_noaln"
- "test_nodx_rnamod"
- "test_bc_nodx"
- "test_nobc_dx"
- "test_nobc_nodx_vc"
- "test_nobc_nodx_stringtie"
- "test_nobc_nodx_noaln"
- "test_nobc_nodx_rnamod"
steps:
- name: Check out pipeline code
uses: actions/checkout@v3
Expand Down
16 changes: 8 additions & 8 deletions bin/check_samplesheet.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def check_samplesheet(file_in, updated_path, file_out):
barcode = "barcode%s" % (barcode.zfill(2))

## Check input file extension
nanopolish_fast5 = ""
fast5 = ""
if input_file:
if input_file.find(" ") != -1:
print_error("Input file contains spaces!", "Line", line)
Expand All @@ -115,12 +115,12 @@ def check_samplesheet(file_in, updated_path, file_out):
if updated_path != "not_changed":
input_file = "/".join([updated_path, input_file.split("/")[-1]])
list_dir = os.listdir(input_file)
nanopolish_fast5 = input_file
if not (all(fname.endswith(".fast5") for fname in list_dir)):
fast5 = input_file
if not (all(fname.endswith(".fast5") for fname in list_dir)) and not (all(fname.endswith(".pod5") for fname in list_dir)):
if "fast5" in list_dir and "fastq" in list_dir:
nanopolish_fast5 = input_file + "/fast5"
fast5 = input_file + "/fast5"
## CHECK FAST5 DIRECTORY
if not (all(fname.endswith(".fast5") for fname in os.listdir(nanopolish_fast5))):
if not (all(fname.endswith(".fast5") for fname in os.listdir(fast5))):
print_error("fast5 directory contains non-fast5 files.")
## CHECK PROVIDED BASECALLED FASTQ
fastq_path = input_file + "/fastq"
Expand All @@ -139,8 +139,8 @@ def check_samplesheet(file_in, updated_path, file_out):
'{input_file} path does not end with ".fastq.gz", ".fq.gz", or ".bam" and is not an existing directory with correct fast5 and/or fastq inputs.'
)

## Create sample mapping dictionary = {group: {replicate : [ barcode, input_file, nanopolish_fast5 ]}}
sample_info = [barcode, input_file, nanopolish_fast5]
## Create sample mapping dictionary = {group: {replicate : [ barcode, input_file, fast5 ]}}
sample_info = [barcode, input_file, fast5]
if group not in sample_info_dict:
sample_info_dict[group] = {}
if replicate not in sample_info_dict[group]:
Expand All @@ -161,7 +161,7 @@ def check_samplesheet(file_in, updated_path, file_out):
out_dir = os.path.dirname(file_out)
make_dir(out_dir)
with open(file_out, "w") as fout:
fout.write(",".join(["sample", "barcode", "reads", "nanopolish_fast5"]) + "\n")
fout.write(",".join(["sample", "barcode", "reads", "fast5"]) + "\n")
for sample in sorted(sample_info_dict.keys()):
## Check that replicate ids are in format 1..<NUM_REPS>
uniq_rep_ids = set(sample_info_dict[sample].keys())
Expand Down
49 changes: 28 additions & 21 deletions conf/test.config
Original file line number Diff line number Diff line change
@@ -1,33 +1,40 @@
/*
* -------------------------------------------------
* Nextflow config file for running tests
* -------------------------------------------------
* Defines bundled input files and everything required
* to run a fast and simple test. Use as follows:
* nextflow run nf-core/nanoseq -profile test_nobc_dx,<docker/singularity>
*/
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.

Use as follows:
nextflow run nf-core/nanoseq -profile test,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources
max_cpus = 2
max_memory = 6.GB
max_time = 12.h
// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_time = '12.h'

// Input data to perform demultiplexing
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/3.2/samplesheet/samplesheet_nobc_dx.csv'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_23800000-23980000.fa'
gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_23800000-23980000.gtf'
run_nanolyse = true
protocol = 'DNA'
// Input data to perform both basecalling and demultiplexing
input = 'https://raw.githubusercontent.com/yuukiiwa/test-datasets/nanoseq/3.2/samplesheet/samplesheet_bc_dx.csv'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/hg19_KCMF1.fa'
protocol = 'cDNA'
flowcell = 'FLO-MIN106'
kit = 'SQK-DCS109'
barcode_kit = 'NBD103/NBD104'
input_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/fastq/nondemultiplexed/sample_nobc_dx.fastq.gz'
skip_bigwig = true
skip_bigbed = true
trim_barcodes = true
dorado_model = '[email protected]'
dorado_device = 'cpu'
run_nanolyse = true
skip_quantification = true
skip_fusion_analysis= true
skip_modification_analysis=true
aligner = 'graphmap2'

// This variable is just for reference and isn't actually required for the tests
// Files are downloaded and staged using the "GetTestData" process
input_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/fast5/barcoded_multi/'
}
33 changes: 33 additions & 0 deletions conf/test_bc_nodx.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* -------------------------------------------------
* Nextflow config file for running tests
* -------------------------------------------------
* Defines bundled input files and everything required
* to run a fast and simple test. Use as follows:
* nextflow run nf-core/nanoseq -profile test_bc_nodx,<docker/singularity>
*/

// Parameters for the test profile that runs basecalling but skips demultiplexing.
params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = 6.GB
max_time = 12.h

// Input data to perform basecalling and to skip demultiplexing
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/3.2/samplesheet/samplesheet_bc_nodx.csv'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/hg19_KCMF1.fa'
protocol = 'cDNA'
flowcell = 'FLO-MIN106'
kit = 'SQK-DCS108'
// NOTE(review): the value below looks like it was replaced by e-mail obfuscation
// when this text was captured; confirm the real dorado model identifier.
dorado_model = '[email protected]'
dorado_device = 'cpu'
// Steps switched off to keep the test small
skip_bigbed = true
skip_bigwig = true
skip_demultiplexing = true
skip_quantification = true
skip_fusion_analysis= true
skip_modification_analysis=true
}
36 changes: 36 additions & 0 deletions conf/test_bc_nodx_dnamod.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* -------------------------------------------------
* Nextflow config file for running tests
* -------------------------------------------------
* Defines bundled input files and everything required
* to run a fast and simple test. Use as follows:
* nextflow run nf-core/nanoseq -profile test_bc_nodx_dnamod,<docker/singularity>
*/

// Parameters for the test profile that basecalls pod5 input with a dorado
// modification model and skips demultiplexing.
params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources so that this can run on a small CI runner
max_cpus = 2
max_memory = 6.GB
max_time = 12.h

// Input data to perform basecalling and to skip demultiplexing
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/nanoseq/3.2/samplesheet/samplesheet_bc_nodx_dnamod.csv'
input_path_file_type= 'pod5'
bedmethyl_out = true
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/hg19_KCMF1.fa'
protocol = 'cDNA'
flowcell = 'FLO-MIN106'
kit = 'SQK-DCS108'
// NOTE(review): the value below looks like it was replaced by e-mail obfuscation
// when this text was captured; confirm the real dorado model identifier.
dorado_model = '[email protected]'
dorado_modification = '5mCG_5hmCG'
dorado_device = 'cpu'
// Steps switched off to keep the test small
skip_bigbed = true
skip_bigwig = true
skip_demultiplexing = true
skip_quantification = true
skip_fusion_analysis= true
skip_modification_analysis=true
}
33 changes: 33 additions & 0 deletions conf/test_nobc_dx.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* -------------------------------------------------
* Nextflow config file for running tests
* -------------------------------------------------
* Defines bundled input files and everything required
* to run a fast and simple test. Use as follows:
* nextflow run nf-core/nanoseq -profile test_nobc_dx,<docker/singularity>
*/

// Parameters for the test profile that skips basecalling and runs demultiplexing.
params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources
max_cpus = 2
max_memory = 6.GB
max_time = 12.h

// Input data to perform demultiplexing (basecalling is skipped)
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/3.2/samplesheet/samplesheet_nobc_dx.csv'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_23800000-23980000.fa'
gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_23800000-23980000.gtf'
skip_basecalling = true
run_nanolyse = true
protocol = 'DNA'
barcode_kit = 'NBD103/NBD104'
input_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/fastq/nondemultiplexed/sample_nobc_dx.fastq.gz'
// Steps switched off to keep the test small
skip_bigwig = true
skip_bigbed = true
skip_quantification = true
skip_fusion_analysis= true
skip_modification_analysis=true
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ params {
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/3.2/samplesheet/samplesheet_nobc_nodx_noaln.csv'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_1-17550000.fa'
gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_1-17500000.gtf'
skip_basecalling = true
protocol = 'directRNA'
skip_demultiplexing = true
skip_alignment = true
Expand Down
30 changes: 30 additions & 0 deletions conf/test_nobc_nodx_noaln_vc.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* -------------------------------------------------
* Nextflow config file for running tests
* -------------------------------------------------
* Defines bundled input files and everything required
* to run a fast and simple test. Use as follows:
* nextflow run nf-core/nanoseq -profile test_nobc_nodx_noaln_vc,<docker/singularity>
*/

// Parameters for the test profile that skips basecalling, demultiplexing and
// alignment, and runs variant calling.
params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check variant calling functions'

// Limit resources so that this can run on a small CI runner
max_cpus = 2
max_memory = 6.GB
max_time = 12.h

// Input data to skip basecalling, demultiplexing and alignment, and to run variant calling
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/refs/heads/nanoseq/3.2/samplesheet/samplesheet_nobc_nodx_noaln_vc.csv'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/hg19_KCMF1.fa'
skip_basecalling = true
protocol = 'DNA'
skip_alignment = true
skip_quantification = true
skip_demultiplexing = true
// Variant calling is enabled with one small-variant and one structural-variant caller
call_variants = true
variant_caller = 'clair3'
structural_variant_caller = 'sniffles'
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ params {
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/3.2/samplesheet/samplesheet_nobc_nodx_rnamod.csv'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/modification_transcriptome_subset.fa'
gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/modification_transcriptome_subset.gtf'
skip_basecalling = true
protocol = 'directRNA'
run_nanolyse = true
skip_bigbed = true
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ params {
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_23800000-23980000.fa'
gtf = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/chr22_23800000-23980000.gtf'
protocol = 'directRNA'
skip_basecalling = true
skip_demultiplexing = true
skip_fusion_analysis= true
skip_modification_analysis=true
Expand Down
1 change: 1 addition & 0 deletions conf/test_nodx_vc.config → conf/test_nobc_nodx_vc.config
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ params {
// Input data to skip demultiplexing and variant call
input = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/3.2/samplesheet/samplesheet_nobc_nodx_vc.csv'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/hg19_KCMF1.fa'
skip_basecalling = true
protocol = 'DNA'
skip_quantification = true
skip_demultiplexing = true
Expand Down
39 changes: 39 additions & 0 deletions conf/test_withpull.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.

Use as follows:
nextflow run nf-core/nanoseq -profile test_withpull,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

// Parameters for the test profile that runs both basecalling and demultiplexing.
params {
config_profile_name = 'Test profile'
config_profile_description = 'Minimal test dataset to check pipeline function'

// Limit resources so that this can run on GitHub Actions
max_cpus = 2
max_memory = '6.GB'
max_time = '6.h'

// Input data to perform both basecalling and demultiplexing
// NOTE(review): the samplesheet is fetched from a personal fork (yuukiiwa/test-datasets)
// while the reference comes from nf-core/test-datasets — confirm this is intended.
input = 'https://raw.githubusercontent.com/yuukiiwa/test-datasets/nanoseq/3.2/samplesheet/samplesheet_bc_dx.csv'
fasta = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/reference/hg19_KCMF1.fa'
protocol = 'cDNA'
flowcell = 'FLO-MIN106'
kit = 'SQK-DCS109'
barcode_kit = 'EXP-NBD103'
trim_barcodes=true
output_demultiplex_fast5 = true
run_nanolyse = true
// Steps switched off to keep the test small
skip_quantification = true
skip_fusion_analysis= true
skip_modification_analysis=true

// This variable is just for reference and isn't actually required for the tests
// Files are downloaded and staged using the "GetTestData" process
input_path = 'https://raw.githubusercontent.com/nf-core/test-datasets/nanoseq/fast5/barcoded/'
}
4 changes: 2 additions & 2 deletions modules/local/bambu.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ process BAMBU {

conda "conda-forge::r-base=4.0.3 bioconda::bioconductor-bambu=3.0.8 bioconda::bioconductor-bsgenome=1.66.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bioconductor-bambu:3.0.8--r42hc247a5b_0' :
'quay.io/biocontainers/bioconductor-bambu:3.0.8--r42hc247a5b_0' }"
'https://depot.galaxyproject.org/singularity/bioconductor-bambu:3.4.0--r43hf17093f_0' :
'quay.io/biocontainers/bioconductor-bambu:3.4.0--r43hf17093f_0' }"

input:
tuple path(fasta), path(gtf)
Expand Down
29 changes: 29 additions & 0 deletions modules/local/blue-crab.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
/*
 * BLUE_CRAB: convert pod5 input into a single BLOW5 file with `blue-crab p2s`.
 *
 * Input : tuple of meta map, genome, gtf, fastq, bam, bai and the pod5 input.
 * Output: the same tuple with the pod5 element replaced by the generated
 *         `<prefix>.blow5` file (emit: nanopolish_outputs), plus versions.yml.
 */
process BLUE_CRAB {
    tag "$meta.id"
    label 'process_medium'

    // NOTE(review): the conda package and containers below provide slow5tools, while
    // the script invokes `blue-crab` — confirm blue-crab is available in these images.
    conda "bioconda::slow5tools==1.2.0"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/slow5tools:1.2.0--h56e2c18_1' :
        'quay.io/biocontainers/slow5tools:1.2.0--h56e2c18_1' }"

    input:
    tuple val(meta), path(genome), path(gtf), path(fastq), path(bam), path(bai), path(pod5)

    output:
    // The BLOW5 file is matched by glob; its name is fixed in the script section below.
    tuple val(meta), path(genome), path(gtf), path(fastq), path(bam), path(bai), path("*.blow5"), emit: nanopolish_outputs
    path "versions.yml"                                                                         , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    // Previously the output name came from an undefined `blow5` variable; give the
    // converted file an explicit name derived from the sample id (overridable via ext.prefix).
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    blue-crab p2s $pod5 -o ${prefix}.blow5

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        blue-crab: \$( blue-crab -V | tail -c 6 )
    END_VERSIONS
    """
}
25 changes: 25 additions & 0 deletions modules/local/dorado_aligner.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
/*
 * DORADO_ALIGNER: align a (modified-base) BAM to the reference with `dorado aligner`,
 * then coordinate-sort and index the result with samtools.
 *
 * Input : tuple of meta map and the BAM to align; the reference fasta.
 * Output: tuple of meta map, `aligned_sorted.bam` and its index (emit: aligned_bam),
 *         plus versions.yml.
 */
process DORADO_ALIGNER {
    tag "$meta.id"
    label 'process_medium'

    // NOTE(review): the image is referenced without a tag, so the dorado version is
    // not pinned — consider pinning an explicit version for reproducibility.
    container "docker.io/ontresearch/dorado"

    input:
    tuple val(meta), path(mod_bam)
    path fasta

    output:
    tuple val(meta), path("aligned_sorted.bam"), path("*.bai") , emit: aligned_bam
    path "versions.yml"                                        , emit: versions

    // Same opt-out guard as the other local modules; defaults to running.
    when:
    task.ext.when == null || task.ext.when

    script:
    // Cap every tool at the CPUs allocated to the task instead of relying on tool
    // defaults. The steps stay chained with && so a failure in any step stops the task.
    """
    dorado aligner --mm2-preset map-ont -t $task.cpus $fasta $mod_bam > aligned.bam \\
        && samtools sort -@ $task.cpus -o aligned_sorted.bam aligned.bam \\
        && samtools index -@ $task.cpus aligned_sorted.bam

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        dorado: \$(echo \$(dorado --version 2>&1) | sed -r 's/.{81}//')
    END_VERSIONS
    """
}

Loading
Loading