Skip to content

Commit

Permalink
Merge pull request #15 from NorwegianVeterinaryInstitute/dev
Browse files Browse the repository at this point in the history
Completed changes for draft assembly track
  • Loading branch information
hkaspersen authored Sep 25, 2024
2 parents 6fb4c3c + 249d71b commit d7d7e19
Show file tree
Hide file tree
Showing 21 changed files with 269 additions and 16 deletions.
21 changes: 20 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1 +1,20 @@
# Assemblage
# Assemblage

Assemblage is a Nextflow pipeline for generating genome assemblies.
Documentation can be found [here](https://github.com/NorwegianVeterinaryInstitute/Assemblage/wiki).
The pipeline is currently under construction, but the plan is to provide the following tracks:

### Draft genome assembly
This track generates assemblies from standard Illumina paired-end reads.

Status: Complete

### Hybrid genome assembly
This track generates assemblies based on both Illumina and Nanopore reads, using hybrid assembly.

Status: Under construction

### Optimized genome assembly
This track uses the long-read-first assembly method, compares the result to a hybrid assembly, and merges all plasmid sequences using Trycycler.

Status: Not yet started
Binary file added assets/data/reads/illumina/sample1_R1.fastq.gz
Binary file not shown.
Binary file added assets/data/reads/illumina/sample1_R2.fastq.gz
Binary file not shown.
Binary file added assets/data/reads/illumina/sample2_R1.fastq.gz
Binary file not shown.
Binary file added assets/data/reads/illumina/sample2_R2.fastq.gz
Binary file not shown.
Binary file added assets/data/reads/illumina/sample3_R1.fastq.gz
Binary file not shown.
Binary file added assets/data/reads/illumina/sample3_R2.fastq.gz
Binary file not shown.
Binary file added assets/data/reads/nanopore/sample1.fastq.gz
Binary file not shown.
Binary file added assets/data/reads/nanopore/sample2.fastq.gz
Binary file not shown.
Binary file added assets/data/reads/nanopore/sample3.fastq.gz
Binary file not shown.
4 changes: 4 additions & 0 deletions assets/data/reads/samplesheet.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
sample,R1,R2,np
sample1,https://raw.githubusercontent.com/NorwegianVeterinaryInstitute/Assemblage/master/assets/data/reads/illumina/sample1_R1.fastq.gz,https://raw.githubusercontent.com/NorwegianVeterinaryInstitute/Assemblage/master/assets/data/reads/illumina/sample1_R2.fastq.gz,https://raw.githubusercontent.com/NorwegianVeterinaryInstitute/Assemblage/master/assets/data/reads/nanopore/sample1.fastq.gz
sample2,https://raw.githubusercontent.com/NorwegianVeterinaryInstitute/Assemblage/master/assets/data/reads/illumina/sample2_R1.fastq.gz,https://raw.githubusercontent.com/NorwegianVeterinaryInstitute/Assemblage/master/assets/data/reads/illumina/sample2_R2.fastq.gz,https://raw.githubusercontent.com/NorwegianVeterinaryInstitute/Assemblage/master/assets/data/reads/nanopore/sample2.fastq.gz
sample3,https://raw.githubusercontent.com/NorwegianVeterinaryInstitute/Assemblage/master/assets/data/reads/illumina/sample3_R1.fastq.gz,https://raw.githubusercontent.com/NorwegianVeterinaryInstitute/Assemblage/master/assets/data/reads/illumina/sample3_R2.fastq.gz,https://raw.githubusercontent.com/NorwegianVeterinaryInstitute/Assemblage/master/assets/data/reads/nanopore/sample3.fastq.gz
223 changes: 223 additions & 0 deletions assets/r_env.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,223 @@
name: alppaca_report
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=2_gnu
- _r-mutex=1.0.1=anacondar_1
- binutils_impl_linux-64=2.36.1=h193b22a_2
- binutils_linux-64=2.36=hf3e587d_10
- bioconductor-ggtree=3.2.0=r41hdfd78af_0
- bioconductor-treeio=1.18.0=r41hdfd78af_0
- bwidget=1.9.14=ha770c72_1
- bzip2=1.0.8=h7f98852_4
- c-ares=1.18.1=h7f98852_0
- ca-certificates=2022.12.7=ha878542_0
- cairo=1.16.0=ha12eb4b_1010
- curl=7.85.0=h2283fc2_0
- expat=2.4.9=h27087fc_0
- font-ttf-dejavu-sans-mono=2.37=hab24e00_0
- font-ttf-inconsolata=3.000=h77eed37_0
- font-ttf-source-code-pro=2.038=h77eed37_0
- font-ttf-ubuntu=0.83=hab24e00_0
- fontconfig=2.14.0=hc2a2eb6_1
- fonts-conda-ecosystem=1=0
- fonts-conda-forge=1=0
- freetype=2.12.1=hca18f0e_0
- fribidi=1.0.10=h36c2ea0_0
- gcc_impl_linux-64=9.5.0=h6c5bc03_16
- gcc_linux-64=9.5.0=h4258300_10
- gettext=0.19.8.1=h27087fc_1009
- gfortran_impl_linux-64=9.5.0=h3c9b8b6_16
- gfortran_linux-64=9.5.0=hdb51d14_10
- graphite2=1.3.13=h58526e2_1001
- gsl=2.7=he838d99_0
- gxx_impl_linux-64=9.5.0=h6c5bc03_16
- gxx_linux-64=9.5.0=h43f449f_10
- harfbuzz=4.2.0=h40b6f09_0
- icu=69.1=h9c3ff4c_0
- jpeg=9e=h166bdaf_2
- kernel-headers_linux-64=2.6.32=he073ed8_15
- keyutils=1.6.1=h166bdaf_0
- krb5=1.19.3=h08a2579_0
- ld_impl_linux-64=2.36.1=hea4e1c9_2
- lerc=4.0.0=h27087fc_0
- libblas=3.9.0=16_linux64_openblas
- libcblas=3.9.0=16_linux64_openblas
- libcurl=7.85.0=h2283fc2_0
- libdeflate=1.14=h166bdaf_0
- libedit=3.1.20191231=he28a2e2_2
- libev=4.33=h516909a_1
- libffi=3.4.2=h7f98852_5
- libgcc-devel_linux-64=9.5.0=h367e8d2_16
- libgcc-ng=12.1.0=h8d9b700_16
- libgfortran-ng=12.1.0=h69a702a_16
- libgfortran5=12.1.0=hdcd56e2_16
- libglib=2.74.0=h7a41b64_0
- libgomp=12.1.0=h8d9b700_16
- libiconv=1.17=h166bdaf_0
- liblapack=3.9.0=16_linux64_openblas
- libnghttp2=1.47.0=hff17c54_1
- libopenblas=0.3.21=pthreads_h78a6416_3
- libpng=1.6.38=h753d276_0
- libsanitizer=9.5.0=hf86b28c_16
- libssh2=1.10.0=hf14f497_3
- libstdcxx-devel_linux-64=9.5.0=h367e8d2_16
- libstdcxx-ng=12.1.0=ha89aaad_16
- libtiff=4.4.0=h55922b4_4
- libuuid=2.32.1=h7f98852_1000
- libwebp-base=1.2.4=h166bdaf_0
- libxcb=1.13=h7f98852_1004
- libxml2=2.9.12=h885dcf4_1
- libzlib=1.2.12=h166bdaf_4
- make=4.3=hd18ef5c_1
- ncurses=6.2=h58526e2_4
- openssl=3.0.8=h0b41bf4_0
- pandoc=2.19.2=ha770c72_0
- pango=1.50.7=hbd2fdc8_0
- pcre2=10.37=hc3806b6_1
- pixman=0.40.0=h36c2ea0_0
- pthread-stubs=0.4=h36c2ea0_1001
- r-ape=5.6_2=r41h43535f1_0
- r-aplot=0.1.8=r41hc72bb7e_0
- r-askpass=1.1=r41h06615bd_3
- r-assertthat=0.2.1=r41hc72bb7e_3
- r-backports=1.4.1=r41h06615bd_1
- r-base=4.1.2=h2553ce4_1
- r-base64enc=0.1_3=r41h06615bd_1005
- r-bit=4.0.4=r41h06615bd_1
- r-bit64=4.0.5=r41h06615bd_1
- r-brio=1.1.3=r41h06615bd_1
- r-bslib=0.4.0=r41hc72bb7e_1
- r-cachem=1.0.6=r41h06615bd_1
- r-callr=3.7.2=r41hc72bb7e_1
- r-cli=3.4.1=r41h7525677_1
- r-clipr=0.8.0=r41hc72bb7e_1
- r-colorspace=2.0_3=r41h06615bd_1
- r-cpp11=0.4.2=r41hc72bb7e_1
- r-crayon=1.5.2=r41hc72bb7e_1
- r-crosstalk=1.2.0=r41hc72bb7e_1
- r-curl=4.3.3=r41h06615bd_1
- r-data.table=1.14.2=r41h06615bd_1
- r-desc=1.4.2=r41hc72bb7e_1
- r-diffobj=0.3.5=r41h06615bd_1
- r-digest=0.6.29=r41h7525677_1
- r-distributional=0.3.1=r41hc72bb7e_1
- r-dplyr=1.0.10=r41h7525677_1
- r-ellipsis=0.3.2=r41h06615bd_1
- r-evaluate=0.17=r41hc72bb7e_1
- r-fansi=1.0.3=r41h06615bd_1
- r-farver=2.1.1=r41h7525677_1
- r-fastmap=1.1.0=r41h7525677_1
- r-formattable=0.2.1=r41ha770c72_1
- r-fs=1.5.2=r41h7525677_2
- r-gdtools=0.2.4=r41h287fb7f_1
- r-generics=0.1.3=r41hc72bb7e_1
- r-ggdist=3.2.1=r41hc72bb7e_0
- r-ggfun=0.0.7=r41hc72bb7e_0
- r-ggplot2=3.4.1=r41hc72bb7e_0
- r-ggplotify=0.1.0=r41hc72bb7e_0
- r-glue=1.6.2=r41h06615bd_1
- r-gridextra=2.3=r41hc72bb7e_1004
- r-gridgraphics=0.5_1=r41hc72bb7e_1
- r-gtable=0.3.1=r41hc72bb7e_1
- r-hdinterval=0.2.4=r41hc72bb7e_0
- r-hexbin=1.28.2=r41h8da6f51_1
- r-highr=0.9=r41hc72bb7e_1
- r-hms=1.1.2=r41hc72bb7e_1
- r-htmltools=0.5.3=r41h7525677_1
- r-htmlwidgets=1.5.4=r41hc72bb7e_1
- r-httr=1.4.4=r41hc72bb7e_1
- r-isoband=0.2.6=r41h7525677_1
- r-jquerylib=0.1.4=r41hc72bb7e_1
- r-jsonlite=1.8.2=r41h06615bd_1
- r-kableextra=1.3.4=r41hc72bb7e_1
- r-knitr=1.40=r41hc72bb7e_1
- r-labeling=0.4.2=r41hc72bb7e_2
- r-later=1.2.0=r41h7525677_1
- r-lattice=0.20_45=r41h06615bd_1
- r-lazyeval=0.2.2=r41h06615bd_3
- r-lifecycle=1.0.3=r41hc72bb7e_1
- r-magrittr=2.0.3=r41h06615bd_1
- r-mass=7.3_58.1=r41h06615bd_1
- r-matrix=1.4_1=r41h5f7b363_1
- r-memoise=2.0.1=r41hc72bb7e_1
- r-mgcv=1.8_40=r41h0154571_0
- r-mime=0.12=r41h06615bd_1
- r-munsell=0.5.0=r41hc72bb7e_1005
- r-nlme=3.1_159=r41h8da6f51_1
- r-numderiv=2016.8_1.1=r41hc72bb7e_4
- r-openssl=2.0.3=r41h1f3e0c5_1
- r-patchwork=1.1.2=r41hc72bb7e_0
- r-pillar=1.8.1=r41hc72bb7e_1
- r-pkgconfig=2.0.3=r41hc72bb7e_2
- r-pkgload=1.3.0=r41hc72bb7e_1
- r-plotly=4.10.0=r41hc72bb7e_0
- r-praise=1.0.0=r41hc72bb7e_1006
- r-prettyunits=1.1.1=r41hc72bb7e_2
- r-processx=3.7.0=r41h06615bd_1
- r-progress=1.2.2=r41hc72bb7e_3
- r-promises=1.2.0.1=r41h7525677_1
- r-ps=1.7.1=r41h06615bd_1
- r-purrr=0.3.5=r41h06615bd_1
- r-quadprog=1.5_8=r41hd009a43_4
- r-r6=2.5.1=r41hc72bb7e_1
- r-rappdirs=0.3.3=r41h06615bd_1
- r-rcolorbrewer=1.1_3=r41h785f33e_1
- r-rcpp=1.0.9=r41h7525677_2
- r-readr=2.1.2=r41h03ef668_0
- r-rematch2=2.1.2=r41hc72bb7e_2
- r-rlang=1.0.6=r41h7525677_1
- r-rmarkdown=2.17=r41hc72bb7e_1
- r-rprojroot=2.0.3=r41hc72bb7e_1
- r-rstudioapi=0.14=r41hc72bb7e_1
- r-rvest=1.0.3=r41hc72bb7e_1
- r-sass=0.4.2=r41h7525677_1
- r-scales=1.2.1=r41hc72bb7e_1
- r-selectr=0.4_2=r41hc72bb7e_2
- r-stringi=1.7.6=r41h337692f_1
- r-stringr=1.4.0=r41hc72bb7e_2
- r-svglite=2.1.0=r41h7525677_1
- r-sys=3.4=r41h06615bd_1
- r-systemfonts=1.0.4=r41h0ff29ef_1
- r-testthat=3.1.5=r41h7525677_1
- r-tibble=3.1.8=r41h06615bd_1
- r-tidyr=1.2.1=r41h7525677_1
- r-tidyselect=1.1.2=r41hc72bb7e_1
- r-tidytree=0.4.1=r41hc72bb7e_0
- r-tinytex=0.42=r41hc72bb7e_1
- r-tzdb=0.3.0=r41h7525677_1
- r-utf8=1.2.2=r41h06615bd_1
- r-vctrs=0.5.2=r41h38f115c_0
- r-viridis=0.6.2=r41hc72bb7e_0
- r-viridislite=0.4.1=r41hc72bb7e_1
- r-vroom=1.6.0=r41h7525677_1
- r-waldo=0.4.0=r41hc72bb7e_1
- r-webshot=0.5.4=r41hc72bb7e_1
- r-withr=2.5.0=r41hc72bb7e_1
- r-xfun=0.33=r41h7525677_1
- r-xml2=1.3.3=r41h03ef668_0
- r-yaml=2.3.5=r41h06615bd_1
- r-yulab.utils=0.0.5=r41hc72bb7e_1
- readline=8.1=h46c0cb4_0
- sed=4.8=he412f7d_0
- sysroot_linux-64=2.12=he073ed8_15
- tk=8.6.12=h27826a3_0
- tktable=2.10=hb7b940f_3
- xorg-kbproto=1.0.7=h7f98852_1002
- xorg-libice=1.0.10=h7f98852_0
- xorg-libsm=1.2.3=hd9c2040_1000
- xorg-libx11=1.7.2=h7f98852_0
- xorg-libxau=1.0.9=h7f98852_0
- xorg-libxdmcp=1.1.3=h7f98852_0
- xorg-libxext=1.3.4=h7f98852_1
- xorg-libxrender=0.9.10=h7f98852_1003
- xorg-libxt=1.2.1=h7f98852_2
- xorg-renderproto=0.11.1=h7f98852_1002
- xorg-xextproto=7.3.0=h7f98852_1002
- xorg-xproto=7.0.31=h7f98852_1007
- xz=5.2.6=h166bdaf_0
- zlib=1.2.12=h166bdaf_4
- zstd=1.5.2=h6239696_4
4 changes: 1 addition & 3 deletions bin/gen_report.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
args <- commandArgs(trailingOnly = TRUE)
workflow <- args[1]
genome_size <- args[2]
species_name <- args[3]

# Generate rmarkdown report for the relevant track
if (workflow == "draft") {
Expand All @@ -13,8 +12,7 @@ if (workflow == "draft") {
quast_report = "transposed_report.tsv",
kraken_report = "kraken_reports.txt",
coverage_report = "coverage_reports.txt",
genome_size_val = genome_size,
species_name = species_name
genome_size_val = genome_size
)
)
}
6 changes: 4 additions & 2 deletions bin/generate_input.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
args <- commandArgs(trailingOnly = TRUE)

option <- args[1]
r1 <- args[4]
r2 <- args[5]

if (option == "illumina") {
path <- args[2]
Expand All @@ -19,8 +21,8 @@ if (option == "illumina") {

filenames <- unique(sub(pattern, "", files))
df <- data.frame(sample = filenames)
forward <- grep("_R1_", files_path, value = TRUE)
reverse <- grep("_R2_", files_path, value = TRUE)
forward <- grep(r1, files_path, value = TRUE)
reverse <- grep(r2, files_path, value = TRUE)

df$R1 <- forward
df$R2 <- reverse
Expand Down
2 changes: 0 additions & 2 deletions bin/report_draft_assembly.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ params:
value: x
genome_size_val:
value: x
species_name:
value: x
---

```{r, message=FALSE, warning=FALSE}
Expand Down
2 changes: 1 addition & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ process {
]
}

withName: 'UNICYCLER' {
withName: 'UNICYCLER|UNICYCLER_HYBRID' {
ext.args = {
[ params.unicycler_args ? "${params.unicycler_args}" : '',
"--verbosity 2",
Expand Down
Binary file added img/assemblage_draft_track.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 4 additions & 4 deletions modules/KRAKEN.nf
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
process KRAKEN {
conda (params.enable_conda ? 'bioconda::kraken2=2.1.2' : null)
container 'quay.io/biocontainers/kraken2:2.1.2--pl5321h4ac6f70_4'
conda (params.enable_conda ? 'bioconda::kraken2=2.1.3' : null)
container 'quay.io/biocontainers/kraken2:2.1.3--pl5321hdcf5f25_1'

label 'process_high_memory'

input:
tuple val(datasetID), path(R1), path(R2)
tuple val(datasetID), path(R1), path(R2), path(db)

output:
file("*")
path "*kr2.report", emit: report_ch

script:
"""
kraken2 --db $params.kraken_db --paired $R1 $R2 --threads $task.cpus --output ${datasetID}.kr2.out --report ${datasetID}.kr2.report --use-names
kraken2 --db $db --paired $R1 $R2 --threads $task.cpus --output ${datasetID}.kr2.out --report ${datasetID}.kr2.report --use-names
"""
}
2 changes: 2 additions & 0 deletions modules/MERGE.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ process MERGE_COV_REPORTS {
conda (params.enable_conda ? './assets/r_env.yml' : null)
container 'evezeyl/r_assemblage:latest'

label 'process_high_memory'

input:
path(reports)

Expand Down
2 changes: 1 addition & 1 deletion modules/REPORT.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,6 @@ process REPORT_DRAFT {
script:
"""
cp $baseDir/bin/report_draft_assembly.Rmd .
Rscript $baseDir/bin/gen_report.R "draft" $params.genome_size $params.species_name
Rscript $baseDir/bin/gen_report.R "draft" $params.genome_size
"""
}
11 changes: 9 additions & 2 deletions workflows/DRAFT_ASSEMBLY.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,20 @@ workflow DRAFT_ASSEMBLY {
Channel
.fromPath(params.input, checkIfExists: true)
.splitCsv(header:true, sep:",")
.map { tuple(it.id, file(it.R1, checkIfExists: true), file(it.R2, checkIfExists: true)) }
.map { tuple(it.sample, file(it.R1, checkIfExists: true), file(it.R2, checkIfExists: true)) }
.set { input_ch }

Channel
.fromPath(params.kraken_db, checkIfExists: true)
.set { db_ch }

input_ch.combine(db_ch)
.set { kraken2_input_ch }

// QC
FASTQC(input_ch)
MULTIQC_PRE(FASTQC.out.fastqc_reports.collect())
KRAKEN(input_ch)
KRAKEN(kraken2_input_ch)
MERGE_KRAKEN_REPORTS(KRAKEN.out.report_ch.collect())
TRIM(input_ch)
RASUSA(TRIM.out.trim_reads)
Expand Down

0 comments on commit d7d7e19

Please sign in to comment.