Skip to content

Commit

Permalink
Merge pull request #12 from NorwegianVeterinaryInstitute/dev
Browse files Browse the repository at this point in the history
Finish draft assembly track
  • Loading branch information
hkaspersen authored Apr 24, 2024
2 parents cf52ada + c8dbd4d commit 6fb4c3c
Show file tree
Hide file tree
Showing 24 changed files with 731 additions and 146 deletions.
20 changes: 20 additions & 0 deletions bin/gen_report.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/usr/bin/env Rscript

args <- commandArgs(trailingOnly = TRUE)
workflow <- args[1]
genome_size <- args[2]
species_name <- args[3]

# Generate rmarkdown report for the relevant track
if (workflow == "draft") {
rmarkdown::render(
input = 'report_draft_assembly.Rmd',
params = list(
quast_report = "transposed_report.tsv",
kraken_report = "kraken_reports.txt",
coverage_report = "coverage_reports.txt",
genome_size_val = genome_size,
species_name = species_name
)
)
}
89 changes: 89 additions & 0 deletions bin/generate_input.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/usr/bin/env Rscript

args <- commandArgs(trailingOnly = TRUE)

option <- args[1]

if (option == "illumina") {
path <- args[2]
pattern <- args[3]

if (grepl("/$", path) == FALSE) {
path <- paste0(path, "/")
}

files <- list.files(path = path,
pattern = pattern)

files_path <- paste0(path, files)

filenames <- unique(sub(pattern, "", files))
df <- data.frame(sample = filenames)
forward <- grep("_R1_", files_path, value = TRUE)
reverse <- grep("_R2_", files_path, value = TRUE)

df$R1 <- forward
df$R2 <- reverse

write.table(
x = df,
file = "samplesheet.csv",
quote = FALSE,
row.names = FALSE,
sep = ","
)

} else if (option == "hybrid") {
path1 <- args[2]
path2 <- args[3]
pattern <- args[4]

if (grepl("/$", path1) == FALSE) {
path1 <- paste0(path1, "/")
}

if (grepl("/$", path2) == FALSE) {
path2 <- paste0(path2, "/")
}

files <- list.files(path = path1,
pattern = pattern)

np_files <- list.files(path = path2,
pattern = ".fastq.gz")

filenames <- sub(pattern, "", files)
filenames_np <- sub(".fastq.gz", "", np_files)

df <- data.frame(sample = filenames)
df$path <- paste0(path1, files)
df$read <- ifelse(grepl("_R1.fastq.gz", df$path), "R1", "R2")

df_np <- data.frame(sample = filenames_np,
NP = paste0(path2, np_files))

il_samplesheet <- reshape(
data = df,
direction = "wide",
v.names = "path",
idvar = "sample",
timevar = "read"
)

samplesheet <- merge(
x = il_samplesheet,
y = df_np,
by = "sample",
all = TRUE
)

names(samplesheet) <- c("sample","R1","R2","NP")

write.table(
x = samplesheet,
file = "samplesheet.csv",
quote = FALSE,
row.names = FALSE,
sep = ","
)
}
59 changes: 59 additions & 0 deletions bin/merge_cov_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/usr/bin/env Rscript

library(impoRt)
library(dplyr)
library(formattable)
library(sparkline)

# Import and merge coverage reports
func_paste <- function(x, collapse = ",") paste(unique(x[!is.na(x)]), collapse = collapse)

calc_cov <- function(df) {
df %>%
rename("reference" = X1,
"pos" = X2,
"reads" = X3) %>%
mutate(reads_no_unmapped = ifelse(reads == 0, NA, reads),
test = ifelse(reads == 0, 0, 1)) %>%
group_by(reference) %>%
summarise(reference = unique(reference),
cov_perc = round(sum(test)/max(pos)*100, 2),
mean_reads_per_base = round(mean(reads_no_unmapped, na.rm = TRUE), 2),
median_reads_per_base = median(reads_no_unmapped, na.rm = TRUE),
sd_reads_per_base = round(sd(reads_no_unmapped, na.rm = TRUE), 2)) %>%
ungroup() %>%
mutate(mean_cov = round(mean(mean_reads_per_base),2),
median_cov = median(median_reads_per_base),
mean_perc_cov = round(mean(cov_perc),2),
Coverage = spk_chr(mean_reads_per_base, width = 180, height = 25))
}

coverage_reports <- get_data(
".",
pattern = "genomecov.txt",
col_names = FALSE,
df = FALSE
) %>%
lapply(., calc_cov) %>%
bind_rows(.id = "sample") %>%
group_by(sample) %>%
summarise_all(list(func_paste)) %>%
mutate(sample = sub("_genomecov.txt", "", sample),
`Mean coverage` = color_bar(color = "#d9d9d9")(mean_cov),
`Median coverage` = color_bar(color = "#ccebc5")(median_cov),
`Mean percent coverage` = color_bar(color = "#fccde5")(mean_perc_cov)) %>%
select(sample,
`Mean coverage`,
`Median coverage`,
`Mean percent coverage`,
Coverage) %>%
rename("Assembly" = sample) %>%
mutate(Coverage = gsub("\n", "", Coverage))

write.table(
coverage_reports,
"coverage_reports.txt",
sep = "\t",
quote = FALSE,
row.names = FALSE
)
28 changes: 28 additions & 0 deletions bin/merge_kraken_data.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/usr/bin/env Rscript

library(impoRt)
library(dplyr)

# Import and merge kraken2 reports
kraken_reports <- get_data(
".",
pattern = "report",
col_names = FALSE,
convert = TRUE
) %>%
rename("perc_fragments" = X1,
"num_fragments_root" = X2,
"num_fragments_direct" = X3,
"rank_code" = X4,
"NCBI_taxon" = X5,
"scientific_name" = X6) %>%
mutate_all(~trimws(., which = "both")) %>%
mutate(ref = sub("\\.kr2\\.report", "", ref))

write.table(
kraken_reports,
"kraken_reports.txt",
sep = "\t",
quote = FALSE,
row.names = FALSE
)
Loading

0 comments on commit 6fb4c3c

Please sign in to comment.