Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Plot ragtag paf #136

Merged
merged 17 commits into from
Sep 25, 2024
12 changes: 12 additions & 0 deletions tools/plot_ragtag_paf/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
categories:
- Assembly
- Visualization
description: Plot RagTag output to compare query contigs against the reference
exclude:
- tool_test_output.html
- tool_test_output.json
homepage_url: http://github.com/usegalaxy-au/tools-au
long_description: Plot RagTag output to compare query contigs against the reference
name: plot_ragtag_paf
owner: galaxy-australia
remote_repository_url: https://github.com/usegalaxy-au/tools-au
257 changes: 257 additions & 0 deletions tools/plot_ragtag_paf/plot_ragtag_paf.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
#!/usr/bin/env Rscript

options(
show.error.messages = FALSE,
error = function() {
cat(geterrmessage(), file = stderr())
q("no", 1, FALSE)
}
)

library(data.table)
library(ggplot2)
library(pafr)
library(viridisLite)
library(yaml)

args <- commandArgs(trailingOnly = TRUE)

#############
# FUNCTIONS #
#############


# Adjust the PAF coordinates to the continuous x-axis
adjust_coords <- function(qname, qstart, qend, tname, tstart, tend) {
my_qstart <- lookup_qstart(qname)
my_tstart <- lookup_tstart(tname)
return(
data.table(
adj_qstart = qstart + my_qstart,
adj_qend = qend + my_qstart,
adj_tstart = tstart + my_tstart,
adj_tend = tend + my_tstart
)
)
}

# calculate spacing between contigs so the ref and query take up the same x-axis
# space
get_padding <- function(paf) {
tlen_sum <- unique(paf, by = "tname")[, sum(tlen)]
tgaps_total <- paf[, length(unique(tname))]

qlen_sum <- unique(paf, by = "qname")[, sum(qlen)]
qgaps_total <- paf[, length(unique(qname))]

# the minumum gap is going to be one tenth of the x-axis space
if (tlen_sum > qlen_sum) {
min_gap <- (tlen_sum * gap_size) / tgaps_total
full_tlen <- tlen_sum + (tgaps_total * min_gap)

total_qpadding <- full_tlen - qlen_sum
return(
c(
"t_padding" = min_gap,
"q_padding" = total_qpadding / qgaps_total
)
)
} else if (tlen_sum < qlen_sum) {
min_gap <- (qlen_sum * gap_size) / qgaps_total
full_qlen <- qlen_sum + (qgaps_total * min_gap)

total_tpadding <- full_qlen - tlen_sum

return(
c(
"t_padding" = total_tpadding / tgaps_total,
"q_padding" = min_gap
)
)
} else {
min_gap <- (tlen_sum * gap_size) / tgaps_total
return(
c(
"t_padding" = min_gap,
"q_padding" = min_gap
)
)
}
}

# offsets for the adjusted coordinates
lookup_tstart <- function(x) {
return(unique(tstarts[tname == x, shift_tstart]))
}


lookup_qstart <- function(x) {
return(unique(qstarts[qname == x, shift_qstart]))
}


###########
# GLOBALS #
###########

# PAF column spec
sort_columns <- c("tname", "tstart", "tend", "qname", "qstart", "qend")

config_file <- args[1]
config <- yaml.load_file(config_file)
typed_config <- lapply(config, type.convert, as.is = TRUE)
list2env(typed_config, envir = .GlobalEnv)

########
# MAIN #
########

# read the data
agp <- fread(agp_file, fill = TRUE, skip = 2)[!V5 %in% c("N", "U")]
raw_paf <- read_paf(paf_file)
paf_dt <- data.table(raw_paf)

# calculate spacing
padding <- get_padding(paf_dt[tp == "P" & nmatch >= min_nmatch])

# order the reference contigs
paf_dt[, tname := factor(tname, levels = gtools::mixedsort(unique(tname)))]
setkeyv(paf_dt, cols = sort_columns)

# generate continuous reference coordinates
tpaf <- unique(paf_dt, by = "tname")
tpaf[, pad_tstart := shift(cumsum(tlen + padding[["t_padding"]]), 1, 0)]
tpaf[, shift_tstart := pad_tstart + (padding[["t_padding"]] / 2)]
tpaf[, pad_tend := shift_tstart + tlen]

# order the query contigs by their position in the AGP file
query_order <- agp[, V6]

query_paf <- paf_dt[qname %in% query_order & tp == "P" & nmatch >= min_nmatch]
subset_query_order <- query_order[query_order %in% query_paf[, qname]]

# Map query contigs onto a universal x-scale
query_paf[, qname := factor(qname, levels = subset_query_order)]
setkeyv(query_paf, cols = sort_columns)

qpaf <- unique(query_paf, by = "qname")
qpaf[, pad_qstart := shift(cumsum(qlen + padding[["q_padding"]]), 1, 0)]
qpaf[, shift_qstart := pad_qstart + (padding[["q_padding"]] / 2)]
qpaf[, pad_qend := shift_qstart + qlen]

# generate offsets for the alignment records
tstarts <- unique(tpaf[, .(tname, shift_tstart)])
qstarts <- unique(qpaf[, .(qname, shift_qstart)])

# adjust the alignment coordinates
paf_dt[,
c(
"adj_qstart",
"adj_qend",
"adj_tstart",
"adj_tend"
) := adjust_coords(qname, qstart, qend, tname, tstart, tend),
by = .(qname, qstart, qend, tname, tstart, tend)
]

# generate polygons. P is for primary alignments only
polygon_y_bump <- 0.017 # account for contig thickness
paf_polygons <- paf_dt[
tp == "P" & nmatch >= min_nmatch,
.(
x = c(adj_tstart, adj_qstart, adj_qend, adj_tend),
y = c(
t_y + polygon_y_bump,
q_y - polygon_y_bump,
q_y - polygon_y_bump,
t_y + polygon_y_bump
),
id = paste0("polygon", .GRP)
),
by = .(qname, qstart, qend, tname, tstart, tend)
]

# set up plot
total_height <- (q_y - t_y) * 1.618
y_axis_space <- (total_height - (q_y - t_y)) / 2
middle_x <- tpaf[1, shift_tstart] + tpaf[.N, pad_tend] / 2



all_contig_names <- c(tpaf[, unique(tname)])
all_colours <- viridis(
length(all_contig_names) + palette_space + 1
)
names(all_colours) <- c(
"query",
rep("blank", palette_space),
all_contig_names
)

# Plot the ideogram with ribbons connecting the two sets of contigs
gp <- ggplot() +
theme_void(base_family = "Lato", base_size = 12) +
scale_fill_manual(
values = all_colours, guide = "none"
) +
scale_colour_manual(
values = all_colours, guide = "none"
) +
geom_polygon(
data = paf_polygons,
aes(
x = x, y = y, group = id, fill = tname
), alpha = 0.5
) +
geom_segment(
data = tpaf,
aes(
x = shift_tstart,
xend = pad_tend,
colour = tname
),
y = t_y,
linewidth = 5,
lineend = "butt"
) +
geom_segment(
data = qpaf,
aes(
x = shift_qstart,
xend = pad_qend
),
colour = all_colours[["query"]],
y = q_y,
linewidth = 5,
lineend = "butt"
) +
ylim(
t_y - y_axis_space,
q_y + y_axis_space
) +
annotate(
geom = "text",
label = "Query contigs",
x = middle_x,
y = q_y + (y_axis_space / 3),
hjust = 0.5,
vjust = 0.5
) +
annotate(
geom = "text",
label = "Reference contigs",
x = middle_x,
y = t_y - (y_axis_space / 3),
hjust = 0.5,
vjust = 0.5
)

ggsave(plot_file,
gp,
width = plot_width,
height = plot_height,
units = "mm",
device = cairo_pdf
)

sessionInfo()
78 changes: 78 additions & 0 deletions tools/plot_ragtag_paf/plot_ragtag_paf.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
<tool id="plot_ragtag_paf" name="Plot RagTag output" version="0.0.1">
<description>to compare query contigs to the reference</description>
<requirements>
<requirement type="package" version="0.0.2">r-pafr</requirement>
<requirement type="package" version="0.4.2">r-viridislite</requirement>
<requirement type="package" version="1.15.4">r-data.table</requirement>
<requirement type="package" version="2.3.10">r-yaml</requirement>
<requirement type="package" version="3.5.1">r-ggplot2</requirement>
<requirement type="package" version="3.9.5">r-gtools</requirement>
</requirements>
<command detect_errors="exit_code"><![CDATA[
Rscript
'${__tool_directory__}/plot_ragtag_paf.R'
'${plot_config}'
]]></command>
<configfiles>
<configfile name="plot_config">
agp_file: '${input_agp}'
paf_file: '${input_paf}'
plot_file: 'ragtag.pdf'
fontsize: '${plot_params.fontsize}'
gap_size: '${plot_params.gap_size}'
min_nmatch: '${min_nmatch}'
palette_space: '${plot_params.palette_space}'
plot_height: '${plot_params.plot_height}'
plot_width: '${plot_params.plot_width}'
q_y: '${plot_params.q_y}'
t_y: '${plot_params.t_y}'
</configfile>
</configfiles>
<inputs>
<param format="paf" name="input_paf" label="PAF output from RagTag" type="data"/>
<param format="agp" name="input_agp" label="AGP output from RagTag" type="data"/>
<param type="integer" name="min_nmatch" label="Minimum alignment length to include in plot" value="20000"/>
<section name="plot_params" title="Plot parameters">
<param type="integer" name="plot_width" label="Plot width (mm)" value="254"/>
<param type="integer" name="plot_height" label="Plot height (mm)" value="191"/>
<param type="integer" name="fontsize" label="Fontsize (pt)" value="12"/>
<param type="integer" name="palette_space" label="Number of unused colours in the colour palette between the query contig colour and the colour of the first reference contig." help="Try a higher value if the query contigs look too similar to the reference contigs." value="4"/>
<param type="float" min="0" max="1" name="gap_size" label="Total length of gaps between contigs relative to the total assembly length." help="Try a higher value if the contigs look too close together." value="0.1"/>
<param type="integer" name="t_y" label="Position of reference contigs on the y-axis" value="1"/>
<param type="integer" name="q_y" label="Position of query contigs on the y-axis" value="2"/>
</section>
</inputs>
<outputs>
<data name="plot" format="pdf" label="${tool.name} on ${on_string}" from_work_dir="ragtag.pdf"/>
</outputs>
<tests>
<test expect_num_outputs="1">
<param name="input_paf" ftype="paf.gz" value="ragtag.paf.gz"/>
<param name="input_agp" ftype="agp" value="ragtag.agp"/>
<output name="plot">
<assert_contents>
<has_size size="8758" delta="1000" />
</assert_contents>
</output>
</test>
</tests>
<help>
Accepts the PAF and AGP produced by the RagTag Scaffold and draws a plot
showing the alignment of the query contigs to the reference contigs.

.. figure:: $PATH_TO_IMAGES/ragtag.png
:alt: RagTag plot

</help>
<citations>
<citation type="bibtex">
@misc{plot_ragtag_paf,
title = {plot_ragtag_paf},
author = {Harrop, Tom},
year = {2024},
organization = {Galaxy Australia},
url = {https://github.com/usegalaxy-au/tools-au},
}
</citation>
</citations>
</tool>
Binary file added tools/plot_ragtag_paf/static/images/ragtag.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading