Skip to content

Commit

Permalink
updates
Browse files Browse the repository at this point in the history
  • Loading branch information
maxfieldk committed Oct 13, 2024
1 parent fae33c8 commit 14e0c99
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 32 deletions.
2 changes: 1 addition & 1 deletion workflow/scripts/generate_colors_to_source.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ sample_table <- read_csv(conf[["sample_table"]])
sample_table <- sample_table[match(conf$samples, sample_table$sample_name), ]

# these are optionally used depending on values set in config.yaml
custom_descriptive <- c("#4b9b7a", "#ca6728", "#716dab", "#d43f88", "#0068f9", "#ffe100", "#7B3A96FF", "#CB2027FF")
custom_descriptive <- c("#4b9b7a", "#ca6728", "#716dab", "#d43f88", "#0068f9", "#ffe100", "#7B3A96FF", "#CB2027FF", "#808180FF", "#1B1919FF")
custom_condition <- c("#3B4992FF", "#EE0000FF", "#008B45FF", "#631879FF", "#008280FF", "#BB0021FF", "#5F559BFF", "#A20056FF", "#808180FF", "#1B1919FF")

tryCatch(
Expand Down
37 changes: 35 additions & 2 deletions workflow/srna/scripts/characterize_samples.R
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@ library(rtracklayer)
library(ComplexUpset)
library(patchwork)
library(scales)


# library(ggrastr)


Expand Down Expand Up @@ -199,6 +197,23 @@ p <- pf %>%
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
mysaveandstore(sprintf("%s/multiple_groups/gene_or_repeat_type.pdf", outputdir), w = 1 + 1.5 * length(conf$samples) / 2.4, h = 4)

p_rep_only <- pf %>%
mutate(Class = factor(Class, levels = c("Gene", "LowComp", "SimpleRep", "Retroposon", "SAT", "DNA", "LTR", "LINE", "SINE", "Other"))) %>%
group_by(sample, Class) %>%
summarize(tpm = sum(tpm)) %>%
ungroup() %>%
filter(Class != "Gene") %>%
mutate(sample = factor(sample, levels = conf$samples)) %>%
mutate(sample = factor(sample, levels = conf$samples)) %>%
ggbarplot(x = "sample", y = "tpm", fill = "Class") +
xlab("") +
ylab("TPM") +
scale_palette +
mtclosed +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
mysaveandstore(pl = p_rep_only, sprintf("%s/multiple_groups/repeat_only_type.pdf", outputdir), w = 1 + 1.5 * length(conf$samples) / 2.4, h = 4)


refseq <- import(params$annotation_genes)
refseqdf <- refseq %>%
as.data.frame() %>%
Expand Down Expand Up @@ -231,7 +246,25 @@ p <- pff %>%
mtclosed +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
mysaveandstore(sprintf("%s/multiple_groups/gene_biotype_or_repeat.pdf", outputdir), w = 1 + 1.5 * length(conf$samples) / 2.4, h = 4)
p_gene_only <- pff %>%
mutate(Class = factor(Class, levels = biotype_levels)) %>%
group_by(sample, Class) %>%
summarize(tpm = sum(tpm)) %>%
ungroup() %>%
filter(Class != "repeat") %>%
mutate(sample = factor(sample, levels = conf$samples)) %>%
mutate(sample = factor(sample, levels = conf$samples)) %>%
ggbarplot(x = "sample", y = "tpm", fill = "Class") +
xlab("") +
ylab("TPM") +
scale_palette +
mtclosed +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
mysaveandstore(pl = p_gene_only, sprintf("%s/multiple_groups/gene_biotype_only.pdf", outputdir), w = 1 + 1.5 * length(conf$samples) / 2.4, h = 4)


ptch <- p_gene_only + p_rep_only + plot_layout(nrow = 2)
mysaveandstore(pl = ptch, sprintf("%s/multiple_groups/gene_repeat_split.pdf", outputdir), w = 1 + 1.5 * length(conf$samples) / 2.4, h = 8)

x <- tibble(OUT = "")
write_tsv(x, file = outputs$environment)
18 changes: 17 additions & 1 deletion workflow/srna/scripts/ea_repeats.R
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,6 @@ for (contrast in params[["contrasts"]]) {
gse_df <<- rbind(gse_df, df)
}

genesettheme <- theme_gray() + theme(axis.text.y = element_text(colour = "black"))
# p <- dotplot(gse, showCategory = 20) + ggtitle(paste("GSEA", contrast, sep = " ")) + genesettheme + mtopen
# mysaveandstore(sprintf("%s/%s/%s/gsea/%s/%s/dotplot.pdf", params[["outputdir"]], counttype, contrast, ontology, filter_var), w = 3, h = 4, res = 300)

Expand Down Expand Up @@ -246,6 +245,23 @@ for (test_type in c("std", "pos", "neg")) {
arrange(p.adjust) %>%
slice_head(n = 5) %$% ID %>%
unique()

# human_subfam_ordering <- c(
# "L1HS", "L1PA2", "L1PA3", "L1PA4", "L1PA5", "L1PA6",
# "AluY", "HERVK_LTR", "HERVK_INT", "HERVL_LTR", "HERVL_INT",
# "SVA_A", "SVA_B", "SVA_C", "SVA_D", "SVA_E", "SVA_F"
# )
# other_ordering <- resultsdf %>%
# filter(rte_subfamily != "Other") %>%
# group_by(rte_family, rte_subfamily) %>%
# summarise(family_av_pctdiv = mean(family_av_pctdiv)) %>%
# mutate(rte_family = factor(rte_family, levels = c("L1", "Alu", "ERV", "SVA"))) %>%
# arrange(rte_family, family_av_pctdiv) %$% rte_subfamily
# if (confALL$aref$species != "human") {
# subfam_ordering <<- other_ordering
# } else {
# subfam_ordering <<- human_subfam_ordering
# }
p <- grestemp %>%
dplyr::filter(ID %in% sigIDs) %>%
mutate(sig = ifelse(p.adjust < 0.05, "*", "")) %>%
Expand Down
102 changes: 74 additions & 28 deletions workflow/srna/scripts/repeatanalysisPlots.R
Original file line number Diff line number Diff line change
Expand Up @@ -500,7 +500,7 @@ for (ontology in c("rte_subfamily_limited", "l1_subfamily_limited", "rte_family"

queryIDs <- mcols(query)$gene_id
subjectIDs <- mcols(subject)$gene_id[hits]
subset$nearest_gene <- subjectIDs[queryIDs]
subset$nearest_gene <- subjectIDs


te_gene_matrix_list <- list()
Expand All @@ -517,24 +517,20 @@ for (ontology in c("rte_subfamily_limited", "l1_subfamily_limited", "rte_family"
contrast_samples <- sample_table %>%
filter(condition %in% c(contrast_level_1, contrast_level_2)) %>%
pull(sample_name)

resultsdfwithgenes[match(subjectIDs, resultsdfwithgenes$gene_id), ] %>%
pull(!!sym(contrast_log2FoldChange)) %>%
length()
te_gene_matrix <- subset %>%
dplyr::select(!!sym(ontology), !!sym(contrast_log2FoldChange), loc_integrative, req_integrative, rte_length_req) %>%
dplyr::select(gene_id, nearest_gene, !!sym(ontology), !!sym(contrast_log2FoldChange), loc_integrative, req_integrative, rte_length_req) %>%
dplyr::rename(TE = !!sym(contrast_log2FoldChange))
te_gene_matrix$GENE <- resultsdfwithgenes[match(subjectIDs, resultsdfwithgenes$gene_id), ] %>% pull(!!sym(contrast_log2FoldChange))

te_gene_matrix <- te_gene_matrix %>% drop_na()
te_gene_matrix_list[[contrast]] <- te_gene_matrix
cor.test(te_gene_matrix$TE, te_gene_matrix$GENE, method = "spearman", )$estimate
cor.test(te_gene_matrix$TE, te_gene_matrix$GENE, method = "spearman", )$p.value
te_gene_matrix <- te_gene_matrix %>% drop_na()
tryCatch(
{
cor_df <- te_gene_matrix %>%
mutate(req_integrative = gsub(".*Intact.*", "Full Length", req_integrative)) %>%
cor_df <<- te_gene_matrix %>%
mutate(req_integrative = gsub(".*Intact.*", "Yng FL", req_integrative)) %>%
group_by(!!sym(ontology), req_integrative, loc_integrative) %>%
mutate(groupN = n()) %>%
filter(groupN > 4) %>%
Expand All @@ -545,21 +541,20 @@ for (ontology in c("rte_subfamily_limited", "l1_subfamily_limited", "rte_family"
filter(loc_integrative != "Centromeric") %>%
complete(!!sym(ontology), req_integrative, loc_integrative)
p <- pf %>%
mutate(genicfacet = ifelse(loc_integrative == "Intergenic", "", "Genic")) %>%
mutate(loc_integrative = factor(loc_integrative, levels = c("NoncodingTxAdjacent", "NoncodingTx", "CodingTxAdjacent", "Intronic", "Exonic", "Intergenic"))) %>%
ggplot() +
geom_tile(aes(x = !!sym(ontology), y = loc_integrative, fill = cor)) +
facet_grid(genicfacet ~ req_integrative, space = "free", scales = "free") +
facet_grid(~req_integrative, space = "free", scales = "free") +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", breaks = c(-0.8, 0, 0.8), na.value = "dark grey") +
mtclosed +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(x = "", y = "")
mysaveandstore(sprintf("%s/%s/%s/rte_gene_cor/rte_genic_cor_%s.pdf", outputdir, counttype, contrast, ontology), 6, 4)

p <- pf %>%
mutate(genicfacet = ifelse(loc_integrative == "Intergenic", "", "Genic")) %>%
mutate(loc_integrative = factor(loc_integrative, levels = c("NoncodingTxAdjacent", "NoncodingTx", "CodingTxAdjacent", "Intronic", "Exonic", "Intergenic"))) %>%
ggplot(aes(x = !!sym(ontology), y = loc_integrative)) +
geom_tile(aes(fill = cor)) +
facet_grid(genicfacet ~ req_integrative, space = "free", scales = "free") +
facet_grid(~req_integrative, space = "free", scales = "free") +
geom_text(aes(label = ifelse(pval < 0.05, "*", "")), size = 3) +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", breaks = c(-0.8, 0, 0.8), na.value = "dark grey") +
mtclosed +
Expand All @@ -572,24 +567,29 @@ for (ontology in c("rte_subfamily_limited", "l1_subfamily_limited", "rte_family"
}
)
}

te_gene_matrix_all <- Reduce(bind_rows, te_gene_matrix_list)
base_level <- conf$levels[[1]]
# keep only contrasts vs base level to not artificially inflate number of independent comparisons
te_gene_matrix_all <- Reduce(
bind_rows,
te_gene_matrix_list[grepl(sprintf("_vs_%s", base_level), names(te_gene_matrix_list))]
)
print(te_gene_matrix_all, width = Inf)
cor_df <- te_gene_matrix_all %>%
group_by(!!sym(ontology), req_integrative, loc_integrative, rte_length_req) %>%
mutate(groupN = n()) %>%
filter(groupN > 4) %>%
summarise(cor = cor.test(TE, GENE, method = "spearman")$estimate, pval = cor.test(TE, GENE, method = "spearman")$p.value)
summarise(cor = cor.test(TE, GENE, method = "spearman")$estimate, pval = cor.test(TE, GENE, method = "spearman")$p.value) %>%
mutate(padj = p.adjust(pval, method = "fdr"))
cor_df %$% req_integrative %>% unique()
pf <- cor_df %>%
ungroup() %>%
filter(loc_integrative != "Centromeric") %>%
complete(!!sym(ontology), req_integrative, loc_integrative, rte_length_req)
complete(!!sym(ontology), req_integrative, loc_integrative)

p <- pf %>%
mutate(genicfacet = ifelse(loc_integrative == "Intergenic", "", "Genic")) %>%
ggplot() +
geom_tile(aes(x = !!sym(ontology), y = loc_integrative, fill = cor)) +
facet_grid(genicfacet ~ req_integrative, space = "free", scales = "free") +
facet_grid(~req_integrative, space = "free", scales = "free") +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", breaks = c(-0.8, 0, 0.8), na.value = "dark grey") +
mtclosed +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
Expand All @@ -598,37 +598,39 @@ for (ontology in c("rte_subfamily_limited", "l1_subfamily_limited", "rte_family"


p <- pf %>%
mutate(genicfacet = ifelse(loc_integrative == "Intergenic", "", "Genic")) %>%
ggplot(aes(x = !!sym(ontology), y = loc_integrative)) +
geom_tile(aes(fill = cor)) +
facet_grid(genicfacet ~ req_integrative, space = "free", scales = "free") +
geom_text(aes(label = ifelse(pval < 0.05, "*", "")), size = 3) +
facet_grid(~ req_integrative, space = "free", scales = "free") +
geom_text(aes(label = ifelse(padj < 0.05, "*", "")), size = 3) +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", breaks = c(-0.8, 0, 0.8), na.value = "dark grey") +
mtclosed +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(x = "", y = "")
mysaveandstore(sprintf("%s/%s/%s/rte_gene_cor/rte_genic_cor_pval_%s.pdf", outputdir, counttype, "pan_contrast", ontology), 10, 6)

pf %>% filter(rte_subfamily_limited == "AluY") %>% filter(req_integrative == "Old FL")

cor_df <- te_gene_matrix_all %>%
group_by(!!sym(ontology), loc_integrative, rte_length_req) %>%
mutate(groupN = n()) %>%
filter(groupN > 4) %>%
summarise(cor = cor.test(TE, GENE, method = "spearman")$estimate, pval = cor.test(TE, GENE, method = "spearman")$p.value)
summarise(cor = cor.test(TE, GENE, method = "spearman")$estimate, pval = cor.test(TE, GENE, method = "spearman")$p.value) %>%
mutate(padj = p.adjust(pval, method = "fdr"))

pf <- cor_df %>%
ungroup() %>%
filter(loc_integrative != "Centromeric") %>%
complete(!!sym(ontology), loc_integrative, rte_length_req)
p <- pf %>%
mutate(genicfacet = ifelse(loc_integrative == "Intergenic", "", "Genic")) %>%
mutate(loc_integrative = factor(loc_integrative, levels = c("NoncodingTxAdjacent", "NoncodingTx", "CodingTxAdjacent", "Intronic", "Exonic", "Intergenic"))) %>%
ggplot(aes(x = !!sym(ontology), y = loc_integrative)) +
geom_tile(aes(fill = cor)) +
facet_grid(genicfacet ~ rte_length_req, space = "free", scales = "free") +
geom_text(aes(label = ifelse(pval < 0.05, "*", "")), size = 3) +
facet_grid(~rte_length_req, space = "free", scales = "free") +
geom_text(aes(label = ifelse(padj < 0.05, "*", "")), size = 3) +
scale_fill_gradient2(low = "blue", mid = "white", high = "red", breaks = c(-0.5, 0, 0.5), na.value = "dark grey") +
mtclosed +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
labs(x = "", y = "")
mysaveandstore(sprintf("%s/%s/%s/rte_gene_cor/rte_genic_cor_length_req_pval_%s.pdf", outputdir, counttype, "pan_contrast", ontology), 6.5, 5)
mysaveandstore(sprintf("%s/%s/%s/rte_gene_cor/rte_genic_cor_length_req_pval1_%s.pdf", outputdir, counttype, "pan_contrast", ontology), 5, 4)
}


Expand Down Expand Up @@ -1156,6 +1158,50 @@ for (group_var in c("repeat_superfamily", "rte_subfamily", "rte_family", "l1_sub

p <- wrap_elements(grid.grabExpr(draw(hm, heatmap_legend_side = "right", annotation_legend_side = "right")))
mysaveandstore(sprintf("%s/%s/pan_contrast/heatmap/%s_heatmap.pdf", outputdir, counttype, group_var), 0.5 + length(colnames(m)) * 0.35, 1.75 + length(rownames(m)) * 0.25)

human_subfam_ordering <- c(
"L1HS", "L1PA2", "L1PA3", "L1PA4", "L1PA5", "L1PA6",
"AluY", "HERVK_LTR", "HERVK_INT", "HERVL_LTR", "HERVL_INT",
"SVA_A", "SVA_B", "SVA_C", "SVA_D", "SVA_E", "SVA_F"
)
other_ordering <- resultsdfwithgenes %>%
filter(rte_subfamily != "Other") %>%
group_by(rte_family, rte_subfamily) %>%
summarise(family_av_pctdiv = mean(family_av_pctdiv)) %>%
mutate(rte_family = factor(rte_family, levels = c("L1", "Alu", "ERV", "SVA"))) %>%
arrange(rte_family, family_av_pctdiv) %$% rte_subfamily
if (confALL$aref$species != "human") {
subfam_ordering <<- other_ordering
} else {
subfam_ordering <<- human_subfam_ordering
}

m <- stat_frame %>%
dplyr::select(sample, !!sym(group_var), l2fc) %>%
pivot_wider(names_from = sample, values_from = l2fc) %>%
mutate(rte_subfamily = factor(rte_subfamily, levels = subfam_ordering)) %>%
arrange(rte_subfamily) %>%
column_to_rownames(group_var) %>%
as.matrix()
m <- m[!rowSums(is.na(m)), ]
library(patchwork)
hm <- m %>%
Heatmap(
name = "log2FC",
cluster_rows = FALSE,
cluster_columns = FALSE,
show_row_names = TRUE,
show_parent_dend_line = FALSE,
show_row_dend = FALSE,
show_column_names = TRUE,
column_names_rot = 90,
row_title = NULL,
cluster_row_slices = FALSE,
border_gp = gpar(col = "black")
)
p <- wrap_elements(grid.grabExpr(draw(hm, heatmap_legend_side = "right", annotation_legend_side = "right")))
mysaveandstore(sprintf("%s/%s/pan_contrast/heatmap/%s_heatmap_no_rowclust.pdf", outputdir, counttype, group_var), 0.5 + length(colnames(m)) * 0.35, 1.75 + length(rownames(m)) * 0.25)
mysaveandstore(sprintf("%s/%s/pan_contrast/heatmap/%s_heatmap_no_rowclust_compressed.pdf", outputdir, counttype, group_var), 0.5 + length(colnames(m)) * 0.275, 1.75 + length(rownames(m)) * 0.16)
}


Expand Down

0 comments on commit 14e0c99

Please sign in to comment.