-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate_heatmap.R
106 lines (86 loc) · 3.59 KB
/
generate_heatmap.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
library(ggplot2)
library(reshape2)
library(dplyr)
library(tidyr)
library(viridis)
# ---- Inputs ----
# Usage: Rscript generate_heatmap.R <columns.txt> <QC_table.tsv>
#   args[1]: text file listing one metric column name per line
#   args[2]: tab-separated QC table with a header row; it must contain the
#            'dataset' and 'Sample' columns plus every requested metric
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 2) {
  stop(
    "Usage: Rscript generate_heatmap.R <columns.txt> <QC_table.tsv>",
    call. = FALSE
  )
}

# Read the requested metric names from the text file, skipping blank lines
column_names <- readLines(args[1])
#column_names <- readLines("/home/ctuni/columns.txt")
column_names <- column_names[nzchar(trimws(column_names))]
# read.table() below runs with its default check.names = TRUE, so every header
# is rewritten by make.names(): each invalid character (not only "%") becomes
# "." and an "X" is prepended when the name starts with an invalid character.
# Apply the same translation here so requested names match the loaded headers.
column_names <- make.names(column_names)

data <- read.table(args[2], header = TRUE, sep = "\t")
#data <- read.table("QC_table_all.tsv", header = TRUE, sep = "\t")

# Identifier columns that are always kept ('dataset' and 'Sample')
essential_columns <- c("dataset", "Sample")
# Identifiers are not metrics: keep them out of the metric list (this also
# removes duplicated metric names)
column_names <- setdiff(column_names, essential_columns)
if (length(column_names) == 0) {
  stop("No metric columns requested in ", args[1], call. = FALSE)
}
columns_to_keep <- unique(c(essential_columns, column_names))

# Fail early, naming every column that the table does not provide
missing_columns <- setdiff(columns_to_keep, names(data))
if (length(missing_columns) > 0) {
  stop(
    "Columns not found in ", args[2], ": ",
    paste(missing_columns, collapse = ", "),
    call. = FALSE
  )
}

# Filter the dataframe to keep only the required columns, then drop rows that
# are exact duplicates across those columns
filtered_data <- data[, columns_to_keep, drop = FALSE]
filtered_data_unique <- unique(filtered_data)
#######
# Median normalized by 0-1 range
#######

# Rescale a numeric vector to [0, 1] using its observed minimum and maximum.
# A vector with no observed values, or whose values are all identical, has no
# usable range; it is returned as all NA instead of dividing by zero.
rescale_zero_one <- function(x) {
  observed <- x[!is.na(x)]
  if (length(observed) == 0) {
    return(rep(NA_real_, length(x)))
  }
  lowest <- min(observed)
  spread <- max(observed) - lowest
  if (!isTRUE(spread > 0)) {
    return(rep(NA_real_, length(x)))
  }
  (x - lowest) / spread
}

# Calculate the median of every metric column within each dataset, then
# reshape to long format: one row per (dataset, metric) pair.
# The median is wrapped in a function because passing extra arguments through
# across(...) is deprecated in recent dplyr releases.
median_data <- filtered_data_unique %>%
  group_by(dataset) %>%
  summarize(
    across(all_of(column_names), function(x) median(x, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  pivot_longer(cols = -dataset, names_to = "metric", values_to = "value")

# Normalize the medians within each metric so metrics on different scales
# share one colour range
median_data_zero_one <- median_data %>%
  group_by(metric) %>%
  mutate(value_scaled = rescale_zero_one(value)) %>%
  ungroup()

# Create the heatmap straight from the long table (datasets on x, metrics on
# y); factor() keeps the dataset axis discrete even if the ids are numeric
heatmap_plot <- ggplot(
  median_data_zero_one,
  aes(x = factor(dataset), y = metric, fill = value_scaled)
) +
  geom_tile() +
  scale_fill_viridis(name = "Scaled Value", option = "D") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "Dataset", y = "Metric")

# Save the finished plot as a 2600 x 1200 px PNG at 300 dpi
png("heatmap_plot_range.png", width = 2600, height = 1200, res = 300)
print(heatmap_plot)
dev.off()
#####
# Median normalized by z-score
#####

# Standardize a numeric vector to z-scores (mean 0, standard deviation 1).
# When the standard deviation is missing or zero (fewer than two observed
# values, or all values identical) no z-score exists, so all NA is returned
# instead of dividing by zero.
z_score <- function(x) {
  spread <- sd(x, na.rm = TRUE)
  if (!isTRUE(spread > 0)) {
    return(rep(NA_real_, length(x)))
  }
  (x - mean(x, na.rm = TRUE)) / spread
}

# Normalize the per-dataset medians within each metric by the z-score
median_data_z_score <- median_data %>%
  group_by(metric) %>%
  mutate(value_scaled = z_score(value)) %>%
  ungroup()

# Create the heatmap straight from the long table (datasets on x, metrics on
# y); factor() keeps the dataset axis discrete even if the ids are numeric
heatmap_plot <- ggplot(
  median_data_z_score,
  aes(x = factor(dataset), y = metric, fill = value_scaled)
) +
  geom_tile() +
  scale_fill_viridis_c(name = "Z-Score", option = "D") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(x = "Dataset", y = "Metric")

# Save the plot as a 2600 x 1200 px PNG at 300 dpi
png("heatmap_plot_zscore.png", width = 2600, height = 1200, res = 300)
print(heatmap_plot)
dev.off()
# # Reshape the dataframe to wide format for plotting
# wide_data <- dcast(median_data, metric ~ dataset, value.var = "value")
# # Creating the transposed heatmap
# ggplot(melt(wide_data), aes(x = variable, y = metric, fill = value)) +
# geom_tile() +
# scale_fill_gradient(low = "blue", high = "red") +
# theme_minimal() +
# labs(x = "Dataset", y = "Metric", fill = "Median Value")
#
# # Reshape the dataframe to long format
# long_data <- melt(filtered_data, id.vars = essential_columns)
# long_data_unique <- unique(long_data)
#
# # Creating the heatmap
# ggplot(long_data, aes(x = Sample, y = variable, fill = value)) +
# geom_tile() +
# facet_grid(. ~ dataset) +
# scale_fill_gradient(low = "blue", high = "red") +
# theme_minimal() +
# labs(x = "Sample", y = "Metric", fill = "Value")