forked from bitroniq/ten-seconds-cv
-
Notifications
You must be signed in to change notification settings - Fork 1
/
heatmap.py
151 lines (124 loc) · 5.69 KB
/
heatmap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontManager
import numpy as np
import pandas as pd
import seaborn as sns
import yaml
def print_available_fonts():
global yaml_data
font_manager = FontManager()
fuentes_disponibles = [f.name for f in font_manager.ttflist]
# Imprime las fuentes disponibles
for fuente in sorted(set(fuentes_disponibles)):
print(fuente)
# Load the YAML data from a string
def load_yaml_data(yaml_data_string):
yaml_data = yaml.load(yaml_data_string, Loader=yaml.FullLoader)
return yaml_data
def load_yaml_data_from_file(yaml_file):
with open(yaml_file) as file:
docs = yaml.load_all(file, yaml.FullLoader)
for yaml_data in docs:
if yaml_data is not None and 'skills_details' in yaml_data:
break
if 'skills_details' not in yaml_data:
raise Exception('Invalid YAML file. Missing skills section')
return yaml_data
def configure_labels(ax):
# Obtener las etiquetas actuales del eje x
ax = plt.gca()
xlabels = [label.get_text() for label in ax.get_xticklabels()]
# filtrar las etiquetas para mostrar solo los años
xlabels = [label if label.endswith('01') else '' for label in xlabels]
# Establecer las etiquetas filtradas en el eje x
ax.set_xticks([xlabels.index(label) for label in xlabels])
ax.set_xticklabels(xlabels)
ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
# Rotar las etiquetas para mejor visualización
plt.xticks(rotation=90)
# Ajustes de visualización
plt.xlabel('Date', fontdict={'fontsize': 16, 'fontfamily': 'Roboto Slab', 'fontweight': 'bold'})
plt.ylabel('Skill', fontdict={'fontsize': 16, 'fontfamily': 'Roboto Slab', 'fontweight': 'bold'})
return ax
def group_periods(df):
# Group by skill and period and calculate the average level
# Use M for monthly, Q for quarterly, Y for yearly averages
df_grouped = df.groupby([df['skill'], df['period'].dt.to_period('Q')])['level'].mean()
df_grouped = df_grouped.reset_index()
# Convertimos el periodo a formato de fecha para el gráfico
df_grouped['period'] = df_grouped['period'].dt.to_timestamp()
return df_grouped
def format_periods(df):
# Some periods can contain words like "current" or "actually" and must be replaced with the current date
df['period'] = df['period'].apply(lambda x: x.replace('current', pd.Timestamp.now().strftime('%Y-%m-%d')))
# convert the period to a list of months between the start and end date
df['period'] = df['period'].apply(
lambda x: pd.date_range(start=x.split(' to ')[0], end=x.split(' to ')[1], freq='W').tolist())
# create a new dataframe with the period expanded
df = df.explode('period')
# Convert the period to a datetime object
df['period'] = pd.to_datetime(df['period'])
return df
def yaml_to_dataframe(yaml_data):
# convert to a structured like this
# {
# 'year': year,
# 'week': week,
# 'skill': name,
# 'level': grade
# }
data = []
for skill in yaml_data['skills_details']:
for level in skill['levels']:
data.append({
'period': level['period'],
'skill': skill['name'],
'level': level['grade']
})
# Create a dataframe from the data keeping the order of the columns entries
df = pd.DataFrame(data, columns=['skill', 'period', 'level'])
return df
def sum_of_skills(df):
# Sum the level of each skill for each period
df_grouped = df.groupby([df['period'].dt.to_period('Q')])['level'].sum()
df_grouped = df_grouped.reset_index()
# Convert the period to a datetime object
df_grouped['period'] = df_grouped['period'].dt.to_timestamp()
return df_grouped
def main():
yaml_data = load_yaml_data_from_file('data.md')
df = yaml_to_dataframe(yaml_data)
original_skill_order = df['skill'].unique()
df = format_periods(df)
df_grouped = group_periods(df)
# Preparamos los datos para el mapa de calor mantiendo el orden las filas y columnas
heatmap_data = pd.pivot_table(df_grouped, values='level', index='skill', columns='period', aggfunc=np.mean)
heatmap_data.columns = heatmap_data.columns.to_series().dt.strftime('%Y-%m')
heatmap_data = heatmap_data.fillna(0)
# sort by the sum of the skills
# heatmap_data = heatmap_data.reindex(heatmap_data.sum().sort_values(ascending=False).index)
heatmap_data = heatmap_data.reindex(original_skill_order)
#
# Ajustamos el tamaño del gráfico
fig, ax = plt.subplots(figsize=(20, 4))
# cmap = sns.light_palette("darkgreen", as_cmap=True)
# Crear el mapa de calor
custom_colors = sns.color_palette("YlGn", as_cmap=True)
# add blue at the beginning to show the lowest values
custom_colors = sns.color_palette(["#ECECEC"] + list(sns.color_palette("YlGn", 10).as_hex()), as_cmap=True)
sns.heatmap(heatmap_data, annot=False, cmap=custom_colors, linewidths=1, square=True, ax=ax)
ax = configure_labels(ax)
ax.set_title('Level of skill', loc='center', pad=24,
fontdict={'fontsize': 16, 'fontweight': 'bold', 'fontfamily': 'Roboto Slab'})
ax.yaxis.tick_right()
# Encuentra la barra de colores actual en el gráfico, si existe
cbar = ax.collections[0].colorbar
# Si existe la barra de colores, ajusta su posición
if cbar:
cbar.remove() # Primero, removemos la barra de colores existente
cbar = fig.colorbar(ax.collections[0], ax=ax, location='left', shrink=0.65, pad=0.04, fraction=0.01)
plt.subplots_adjust(left=0.02, right=0.92, top=0.999, bottom=0.001)
#save the figure
fig.savefig('heatmap.eps', format='eps')
if __name__ == '__main__':
main()