-
Notifications
You must be signed in to change notification settings - Fork 0
/
MDverse_data_explorer.py
56 lines (44 loc) · 1.6 KB
/
MDverse_data_explorer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
"""Streamlit web app for exploring molecular dynamics (MD) data."""
import streamlit as st
from wordcloud import WordCloud, STOPWORDS
import website_management as wm
# Page-level configuration: browser tab title, tab icon and wide layout.
st.set_page_config(page_title="MDverse", page_icon="🔎", layout="wide")
# Main heading of the landing page (the leading "#" is Markdown heading syntax).
st.write("# Welcome to MDverse data explorer 🔎")
# Hint displayed in the sidebar as a success message.
st.sidebar.success("⬆️ Select the item you want to search for.")
# Datasets table; also used below to build the word cloud from the titles.
datasets_df = wm.load_data()["datasets"]

# Per-origin summary: number of distinct dataset ids plus the smallest and
# largest creation date. The display names contain spaces, so they are given
# as a dict unpacked into pandas named aggregations.
summary_columns = {
    "Number of datasets": ("dataset_id", "nunique"),
    "First dataset": ("date_creation", "min"),
    "Last dataset": ("date_creation", "max"),
}
dataset_agg = datasets_df.groupby("dataset_origin").agg(**summary_columns)

# Append a "total" row. Only numeric columns are summed (numeric_only=True),
# so the other columns of that row receive no value.
dataset_agg.loc["total"] = dataset_agg.sum(numeric_only=True)
dataset_agg.index.name = "Dataset origin"

st.write("Amount of data available:")
# Thousands separator and no decimals for the numbers shown in the table.
styled_summary = dataset_agg.style.format(thousands=",", precision=0)
st.dataframe(styled_summary)
# Report how many Gromacs GRO and MDP files are available.
# The data is fetched once and both tables are read from the same result,
# instead of calling wm.load_data() again for every table.
data = wm.load_data()
gro_df = data["gro"]
st.write(f"{len(gro_df)} Gromacs GRO files")
mdp_df = data["mdp"]
st.write(f"{len(mdp_df)} Gromacs MDP files")
# Build word list for the wordcloud.
# Missing titles (NaN) are dropped and the remaining values coerced to str:
# str.join raises TypeError as soon as one item is not a string.
titles = " ".join(datasets_df["title"].dropna().astype(str))
# Commas and periods are turned into spaces so they do not stick to words.
titles = titles.replace(",", " ").replace(".", " ")
# split() already discards surrounding whitespace, no extra strip() needed.
words = titles.lower().split()
# Build and display the wordcloud.
if words:
    wordcloud = WordCloud(
        width=1000,
        height=400,
        background_color="white",
        stopwords=set(STOPWORDS),
        min_font_size=12,
    ).generate(" ".join(words))
    st.image(wordcloud.to_array())
else:
    # WordCloud.generate raises ValueError when it receives no word at all;
    # show a short notice instead of crashing the whole page.
    st.info("No dataset title available to build the word cloud.")