-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
ef0d8a6
commit 0c0a872
Showing
439 changed files
with
226 additions
and
1,270 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
[theme] | ||
primaryColor = '#1c83e1' | ||
backgroundColor = '#ffffff' | ||
secondaryBackgroundColor = '#f0f2f6' | ||
textColor = '#000000' |
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
import streamlit as st | ||
from data_processing import load_data, clean_and_prepare_data | ||
from utils import data_quality_check | ||
from plots import plot_time_series, plot_area, create_scatter_plot, create_correlation_analysis | ||
import pandas as pd | ||
|
||
# --- Streamlit dashboard for solar-radiation data analysis ---
# Loads the regional datasets, optionally cleans them, and renders the
# user-selected plots for either the raw or the cleaned data.

# Load datasets
datasets = load_data()

# Streamlit UI
st.title("Solar Radiation Data Analysis")

# Sidebar for dataset selection
dataset_name = st.sidebar.selectbox("Select Dataset", ("Benin", "Togo", "Sierra Leone"))
df = datasets[dataset_name]

# Display the dataset summary
st.write(f"### {dataset_name} Dataset Summary")
st.write(df.describe())

# Sidebar: Want to Clean Section
clean_data = st.sidebar.checkbox("Want to Clean Data")

if clean_data:
    # Data Quality Check Before Cleaning
    quality_results_before = data_quality_check(df)
    st.write("#### Data Quality Check Results (Before Cleaning)")
    st.write(pd.DataFrame(quality_results_before).T)

    # Clean Data
    df_cleaned = clean_and_prepare_data(df)

    # Data Quality Check After Cleaning
    quality_results_after = data_quality_check(df_cleaned)
    st.write("#### Data Quality Check Results (After Cleaning)")
    st.write(pd.DataFrame(quality_results_after).T)

    # Display cleaned data
    st.write(f"### {dataset_name} Cleaned Data")
    st.write(df_cleaned.head())

# Sidebar: Analysis Selection
st.sidebar.write("### Which to Analyze?")
analyze_uncleaned = st.sidebar.checkbox("Analyze Uncleaned Data")
analyze_cleaned = st.sidebar.checkbox("Analyze Cleaned Data")

# Determine which dataset to analyze.  Cleaned data is only available when
# the "Want to Clean Data" box is also ticked (df_cleaned exists only then).
df_to_analyze = None
data_label = ""

if analyze_uncleaned:
    df_to_analyze = df
    data_label = "Uncleaned Data"
elif analyze_cleaned and clean_data:
    df_to_analyze = df_cleaned
    data_label = "Cleaned Data"

# Display analysis options if either checkbox is selected
if analyze_uncleaned or (analyze_cleaned and clean_data):
    st.sidebar.write(f"### Analysis Options for {data_label}")
    plot_area_selected = st.sidebar.checkbox("Area Plot")
    plot_time_series_selected = st.sidebar.checkbox("Time Series Plot")
    plot_scatter_selected = st.sidebar.checkbox("Scatter Plot")
    plot_correlation_selected = st.sidebar.checkbox("Correlation Analysis")

    # Perform selected analyses
    if plot_correlation_selected and df_to_analyze is not None:
        create_correlation_analysis(df_to_analyze, dataset_name)

    if plot_area_selected and df_to_analyze is not None:
        # BUG FIX: plot_area is defined as plot_area(df, title, columns);
        # calling it with only (df, dataset_name) raised a TypeError.
        plot_area(df_to_analyze, f"Area Plot - {dataset_name}", ['GHI', 'DNI', 'DHI'])

    if plot_scatter_selected and df_to_analyze is not None:
        create_scatter_plot(df_to_analyze)

    if plot_time_series_selected and df_to_analyze is not None:
        plot_time_series(df_to_analyze, dataset_name)
else:
    # No analyzable dataset chosen — prompt the user instead of rendering nothing.
    st.warning("Please select either 'Analyze Uncleaned Data' or 'Analyze Cleaned Data'.")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import pandas as pd | ||
import numpy as np | ||
|
||
COLUMNS_TO_CHECK = ['GHI', 'DNI', 'DHI', 'ModA', 'ModB', 'WS', 'WSgust'] | ||
|
||
def load_data():
    """Load the three regional solar-radiation CSV files.

    Returns:
        dict: mapping of dataset display name to its raw pandas DataFrame.
    """
    # Relative paths assume the app is launched from a sibling directory of data/.
    sources = {
        "Benin": '../data/benin-malanville.csv',
        "Togo": '../data/togo-dapaong_qc.csv',
        "Sierra Leone": '../data/sierraleone-bumbuna.csv',
    }
    return {name: pd.read_csv(path) for name, path in sources.items()}
|
||
def clean_data(df):
    """Filter out rows with physically invalid or outlying sensor readings.

    Rows are dropped when any column in COLUMNS_TO_CHECK is negative, and
    then again when any of those columns lies 3+ standard deviations from
    the (post-filter) column mean.

    Args:
        df: raw measurements containing all COLUMNS_TO_CHECK columns.

    Returns:
        The filtered DataFrame (a row-subset view of *df*).
    """
    non_negative = (df[COLUMNS_TO_CHECK] >= 0).all(axis=1)
    df = df[non_negative]
    # Z-scores are computed on the already negative-filtered data.
    subset = df[COLUMNS_TO_CHECK]
    z_scores = ((subset - subset.mean()) / subset.std()).abs()
    within_bounds = (z_scores < 3).all(axis=1)
    return df[within_bounds]
|
||
def clean_and_prepare_data(df):
    """Clean *df* and re-index it by its parsed ``Timestamp`` column.

    Args:
        df: raw measurements with a ``Timestamp`` column parseable by
            ``pd.to_datetime``.

    Returns:
        A new DataFrame, cleaned by ``clean_data`` and indexed by timestamp.
    """
    # BUG FIX: clean_data returns a boolean-mask slice of the caller's frame;
    # assigning into it triggers pandas' SettingWithCopyWarning and the write
    # may silently not stick.  Work on an explicit copy instead.
    df_cleaned = clean_data(df).copy()
    df_cleaned['Timestamp'] = pd.to_datetime(df_cleaned['Timestamp'])
    df_cleaned.set_index('Timestamp', inplace=True)
    return df_cleaned
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import matplotlib.pyplot as plt | ||
import seaborn as sns | ||
import streamlit as st | ||
|
||
def plot_time_series(df, dataset_name):
    """Render a time-series line chart of GHI, DNI, DHI and Tamb in Streamlit."""
    series = ['GHI', 'DNI', 'DHI', 'Tamb']
    figure, axis = plt.subplots(figsize=(14, 8))
    df[series].plot(ax=axis)
    plt.title(f'Time Series Analysis of GHI, DNI, DHI, and Tamb in {dataset_name}')
    st.pyplot(figure)
|
||
import matplotlib.pyplot as plt | ||
import streamlit as st | ||
|
||
def plot_area(df, title, columns=None):
    """Render a stacked area plot of the given columns in Streamlit.

    Args:
        df: frame holding the series to plot.
        title: chart title.
        columns: column names to include.  Defaults to ['GHI', 'DNI', 'DHI']
            so existing two-argument callers (e.g. the app entry point, which
            previously crashed with a TypeError) keep working.

    Shows a Streamlit error instead of raising when a column mixes positive
    and negative values, since stacked areas render such data misleadingly.
    """
    if columns is None:
        columns = ['GHI', 'DNI', 'DHI']
    try:
        # Check if any column contains both positive and negative values
        for col in columns:
            if df[col].min() < 0 and df[col].max() > 0:
                raise ValueError(f"Column '{col}' contains both positive and negative values, which is not allowed in an area plot.")

        # Create area plot
        fig, ax = plt.subplots()
        df[columns].plot(kind='area', ax=ax, alpha=0.5)
        ax.set_title(title)
        plt.xticks(rotation=45)
        plt.tight_layout()
        st.pyplot(fig)

    except ValueError as e:
        # Surface the validation failure in the UI rather than crashing the app.
        st.error(f"Error in plotting area chart: {e}")
|
||
def create_scatter_plot(df):
    """Render a scatter plot of ambient temperature against relative humidity."""
    figure, axis = plt.subplots()
    sns.scatterplot(data=df, x='RH', y='Tamb', ax=axis)
    plt.title("Scatter Plot: Temperature (Tamb) vs Relative Humidity (RH)")
    st.pyplot(figure)
|
||
def create_correlation_analysis(df, dataset_name):
    """Display the pairwise correlation matrix of humidity/temperature columns."""
    cols = ['RH', 'Tamb', 'TModA', 'TModB']
    correlation_matrix = df[cols].corr()
    st.write(f"### Correlation Analysis - {dataset_name}")
    st.write(correlation_matrix)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
import pandas as pd | ||
import numpy as np | ||
|
||
def data_quality_check(df):
    """Summarise data-quality issues for the standard measurement columns.

    For each checked column, counts missing values, outliers (|z-score| > 3),
    and incorrect entries (negative readings).

    Args:
        df: frame containing at least the seven solar-measurement columns below.

    Returns:
        pd.DataFrame: one row per checked column with "Column",
        "Missing Values", "Outliers" and "Incorrect Entries" columns.
    """
    cols = ['GHI', 'DNI', 'DHI', 'ModA', 'ModB', 'WS', 'WSgust']
    subset = df[cols]
    z_scores = (subset - subset.mean()) / subset.std()
    summary = {
        "Column": cols,
        "Missing Values": subset.isnull().sum().values,
        "Outliers": (z_scores.abs() > 3).sum().values,
        "Incorrect Entries": (subset < 0).sum().values,
    }
    return pd.DataFrame(summary)
Oops, something went wrong.