diff --git a/vayu/googleMaps.py b/vayu/googleMaps.py
index 4d9a694..a06dd4e 100644
--- a/vayu/googleMaps.py
+++ b/vayu/googleMaps.py
@@ -1,4 +1,4 @@
-def googleMaps(df, lat, long, pollutant, dataLoc):
+def googleMaps(df, lat, long, pollutant, date, markersize,zoom):
"""Plots a geographical plot.
Plots a folium plot of longitude and latitude points
@@ -15,9 +15,13 @@ def googleMaps(df, lat, long, pollutant, dataLoc):
long: str
Name of column in df of where longitude points are
pollutant: str
- Name of pollutant
- dataLoc: str
- Name of df column where pollutanat values are stored
+        Name of the df column that holds the pollutant values.
+    date: str
+        Only rows whose 'date' column equals this value are plotted.
+    markersize: int
+        Factor the pollutant value is multiplied by to get the marker radius.
+    zoom: int
+        Initial zoom level of the map.
"""
import folium
@@ -26,56 +30,28 @@ def googleMaps(df, lat, long, pollutant, dataLoc):
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
-
- latitude = 37.0902
- longitude = -95.7129
- Arithmetic_Mean_map = folium.Map(location=[latitude, longitude], zoom_start=4)
+
+    # Keep only the readings taken on the requested date.
+    df1 = df[df['date'] == date]
# =============================================================================
# df = pd.read_csv('interpolData.csv')
# =============================================================================
- some_value = pollutant
- df = df.loc[df["Parameter Name"] == some_value]
-
- some_value = "2018-05-07"
- df = df.loc[df["Date Local"] == some_value]
-
- df = df.sample(frac=1)
+    # Centre the map on the first reading of that date.
+    centre = [df1[lat].values[0], df1[long].values[0]]
+    my_map4 = folium.Map(location = centre, zoom_start = zoom)
- # df_train, df_test = train_test_split(df, test_size=0.2)
- df["Arithmetic Mean Q"] = pd.qcut(df[dataLoc], 4, labels=False)
- colordict = {0: "lightblue", 1: "lightgreen", 2: "orange", 3: "red"}
+    for lat_pt, long_pt, pol, st in zip(df1[lat], df1[long], df1[pollutant], df1['station']):
+        folium.CircleMarker([lat_pt, long_pt], radius=markersize * pol, popup=(str(st).capitalize()+"<br>"+ str(round(pol, 3))), fill=True, fill_opacity=0.7, color = 'red').add_to(my_map4)
- for lat, lon, Arithmetic_Mean_Q, Arithmetic_Mean, city, AQI in zip(
- df[lat],
- df[long],
- df["Arithmetic Mean Q"],
- df[dataLoc],
- df["City Name"],
- df["AQI"],
- ):
- folium.CircleMarker(
- [lat, lon],
- radius=0.15 * AQI,
- popup=(
- "City: "
- + str(city).capitalize()
- + "<br>"
- #'Bike score: ' + str(bike) + '<br>'
- "Arithmetic_Mean level: "
- + str(Arithmetic_Mean)
- + "%"
- ),
- color="b",
- key_on=Arithmetic_Mean_Q,
- threshold_scale=[0, 1, 2, 3],
- fill_color=colordict[Arithmetic_Mean_Q],
- fill=True,
- fill_opacity=0.7,
- ).add_to(Arithmetic_Mean_map)
- Arithmetic_Mean_map.save("mymap.html")
+    my_map4.save("googleMaps.html")
+    print('your map has been saved')
+    return my_map4
+#Example:
# df = pd.read_csv('interpolData.csv')
-# googleMaps(df,'Latitude','Longitude','Ozone','Arithmetic Mean')
+# Call the function and display the map in Jupyter Notebook
+# map_obj = googleMaps(df, 'latitude', 'longitude', 'pm25', '2022-02-23', 5,10)
+# map_obj
diff --git a/vayu/scatterPlot.py b/vayu/scatterPlot.py
index 7cd2390..229214f 100644
--- a/vayu/scatterPlot.py
+++ b/vayu/scatterPlot.py
@@ -19,48 +19,28 @@ def scatterPlot(df, x, y, **kwargs):
import matplotlib.cm as cm
from math import pi
- pm10 = df.pm10
- o3 = df.o3
- ws = df.ws
- wd = df.wd
- nox = df.nox
- no2 = df.no2
-
+
#########################################
# converts wind data to randians
- df = pd.DataFrame({"speed": ws, "direction": wd})
- df["speed_x"] = df["speed"] * np.sin(df["direction"] * pi / 180.0)
- df["speed_y"] = df["speed"] * np.cos(df["direction"] * pi / 180.0)
+ #df1 = pd.DataFrame({"speed": ws, "direction": wd})
+ df["speed"+str(x)] = df['ws'] * np.sin(df['wd'] * pi / 180.0)
+ df["speed"+str(y)] = df['ws'] * np.cos(df['wd'] * pi / 180.0)
fig, ax = plt.subplots(figsize=(8, 8), dpi=80)
x0, x1 = ax.get_xlim()
y0, y1 = ax.get_ylim()
- ax.set_aspect("equal")
- _ = df.plot(kind="scatter", x="speed_x", y="speed_y", alpha=0.35, ax=ax)
+ #ax.set_aspect("equal")
+ _ = df.plot(kind="scatter", x="speed"+str(x), y="speed"+str(y), alpha=0.35, ax=ax)
+ plt.show()
+
####################################
# simple seaborn plot that shows how given variables relate with one another
- if x == "nox":
- x = nox
- elif x == "no2":
- x = no2
- elif x == "o3":
- x = o3
- elif x == "pm10":
- x = pm10
- if y == "nox":
- y = nox
- elif y == "no2":
- y = no2
- elif y == "o3":
- y = o3
- elif y == "pm10":
- y = pm10
-
- sns.jointplot(x=x, y=y, kind="hex")
-
+ sns.jointplot(x=df[x].values, y=df[y].values, kind="hex")
+ plt.xlabel(x)
+ plt.ylabel(y)
plt.show()
-
-
+
+
# =============================================================================
# df = pd.read_csv("mydata.csv")
# scatterPlot(df,'nox','no2')
diff --git a/vayu/selectByDate.py b/vayu/selectByDate.py
index 14aa415..a8080c3 100644
--- a/vayu/selectByDate.py
+++ b/vayu/selectByDate.py
@@ -1,28 +1,48 @@
-def selectByDate(df, year):
- """
- Utility function to cut given dataframe by the year
- and find the average value of each day
+import pandas as pd
+import numpy as np
+
+def select_by(df:pd.DataFrame, year:str, group:list=None, time_period:str='day'):
+    """
+    Cut a data frame down to one calendar year and average it per day, month,
+    or year, optionally computing the averages separately for each group.
Parameters
----------
df: data frame
- a data frame containing a date field
+        A data frame with a 'date' column and any optional grouping columns.
year: type string
- a year to select to cut data
+        The year to keep; rows whose date falls in any other year are dropped.
+    group: list, optional
+        Columns to group by before averaging. Default is None (no grouping).
+    time_period: {'day', 'month', 'year'}, optional
+        Period over which numeric columns are averaged. Default is 'day'.
+
+    Returns
+    -------
+    data frame
+        Mean of the numeric columns for each period, indexed by date.
+        If group is given, the index also carries the grouping columns.
"""
- import pandas as pd
- import numpy as np
-
- df.index = pd.to_datetime(df.date)
- df = df.drop("date", axis=1)
- df_n = df[year].resample("1D").mean()
- df_n = df_n.fillna(method="ffill")
- df_n["month"] = df_n.index.month
- df_n.index.dayofweek
- print(df_n)
+
+    # Parse the dates on a new frame so the caller's 'date' column is untouched.
+    df = df.assign(date=pd.to_datetime(df['date']))
+    df_year = df[df['date'].dt.year == int(year)]
+
+    if group:
+        # The first letter of time_period ('d', 'm', 'y') is passed as the frequency.
+        return df_year.groupby(group).resample(time_period[0], on='date').mean(numeric_only=True)
+
+    if time_period == 'month':
+        return df_year.resample('M', on='date').mean(numeric_only=True)
+    if time_period == 'year':
+        return df_year.resample('Y', on='date').mean(numeric_only=True)
+
+    # Any other value of time_period falls back to daily averages.
+    df_day = df_year.resample('D', on='date').mean(numeric_only=True)
+    return df_day
# =============================================================================
# df = pd.read_csv("mydata.csv")
-# selectByDate(df,'2003')
+# select_by(df,'2022',group=['latitude','longitude','station'], time_period='month')
# =============================================================================
diff --git a/vayu/summary_plot.py b/vayu/summary_plot.py
new file mode 100644
index 0000000..bcc9303
--- /dev/null
+++ b/vayu/summary_plot.py
@@ -0,0 +1,104 @@
+import datetime as dt
+import matplotlib.pyplot as plt
+import matplotlib as mpl
+import numpy as np
+import pandas as pd
+from numpy import array
+import matplotlib.patches as mpatches
+import seaborn as sns
+from matplotlib.pyplot import figure
+
+# Upper limits of the "Safe" and "Moderate" bands for each pollutant column;
+# the flag tells whether a reading equal to a limit still falls inside the band.
+_LEVEL_LIMITS = {
+    "pm10": (100, 250, False),
+    "pm25": (100, 250, False),
+    "sox": (80, 380, True),
+    "co": (2, 10, False),
+    "o3": (100, 168, False),
+    "nox": (80, 180, False),
+    "pb": (1, 2, True),
+    "nh3": (400, 800, True),
+}
+
+
+def _count_levels(values, safe_limit, moderate_limit, inclusive):
+    """Return [safe, moderate, high] counts of the non-missing readings."""
+    values = values.dropna()
+    if inclusive:
+        safe = values <= safe_limit
+        up_to_moderate = values <= moderate_limit
+    else:
+        safe = values < safe_limit
+        up_to_moderate = values < moderate_limit
+    n_safe = int(safe.sum())
+    n_moderate = int((up_to_moderate & ~safe).sum())
+    return [n_safe, n_moderate, len(values) - n_safe - n_moderate]
+
+
+def summary_plot(df: pd.DataFrame):
+    """Plot a line chart, a histogram and a level pie chart per numeric column.
+
+    One row of three panels is drawn for every numeric column of df. The
+    figure is saved to "summary_plot.png" and shown, and the min, max,
+    missing count, mean, median and 95th percentile of each column are
+    printed. Gaps are forward-filled first, so the printed missing count
+    only covers values that had no earlier reading to copy from.
+
+    Parameters
+    ----------
+    df: data frame
+        data frame to be summarised. Must contain a date field
+        and at least one numeric column. It is left unmodified.
+
+    Raises
+    ------
+    ValueError
+        If df has no numeric column besides the date field.
+    """
+    # Index a new frame by date instead of re-indexing the caller's df.
+    df_all = df.drop("date", axis=1)
+    df_all.index = pd.to_datetime(df["date"])
+    df_all = df_all.ffill().select_dtypes(include="number")
+    if df_all.shape[1] == 0:
+        raise ValueError("summary_plot needs at least one numeric column besides 'date'")
+
+    labels = ["Safe", "Moderate", "High"]
+    explode = [0, 0, 1]
+
+    # squeeze=False keeps axes two-dimensional even when there is a single column.
+    fig, axes = plt.subplots(df_all.shape[1], 3, figsize=(25, 25), squeeze=False)
+
+    for (ax_line, ax_hist, ax_pie), pollutant in zip(axes, df_all.columns):
+        series = df_all[pollutant]
+
+        series.plot.line(ax=ax_line, color="gold")
+        ax_line.axes.get_xaxis().set_visible(False)
+        ax_line.yaxis.set_label_position("left")
+        ax_line.set_ylabel(pollutant, fontsize=30, bbox=dict(facecolor="whitesmoke"))
+
+        # Leading gaps survive the forward fill; drop them before binning.
+        ax_hist.hist(series.dropna(), bins=50, color="green")
+
+        # Columns without known limits, or without any reading, get no pie.
+        limits = _LEVEL_LIMITS.get(pollutant)
+        sizes = _count_levels(series, *limits) if limits else []
+        if sum(sizes) > 0:
+            ax_pie.pie(sizes, explode=explode, labels=labels, autopct="%1.1f%%", shadow=False, startangle=90)
+            ax_pie.axis("equal")
+            ax_pie.set_xlabel("Statistics")
+        else:
+            ax_pie.axis("off")
+
+        print(f"{pollutant}\nmin = {series.min():.2f}\nmax = {series.max():.2f}\nmissing = {series.isna().sum()}\nmean = {series.mean():.2f}\nmedian = {series.median():.2f}\n95th percentile = {series.quantile(0.95):.2f}\n")
+
+    plt.savefig("summary_plot.png", dpi=300, format="png")
+    plt.show()
+    print("your plots has also been saved")
+    plt.close()
+
+
+# =============================================================================
+# df = pd.read_csv('mydata.csv')
+# summary_plot(df)
+# =============================================================================
diff --git a/vayu/timePlot.py b/vayu/timePlot.py
index 4470099..581063a 100644
--- a/vayu/timePlot.py
+++ b/vayu/timePlot.py
@@ -1,5 +1,10 @@
-def timePlot(df, year, month,
- pollutants=["ws", "nox", "o3", "pm25", "pm10"]):
+import numpy as np
+import pandas as pd
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import plotly.graph_objects as go
+
+def time_plot(df:pd.DataFrame, year:str, pollutants:list=["pm25"]):
"""
Plot time series of pollutants for given month and year.
@@ -10,46 +15,56 @@ def timePlot(df, year, month,
and at least one variable to plot
year: str
year of which data will be cut
- month: int
- month of what plot will be graphed
pollutants: list
column names of pollutatnts to compare
"""
- import numpy as np
- import pandas as pd
- import matplotlib as mpl
- import matplotlib.pyplot as plt
-
- # Cuts the df down to the month specified
+
+    # Cut the data down to the requested year. set_index returns a new frame,
+    # so the caller's df keeps its original index.
- df.index = pd.to_datetime(df.date)
- df = df.drop("date", axis=1)
- df_n = df[year]
- df_n = df_n.fillna(method="ffill")
- df_n["month"] = df_n.index.month
- df_n.index.dayofweek
- df_n_1 = df_n[df_n.month == month]
- # New lists that have the value of the pollutant in the month specified
-
- color_list = ["red", "blue", "green", "purple", "orange"]
-
- plt.figure(1)
- # series of `len(pollutants)` plots in one large plot that contains the
- # time series of the polutants
-
- axs = []
-
- for ix, pollutant in enumerate(pollutants):
- values = df_n_1[pollutant]
- color = color_list[ix % len(color_list)]
-
- # plotting
- plt.subplot(f"{len(pollutants)}1{ix}")
- a = values.plot.line(color=color)
- a.axes.get_xaxis().set_visible(False)
- a.yaxis.set_label_position("right")
- axs.append(a)
- plt.ylabel(pollutant)
+    df_n_1 = df.set_index(pd.to_datetime(df.date))
+    df_n_1 = df_n_1[df_n_1.index.year == int(year)]
+
+    fig = go.Figure()
+
+    for pollutant in pollutants:
+        if pollutant in df_n_1.columns:
+            values = df_n_1[pollutant]
+
+            # Add trace for each selected pollutant
+            fig.add_trace(go.Scatter(
+                x=values.index,
+                y=values.values,
+                name=pollutant
+            ))
+        else:
+            print(f"Warning: {pollutant} data not found.")
+
+    # Configure layout
+    fig.update_layout(
+        xaxis=dict(
+            rangeselector=dict(
+                buttons=list([
+                    dict(count=1, label="1d", step="day", stepmode="backward"),
+                    dict(count=7, label="1w", step="day", stepmode="backward"),
+                    dict(count=1, label="1m", step="month", stepmode="backward"),
+                    dict(count=6, label="6m", step="month", stepmode="backward"),
+                    dict(count=1, label="YTD", step="year", stepmode="todate"),
+                    dict(count=1, label="1y", step="year", stepmode="backward"),
+                    dict(step="all")
+                ])
+                #active=2
+            ),
+            rangeslider=dict(
+                visible=True
+            ),
+
+            type="date"
+        )
+    )
+
+    fig.show()
- # making dates visible.
- axs[0].axes.get_xaxis().set_visible(True)
- return axs
+#Example:
+#time_plot(df, '2022', pollutants=['pm25','pm10','ws','wd'])  # any other columns of df can be listed too
+#--------------------
+
\ No newline at end of file