diff --git a/vayu/googleMaps.py b/vayu/googleMaps.py
index 4d9a694..a06dd4e 100644
--- a/vayu/googleMaps.py
+++ b/vayu/googleMaps.py
@@ -1,4 +1,4 @@
-def googleMaps(df, lat, long, pollutant, dataLoc):
+def googleMaps(df, lat, long, pollutant, date, markersize,zoom):
"""Plots a geographical plot.
Plots a folium plot of longitude and latitude points
@@ -15,9 +15,13 @@ def googleMaps(df, lat, long, pollutant, dataLoc):
long: str
Name of column in df of where longitude points are
pollutant: str
- Name of pollutant
- dataLoc: str
- Name of df column where pollutanat values are stored
+ Name of the df column where the values of the pollutant are stored.
+ date: str
+ Date to look up in the 'date' column of df.
+ markersize: int
+ Factor by which each pollutant value is multiplied to get the marker radius.
+ zoom: int
+ Initial zoom level of the map (passed to folium as zoom_start).
"""
import folium
@@ -26,56 +30,28 @@ def googleMaps(df, lat, long, pollutant, dataLoc):
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
-
- latitude = 37.0902
- longitude = -95.7129
- Arithmetic_Mean_map = folium.Map(location=[latitude, longitude], zoom_start=4)
+
+
+ df1 = df[df['date'] == date]
# =============================================================================
# df = pd.read_csv('interpolData.csv')
# =============================================================================
- some_value = pollutant
- df = df.loc[df["Parameter Name"] == some_value]
-
- some_value = "2018-05-07"
- df = df.loc[df["Date Local"] == some_value]
-
- df = df.sample(frac=1)
+ lat= df1[lat].values[0]
+ long=df1[long].values[0]
+ my_map4 = folium.Map(location = [lat, long], zoom_start = zoom)
- # df_train, df_test = train_test_split(df, test_size=0.2)
- df["Arithmetic Mean Q"] = pd.qcut(df[dataLoc], 4, labels=False)
- colordict = {0: "lightblue", 1: "lightgreen", 2: "orange", 3: "red"}
+ for lat,long,pol,st in zip(df['latitude'],df['longitude'],df[pollutant],df['station']):
+ folium.CircleMarker([lat, long],radius=markersize * pol, popup=(str(st).capitalize()+"
"+ str(round(pol, 3))), fill=True, fill_opacity=0.7, color = 'red').add_to(my_map4)
- for lat, lon, Arithmetic_Mean_Q, Arithmetic_Mean, city, AQI in zip(
- df[lat],
- df[long],
- df["Arithmetic Mean Q"],
- df[dataLoc],
- df["City Name"],
- df["AQI"],
- ):
- folium.CircleMarker(
- [lat, lon],
- radius=0.15 * AQI,
- popup=(
- "City: "
- + str(city).capitalize()
- + "
"
- #'Bike score: ' + str(bike) + '
'
- "Arithmetic_Mean level: "
- + str(Arithmetic_Mean)
- + "%"
- ),
- color="b",
- key_on=Arithmetic_Mean_Q,
- threshold_scale=[0, 1, 2, 3],
- fill_color=colordict[Arithmetic_Mean_Q],
- fill=True,
- fill_opacity=0.7,
- ).add_to(Arithmetic_Mean_map)
- Arithmetic_Mean_map.save("mymap.html")
+ my_map4.save("googleMaps.html")
+ print('your map has been saved')
+ return my_map4
+#Example:
# df = pd.read_csv('interpolData.csv')
-# googleMaps(df,'Latitude','Longitude','Ozone','Arithmetic Mean')
+# Call the function and display the map in Jupyter Notebook
+# map_obj = googleMaps(df, 'latitude', 'longitude', 'pm25', '2022-02-23', 5,10)
+# map_obj
diff --git a/vayu/scatterPlot.py b/vayu/scatterPlot.py
index 7cd2390..229214f 100644
--- a/vayu/scatterPlot.py
+++ b/vayu/scatterPlot.py
@@ -19,48 +19,28 @@ def scatterPlot(df, x, y, **kwargs):
import matplotlib.cm as cm
from math import pi
- pm10 = df.pm10
- o3 = df.o3
- ws = df.ws
- wd = df.wd
- nox = df.nox
- no2 = df.no2
-
+
#########################################
# converts wind data to randians
- df = pd.DataFrame({"speed": ws, "direction": wd})
- df["speed_x"] = df["speed"] * np.sin(df["direction"] * pi / 180.0)
- df["speed_y"] = df["speed"] * np.cos(df["direction"] * pi / 180.0)
+ #df1 = pd.DataFrame({"speed": ws, "direction": wd})
+ df["speed"+str(x)] = df['ws'] * np.sin(df['wd'] * pi / 180.0)
+ df["speed"+str(y)] = df['ws'] * np.cos(df['wd'] * pi / 180.0)
fig, ax = plt.subplots(figsize=(8, 8), dpi=80)
x0, x1 = ax.get_xlim()
y0, y1 = ax.get_ylim()
- ax.set_aspect("equal")
- _ = df.plot(kind="scatter", x="speed_x", y="speed_y", alpha=0.35, ax=ax)
+ #ax.set_aspect("equal")
+ _ = df.plot(kind="scatter", x="speed"+str(x), y="speed"+str(y), alpha=0.35, ax=ax)
+ plt.show()
+
####################################
# simple seaborn plot that shows how given variables relate with one another
- if x == "nox":
- x = nox
- elif x == "no2":
- x = no2
- elif x == "o3":
- x = o3
- elif x == "pm10":
- x = pm10
- if y == "nox":
- y = nox
- elif y == "no2":
- y = no2
- elif y == "o3":
- y = o3
- elif y == "pm10":
- y = pm10
-
- sns.jointplot(x=x, y=y, kind="hex")
-
+ sns.jointplot(x=df[x].values, y=df[y].values, kind="hex")
+ plt.xlabel(x)
+ plt.ylabel(y)
plt.show()
-
-
+
+
# =============================================================================
# df = pd.read_csv("mydata.csv")
# scatterPlot(df,'nox','no2')
diff --git a/vayu/selectByDate.py b/vayu/selectByDate.py
index 14aa415..4553238 100644
--- a/vayu/selectByDate.py
+++ b/vayu/selectByDate.py
@@ -1,28 +1,47 @@
-def selectByDate(df, year):
- """
- Utility function to cut given dataframe by the year
- and find the average value of each day
+def selectByDate(df, year, group=None, time_period='day'):
+ """
+ Utility function to cut a given dataframe by year and find the average value
+ of each day, month, or year. Optionally, data can be grouped by specified columns.
Parameters
----------
df: data frame
- a data frame containing a date field
+ A data frame containing a date field and optional grouping columns.
year: type string
- a year to select to cut data
+ A year to select and filter the data.
+ group: list, optional
+ A list of columns to group the data by. Default is None (no grouping).
+ time_period: {'day', 'month', 'year'}, optional
+ The time period to compute the average value. Default is 'day'.
+
+ Returns
+ -------
+ data frame
+ A data frame with the average value of each day, month, or year.
+ If group is specified, the data will be grouped accordingly.
"""
import pandas as pd
import numpy as np
-
- df.index = pd.to_datetime(df.date)
- df = df.drop("date", axis=1)
- df_n = df[year].resample("1D").mean()
- df_n = df_n.fillna(method="ffill")
- df_n["month"] = df_n.index.month
- df_n.index.dayofweek
- print(df_n)
+
+ df['date'] = pd.to_datetime(df['date'])
+ df_year = df[df['date'].dt.year == int(year)]
+
+ if group:
+ df_grouped = df_year.groupby(group).resample(time_period[0], on='date').mean(numeric_only=True)
+ return df_grouped
+
+ if time_period == 'month':
+ df_month = df_year.resample('M', on='date').mean(numeric_only=True)
+ return df_month
+ elif time_period == 'year':
+ df_yearly = df_year.resample('Y', on='date').mean(numeric_only=True)
+ return df_yearly
+
+ df_day = df_year.resample('D', on='date').mean(numeric_only=True)
+ return df_day
# =============================================================================
# df = pd.read_csv("mydata.csv")
-# selectByDate(df,'2003')
+# selectByDate(df, '2022', group=['latitude', 'longitude', 'station'], time_period='month')
# =============================================================================
diff --git a/vayu/timePlot.py b/vayu/timePlot.py
index 4470099..052091b 100644
--- a/vayu/timePlot.py
+++ b/vayu/timePlot.py
@@ -1,5 +1,4 @@
-def timePlot(df, year, month,
- pollutants=["ws", "nox", "o3", "pm25", "pm10"]):
+def timePlot(df, year, pollutants=["pm25"]):
"""
Plot time series of pollutants for given month and year.
@@ -10,8 +9,6 @@ def timePlot(df, year, month,
and at least one variable to plot
year: str
year of which data will be cut
- month: int
- month of what plot will be graphed
pollutants: list
column names of pollutatnts to compare
"""
@@ -19,37 +16,54 @@ def timePlot(df, year, month,
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
+ import plotly.graph_objects as go
- # Cuts the df down to the month specified
+ # Cuts the df down to the year specified
df.index = pd.to_datetime(df.date)
- df = df.drop("date", axis=1)
- df_n = df[year]
- df_n = df_n.fillna(method="ffill")
- df_n["month"] = df_n.index.month
- df_n.index.dayofweek
- df_n_1 = df_n[df_n.month == month]
- # New lists that have the value of the pollutant in the month specified
-
- color_list = ["red", "blue", "green", "purple", "orange"]
-
- plt.figure(1)
- # series of `len(pollutants)` plots in one large plot that contains the
- # time series of the polutants
-
- axs = []
-
- for ix, pollutant in enumerate(pollutants):
- values = df_n_1[pollutant]
- color = color_list[ix % len(color_list)]
-
- # plotting
- plt.subplot(f"{len(pollutants)}1{ix}")
- a = values.plot.line(color=color)
- a.axes.get_xaxis().set_visible(False)
- a.yaxis.set_label_position("right")
- axs.append(a)
- plt.ylabel(pollutant)
+ df_n_1 = df[(df.index.year == int(year))]
+ #df_n_1 = df[(df.index.month == int(month)) & (df.index.year == int(year))]
+
+ fig = go.Figure()
+
+ for pollutant in pollutants:
+ if pollutant in df_n_1.columns:
+ values = df_n_1[pollutant]
+
+ # Add trace for each selected pollutant
+ fig.add_trace(go.Scatter(
+ x=values.index,
+ y=values.values,
+ name=pollutant
+ ))
+ else:
+ print(f"Warning: {pollutant} data not found.")
+
+ # Configure layout
+ fig.update_layout(
+ xaxis=dict(
+ rangeselector=dict(
+ buttons=list([
+ dict(count=1, label="1d", step="day", stepmode="backward"),
+ dict(count=7, label="1w", step="day", stepmode="backward"),
+ dict(count=1, label="1m", step="month", stepmode="backward"),
+ dict(count=6, label="6m", step="month", stepmode="backward"),
+ dict(count=1, label="YTD", step="year", stepmode="todate"),
+ dict(count=1, label="1y", step="year", stepmode="backward"),
+ dict(step="all")
+ ])
+ #active=2
+ ),
+ rangeslider=dict(
+ visible=True
+ ),
+
+ type="date"
+ )
+ )
+
+ fig.show()
- # making dates visible.
- axs[0].axes.get_xaxis().set_visible(True)
- return axs
+#Example:
+# timePlot(df, '2022', pollutants=['pm25', 'pm10', 'ws', 'wd'])
+#--------------------
+
\ No newline at end of file