Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SelectByDate: Code Extension (refer issue #53) #57

Closed
wants to merge 8 commits into from
70 changes: 23 additions & 47 deletions vayu/googleMaps.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
def googleMaps(df, lat, long, pollutant, dataLoc):
def googleMaps(df, lat, long, pollutant, date, markersize,zoom):
"""Plots a geographical plot.

Plots a folium plot of longitude and latitude points
Expand All @@ -15,9 +15,13 @@ def googleMaps(df, lat, long, pollutant, dataLoc):
long: str
Name of column in df of where longitude points are
pollutant: str
Name of pollutant
dataLoc: str
Name of df column where pollutant values are stored
Name of the df column where the values of the pollutant are stored.
date: str
Date for which the pollutant is visualized.
markersize: int
Factor by which the pollutant value is multiplied to set the marker radius.
zoom: int
Initial zoom level of the map.

"""
import folium
Expand All @@ -26,56 +30,28 @@ def googleMaps(df, lat, long, pollutant, dataLoc):
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

latitude = 37.0902
longitude = -95.7129
Arithmetic_Mean_map = folium.Map(location=[latitude, longitude], zoom_start=4)


df1 = df[df['date'] == date]

# =============================================================================
# df = pd.read_csv('interpolData.csv')
# =============================================================================

some_value = pollutant
df = df.loc[df["Parameter Name"] == some_value]

some_value = "2018-05-07"
df = df.loc[df["Date Local"] == some_value]

df = df.sample(frac=1)
lat= df1[lat].values[0]
long=df1[long].values[0]
my_map4 = folium.Map(location = [lat, long], zoom_start = zoom)

# df_train, df_test = train_test_split(df, test_size=0.2)
df["Arithmetic Mean Q"] = pd.qcut(df[dataLoc], 4, labels=False)
colordict = {0: "lightblue", 1: "lightgreen", 2: "orange", 3: "red"}
for lat,long,pol,st in zip(df['latitude'],df['longitude'],df[pollutant],df['station']):
folium.CircleMarker([lat, long],radius=markersize * pol, popup=(str(st).capitalize()+"<br>"+ str(round(pol, 3))), fill=True, fill_opacity=0.7, color = 'red').add_to(my_map4)

for lat, lon, Arithmetic_Mean_Q, Arithmetic_Mean, city, AQI in zip(
df[lat],
df[long],
df["Arithmetic Mean Q"],
df[dataLoc],
df["City Name"],
df["AQI"],
):
folium.CircleMarker(
[lat, lon],
radius=0.15 * AQI,
popup=(
"City: "
+ str(city).capitalize()
+ "<br>"
#'Bike score: ' + str(bike) + '<br>'
"Arithmetic_Mean level: "
+ str(Arithmetic_Mean)
+ "%"
),
color="b",
key_on=Arithmetic_Mean_Q,
threshold_scale=[0, 1, 2, 3],
fill_color=colordict[Arithmetic_Mean_Q],
fill=True,
fill_opacity=0.7,
).add_to(Arithmetic_Mean_map)
Arithmetic_Mean_map.save("mymap.html")
my_map4.save("googleMaps.html")
print('your map has been saved')
return my_map4


#Example:
# df = pd.read_csv('interpolData.csv')
# googleMaps(df,'Latitude','Longitude','Ozone','Arithmetic Mean')
# Call the function and display the map in Jupyter Notebook
# map_obj = googleMaps(df, 'latitude', 'longitude', 'pm25', '2022-02-23', 5,10)
# map_obj
46 changes: 13 additions & 33 deletions vayu/scatterPlot.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,48 +19,28 @@ def scatterPlot(df, x, y, **kwargs):
import matplotlib.cm as cm
from math import pi

pm10 = df.pm10
o3 = df.o3
ws = df.ws
wd = df.wd
nox = df.nox
no2 = df.no2


#########################################
# converts wind data to radians
df = pd.DataFrame({"speed": ws, "direction": wd})
df["speed_x"] = df["speed"] * np.sin(df["direction"] * pi / 180.0)
df["speed_y"] = df["speed"] * np.cos(df["direction"] * pi / 180.0)
#df1 = pd.DataFrame({"speed": ws, "direction": wd})
df["speed"+str(x)] = df['ws'] * np.sin(df['wd'] * pi / 180.0)
df["speed"+str(y)] = df['ws'] * np.cos(df['wd'] * pi / 180.0)
fig, ax = plt.subplots(figsize=(8, 8), dpi=80)
x0, x1 = ax.get_xlim()
y0, y1 = ax.get_ylim()
ax.set_aspect("equal")
_ = df.plot(kind="scatter", x="speed_x", y="speed_y", alpha=0.35, ax=ax)
#ax.set_aspect("equal")
_ = df.plot(kind="scatter", x="speed"+str(x), y="speed"+str(y), alpha=0.35, ax=ax)
plt.show()


####################################
# simple seaborn plot that shows how given variables relate with one another
if x == "nox":
x = nox
elif x == "no2":
x = no2
elif x == "o3":
x = o3
elif x == "pm10":
x = pm10
if y == "nox":
y = nox
elif y == "no2":
y = no2
elif y == "o3":
y = o3
elif y == "pm10":
y = pm10

sns.jointplot(x=x, y=y, kind="hex")

sns.jointplot(x=df[x].values, y=df[y].values, kind="hex")
plt.xlabel(x)
plt.ylabel(y)
plt.show()


# =============================================================================
# df = pd.read_csv("mydata.csv")
# scatterPlot(df,'nox','no2')
Expand Down
49 changes: 34 additions & 15 deletions vayu/selectByDate.py
Original file line number Diff line number Diff line change
@@ -1,28 +1,47 @@
def selectByDate(df, year, group=None, time_period='day'):
    """
    Select one calendar year of data and average it per day, month, or year.

    Parameters
    ----------
    df: data frame
        A data frame containing a ``date`` column (anything accepted by
        ``pandas.to_datetime``) and, optionally, the grouping columns.
        The caller's frame is left untouched.
    year: str or int
        The year to keep, e.g. ``'2022'``.
    group: list, optional
        Columns to group the data by before averaging.
        Default is None (no grouping).
    time_period: {'day', 'month', 'year'}, optional
        Width of the averaging bins. Default is 'day'.

    Returns
    -------
    data frame
        Mean of every numeric column per bin, indexed by the bin date
        (the day, the month end, or the year end). If ``group`` is given,
        the index is the group columns followed by the bin date.

    Raises
    ------
    ValueError
        If ``time_period`` is not 'day', 'month' or 'year'.
    """
    import pandas as pd

    # Offset objects rather than alias strings: the spelling of the
    # month-end / year-end aliases is not the same in every pandas release,
    # and this keeps the grouped and ungrouped paths on the same rule.
    bin_offsets = {
        'day': pd.offsets.Day(),
        'month': pd.offsets.MonthEnd(),
        'year': pd.offsets.YearEnd(),
    }
    if time_period not in bin_offsets:
        raise ValueError(
            "time_period must be 'day', 'month' or 'year', got %r"
            % (time_period,)
        )
    rule = bin_offsets[time_period]

    # assign() builds a new frame, so the caller's 'date' column keeps its
    # original dtype instead of being overwritten with datetimes.
    dated = df.assign(date=pd.to_datetime(df['date']))
    df_year = dated[dated['date'].dt.year == int(year)]

    if group:
        return (
            df_year.groupby(group)
            .resample(rule, on='date')
            .mean(numeric_only=True)
        )

    return df_year.resample(rule, on='date').mean(numeric_only=True)


# =============================================================================
# df = pd.read_csv("mydata.csv")
# selectByDate(df,'2003')
#selectByDate(df1,'2022',group=['latitude','longitude','station'], time_period='month')
# =============================================================================
84 changes: 49 additions & 35 deletions vayu/timePlot.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
def timePlot(df, year, month,
pollutants=["ws", "nox", "o3", "pm25", "pm10"]):
def timePlot(df, year, pollutants=["pm25"]):
"""
Plot time series of pollutants for given month and year.

Expand All @@ -10,46 +9,61 @@ def timePlot(df, year, month,
and at least one variable to plot
year: str
year of which data will be cut
month: int
month of what plot will be graphed
pollutants: list
column names of pollutants to compare
"""
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import plotly.graph_objects as go

# Cuts the df down to the month specified
# Cuts the df down to the month specified
df.index = pd.to_datetime(df.date)
df = df.drop("date", axis=1)
df_n = df[year]
df_n = df_n.fillna(method="ffill")
df_n["month"] = df_n.index.month
df_n.index.dayofweek
df_n_1 = df_n[df_n.month == month]
# New lists that have the value of the pollutant in the month specified

color_list = ["red", "blue", "green", "purple", "orange"]

plt.figure(1)
# series of `len(pollutants)` plots in one large plot that contains the
# time series of the pollutants

axs = []

for ix, pollutant in enumerate(pollutants):
values = df_n_1[pollutant]
color = color_list[ix % len(color_list)]

# plotting
plt.subplot(f"{len(pollutants)}1{ix}")
a = values.plot.line(color=color)
a.axes.get_xaxis().set_visible(False)
a.yaxis.set_label_position("right")
axs.append(a)
plt.ylabel(pollutant)
df_n_1 = df[(df.index.year == int(year))]
#df_n_1 = df[(df.index.month == int(month)) & (df.index.year == int(year))]

fig = go.Figure()

for pollutant in pollutants:
if pollutant in df_n_1.columns:
values = df_n_1[pollutant]

# Add trace for each selected pollutant
fig.add_trace(go.Scatter(
x=values.index,
y=values.values,
name=pollutant
))
else:
print(f"Warning: {pollutant} data not found.")

# Configure layout
fig.update_layout(
xaxis=dict(
rangeselector=dict(
buttons=list([
dict(count=1, label="1d", step="day", stepmode="backward"),
dict(count=7, label="1w", step="day", stepmode="backward"),
dict(count=1, label="1m", step="month", stepmode="backward"),
dict(count=6, label="6m", step="month", stepmode="backward"),
dict(count=1, label="YTD", step="year", stepmode="todate"),
dict(count=1, label="1y", step="year", stepmode="backward"),
dict(step="all")
])
#active=2
),
rangeslider=dict(
visible=True
),

type="date"
)
)

fig.show()

# making dates visible.
axs[0].axes.get_xaxis().set_visible(True)
return axs
#Example:
#timePlot(df, 2022, pollutants=['pm25','pm10','ws','wd'...and so on])
#--------------------