From 03f403ac26975be604610c93378099174fafec23 Mon Sep 17 00:00:00 2001 From: Tanvi-Jain01 Date: Fri, 30 Jun 2023 08:59:09 +0530 Subject: [PATCH 1/8] enhanced code of scatterPlot(refer issue #43) --- vayu/scatterPlot.py | 46 +++++++++++++-------------------------------- 1 file changed, 13 insertions(+), 33 deletions(-) diff --git a/vayu/scatterPlot.py b/vayu/scatterPlot.py index 7cd2390..229214f 100644 --- a/vayu/scatterPlot.py +++ b/vayu/scatterPlot.py @@ -19,48 +19,28 @@ def scatterPlot(df, x, y, **kwargs): import matplotlib.cm as cm from math import pi - pm10 = df.pm10 - o3 = df.o3 - ws = df.ws - wd = df.wd - nox = df.nox - no2 = df.no2 - + ######################################### # converts wind data to randians - df = pd.DataFrame({"speed": ws, "direction": wd}) - df["speed_x"] = df["speed"] * np.sin(df["direction"] * pi / 180.0) - df["speed_y"] = df["speed"] * np.cos(df["direction"] * pi / 180.0) + #df1 = pd.DataFrame({"speed": ws, "direction": wd}) + df["speed"+str(x)] = df['ws'] * np.sin(df['wd'] * pi / 180.0) + df["speed"+str(y)] = df['ws'] * np.cos(df['wd'] * pi / 180.0) fig, ax = plt.subplots(figsize=(8, 8), dpi=80) x0, x1 = ax.get_xlim() y0, y1 = ax.get_ylim() - ax.set_aspect("equal") - _ = df.plot(kind="scatter", x="speed_x", y="speed_y", alpha=0.35, ax=ax) + #ax.set_aspect("equal") + _ = df.plot(kind="scatter", x="speed"+str(x), y="speed"+str(y), alpha=0.35, ax=ax) + plt.show() + #################################### # simple seaborn plot that shows how given variables relate with one another - if x == "nox": - x = nox - elif x == "no2": - x = no2 - elif x == "o3": - x = o3 - elif x == "pm10": - x = pm10 - if y == "nox": - y = nox - elif y == "no2": - y = no2 - elif y == "o3": - y = o3 - elif y == "pm10": - y = pm10 - - sns.jointplot(x=x, y=y, kind="hex") - + sns.jointplot(x=df[x].values, y=df[y].values, kind="hex") + plt.xlabel(x) + plt.ylabel(y) plt.show() - - + + # ============================================================================= # df = pd.read_csv("mydata.csv") # scatterPlot(df,'nox','no2') From a2ea7c73c045beea12cc074cb8952467e17884ce Mon Sep 17 00:00:00 2001 From: Tanvi-Jain01 Date: Fri, 30 Jun 2023 15:34:52 +0530 Subject: [PATCH 2/8] timplot: modifying plots using plotly and --- vayu/timePlot.py | 84 ++++++++++++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 34 deletions(-) diff --git a/vayu/timePlot.py b/vayu/timePlot.py index 4470099..7ab87db 100644 --- a/vayu/timePlot.py +++ b/vayu/timePlot.py @@ -1,5 +1,4 @@ -def timePlot(df, year, month, - pollutants=["ws", "nox", "o3", "pm25", "pm10"]): +def timePlot(df, year, pollutants=["pm25", "pm10"]): """ Plot time series of pollutants for given month and year. @@ -19,37 +18,54 @@ def timePlot(df, year, month, import pandas as pd import matplotlib as mpl import matplotlib.pyplot as plt - - # Cuts the df down to the month specified + import plotly.graph_objects as go + + # Cuts the df down to the month specified df.index = pd.to_datetime(df.date) - df = df.drop("date", axis=1) - df_n = df[year] - df_n = df_n.fillna(method="ffill") - df_n["month"] = df_n.index.month - df_n.index.dayofweek - df_n_1 = df_n[df_n.month == month] - # New lists that have the value of the pollutant in the month specified - - color_list = ["red", "blue", "green", "purple", "orange"] - - plt.figure(1) - # series of `len(pollutants)` plots in one large plot that contains the - # time series of the polutants - - axs = [] - - for ix, pollutant in enumerate(pollutants): - values = df_n_1[pollutant] - color = color_list[ix % len(color_list)] - - # plotting - plt.subplot(f"{len(pollutants)}1{ix}") - a = values.plot.line(color=color) - a.axes.get_xaxis().set_visible(False) - a.yaxis.set_label_position("right") - axs.append(a) - plt.ylabel(pollutant) + df_n_1 = df[(df.index.year == int(year))] + #df_n_1 = df[(df.index.month == int(month)) & (df.index.year == int(year))] + + fig = go.Figure() + + for pollutant in pollutants: + if pollutant in df_n_1.columns: + values = df_n_1[pollutant] + + # Add trace for each selected pollutant + fig.add_trace(go.Scatter( + x=values.index, + y=values.values, + name=pollutant + )) + else: + print(f"Warning: {pollutant} data not found.") + + # Configure layout + fig.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, label="1d", step="day", stepmode="backward"), + dict(count=7, label="1w", step="day", stepmode="backward"), + dict(count=1, label="1m", step="month", stepmode="backward"), + dict(count=6, label="6m", step="month", stepmode="backward"), + dict(count=1, label="YTD", step="year", stepmode="todate"), + dict(count=1, label="1y", step="year", stepmode="backward"), + dict(step="all") + ]) + #active=2 + ), + rangeslider=dict( + visible=True + ), + + type="date" + ) + ) + + fig.show() - # making dates visible. - axs[0].axes.get_xaxis().set_visible(True) - return axs +#Example: +#timePlot(df, 2022, pollutants=['pm25','pm10','ws','wd'...and so on]) +#-------------------- + \ No newline at end of file From d7d2e6bdfaacfa2b4e12b772fecaa701403eb497 Mon Sep 17 00:00:00 2001 From: Tanvi-Jain01 Date: Fri, 30 Jun 2023 15:39:37 +0530 Subject: [PATCH 3/8] Adding visualization using plotly --- vayu/timePlot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vayu/timePlot.py b/vayu/timePlot.py index 7ab87db..42a6ec5 100644 --- a/vayu/timePlot.py +++ b/vayu/timePlot.py @@ -19,7 +19,7 @@ def timePlot(df, year, pollutants=["pm25", "pm10"]): import matplotlib as mpl import matplotlib.pyplot as plt import plotly.graph_objects as go - + # Cuts the df down to the month specified df.index = pd.to_datetime(df.date) df_n_1 = df[(df.index.year == int(year))] From fd7c45cb673539d28898d679e6a10aea732339ee Mon Sep 17 00:00:00 2001 From: Tanvi-Jain01 Date: Fri, 30 Jun 2023 16:20:19 +0530 Subject: [PATCH 4/8] modifying the code of googleMaps --- vayu/googleMaps.py | 66 +++++++++++++++------------------------------- 1 file changed, 21 insertions(+), 45 deletions(-) diff --git a/vayu/googleMaps.py b/vayu/googleMaps.py index 4d9a694..22474b8 100644 --- a/vayu/googleMaps.py +++ b/vayu/googleMaps.py @@ -15,9 +15,11 @@ def googleMaps(df, lat, long, pollutant, dataLoc): long: str Name of column in df of where longitude points are pollutant: str - Name of pollutant - dataLoc: str - Name of df column where pollutanat values are stored + Name of pollutant where values of that pollutant is stored. + date: str + visualizing the pollutant of a specific date. + markersize: int + The int by which the value of pollutant will be multiplied. """ import folium @@ -26,56 +28,30 @@ def googleMaps(df, lat, long, pollutant, dataLoc): import matplotlib.pyplot as plt import numpy as np import pandas as pd - - latitude = 37.0902 - longitude = -95.7129 - Arithmetic_Mean_map = folium.Map(location=[latitude, longitude], zoom_start=4) + + def googleMaps(df, lat, long, pollutant, date, markersize): + df1=df + print(date) + df1=df[df['date']==date] + print(df1) + # ============================================================================= # df = pd.read_csv('interpolData.csv') # ============================================================================= - some_value = pollutant - df = df.loc[df["Parameter Name"] == some_value] - - some_value = "2018-05-07" - df = df.loc[df["Date Local"] == some_value] - - df = df.sample(frac=1) + lat= df1[lat].values[0] + long=df1[long].values[0] + my_map4 = folium.Map(location = [lat, long], zoom_start = 10) - # df_train, df_test = train_test_split(df, test_size=0.2) - df["Arithmetic Mean Q"] = pd.qcut(df[dataLoc], 4, labels=False) - colordict = {0: "lightblue", 1: "lightgreen", 2: "orange", 3: "red"} + for lat,long,pol,st in zip(df['latitude'],df['longitude'],df[pollutant],df['station']): + + folium.CircleMarker([lat, long],radius=markersize * pol, popup=(str(st).capitalize()+"
"+ str(round(pol, 3))), fill=True, fill_opacity=0.7, color = 'red').add_to(my_map4) - for lat, lon, Arithmetic_Mean_Q, Arithmetic_Mean, city, AQI in zip( - df[lat], - df[long], - df["Arithmetic Mean Q"], - df[dataLoc], - df["City Name"], - df["AQI"], - ): - folium.CircleMarker( - [lat, lon], - radius=0.15 * AQI, - popup=( - "City: " - + str(city).capitalize() - + "
" - #'Bike score: ' + str(bike) + '
' - "Arithmetic_Mean level: " - + str(Arithmetic_Mean) - + "%" - ), - color="b", - key_on=Arithmetic_Mean_Q, - threshold_scale=[0, 1, 2, 3], - fill_color=colordict[Arithmetic_Mean_Q], - fill=True, - fill_opacity=0.7, - ).add_to(Arithmetic_Mean_map) - Arithmetic_Mean_map.save("mymap.html") + my_map4.save("googleMaps.html") + print('your map has been saved') +#Example # df = pd.read_csv('interpolData.csv') # googleMaps(df,'Latitude','Longitude','Ozone','Arithmetic Mean') From 5b48d3156e57af55485e58598092e323fe8c3feb Mon Sep 17 00:00:00 2001 From: Tanvi-Jain01 Date: Fri, 30 Jun 2023 16:25:04 +0530 Subject: [PATCH 5/8] modifying googlemaps #38 --- vayu/googleMaps.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vayu/googleMaps.py b/vayu/googleMaps.py index 22474b8..97a7e05 100644 --- a/vayu/googleMaps.py +++ b/vayu/googleMaps.py @@ -1,4 +1,4 @@ -def googleMaps(df, lat, long, pollutant, dataLoc): +def googleMaps(df, lat, long, pollutant, date, markersize): """Plots a geographical plot. Plots a folium plot of longitude and latitude points @@ -54,4 +54,4 @@ def googleMaps(df, lat, long, pollutant, date, markersize): #Example # df = pd.read_csv('interpolData.csv') -# googleMaps(df,'Latitude','Longitude','Ozone','Arithmetic Mean') +#googleMaps(df, 'latitude', 'longitude', 'pm25', '2022-08-23', 5) From 043b730917c62ba042dd3817cfcbd2ae15e99f1b Mon Sep 17 00:00:00 2001 From: Tanvi-Jain01 Date: Mon, 10 Jul 2023 12:00:18 +0530 Subject: [PATCH 6/8] Commit message: timeplot using plotly and subplot error solved --- vayu/timePlot.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vayu/timePlot.py b/vayu/timePlot.py index 42a6ec5..052091b 100644 --- a/vayu/timePlot.py +++ b/vayu/timePlot.py @@ -1,4 +1,4 @@ -def timePlot(df, year, pollutants=["pm25", "pm10"]): +def timePlot(df, year, pollutants=["pm25"]): """ Plot time series of pollutants for given month and year. @@ -9,8 +9,6 @@ def timePlot(df, year, pollutants=["pm25", "pm10"]): and at least one variable to plot year: str year of which data will be cut - month: int - month of what plot will be graphed pollutants: list column names of pollutatnts to compare """ From 273342d38c18c192880e9c54430de37cb317b072 Mon Sep 17 00:00:00 2001 From: Tanvi-Jain01 Date: Mon, 10 Jul 2023 12:29:24 +0530 Subject: [PATCH 7/8] googleMaps code enhanced and errors solved --- vayu/googleMaps.py | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/vayu/googleMaps.py b/vayu/googleMaps.py index 97a7e05..a06dd4e 100644 --- a/vayu/googleMaps.py +++ b/vayu/googleMaps.py @@ -1,4 +1,4 @@ -def googleMaps(df, lat, long, pollutant, date, markersize): +def googleMaps(df, lat, long, pollutant, date, markersize,zoom): """Plots a geographical plot. Plots a folium plot of longitude and latitude points @@ -20,6 +20,8 @@ def googleMaps(df, lat, long, pollutant, date, markersize): visualizing the pollutant of a specific date. markersize: int The int by which the value of pollutant will be multiplied. + zoom: int + The int by which you want to zoom in the plot """ import folium @@ -29,29 +31,27 @@ def googleMaps(df, lat, long, pollutant, date, markersize): import numpy as np import pandas as pd - def googleMaps(df, lat, long, pollutant, date, markersize): - df1=df - print(date) - df1=df[df['date']==date] - print(df1) - + + df1 = df[df['date'] == date] # ============================================================================= # df = pd.read_csv('interpolData.csv') # ============================================================================= - lat= df1[lat].values[0] - long=df1[long].values[0] - my_map4 = folium.Map(location = [lat, long], zoom_start = 10) + lat= df1[lat].values[0] + long=df1[long].values[0] + my_map4 = folium.Map(location = [lat, long], zoom_start = zoom) - for lat,long,pol,st in zip(df['latitude'],df['longitude'],df[pollutant],df['station']): - - folium.CircleMarker([lat, long],radius=markersize * pol, popup=(str(st).capitalize()+"
"+ str(round(pol, 3))), fill=True, fill_opacity=0.7, color = 'red').add_to(my_map4) + for lat,long,pol,st in zip(df['latitude'],df['longitude'],df[pollutant],df['station']): + folium.CircleMarker([lat, long],radius=markersize * pol, popup=(str(st).capitalize()+"
"+ str(round(pol, 3))), fill=True, fill_opacity=0.7, color = 'red').add_to(my_map4) - my_map4.save("googleMaps.html") - print('your map has been saved') + my_map4.save("googleMaps.html") + print('your map has been saved') + return my_map4 -#Example +#Example: # df = pd.read_csv('interpolData.csv') -#googleMaps(df, 'latitude', 'longitude', 'pm25', '2022-08-23', 5) +# Call the function and display the map in Jupyter Notebook +# map_obj = googleMaps(df, 'latitude', 'longitude', 'pm25', '2022-02-23', 5,10) +# map_obj From 4087590b6646067b330fdd80d0796f84acfff761 Mon Sep 17 00:00:00 2001 From: Tanvi-Jain01 Date: Mon, 10 Jul 2023 13:54:59 +0530 Subject: [PATCH 8/8] code extended with group and time_period --- vayu/selectByDate.py | 49 ++++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 15 deletions(-) diff --git a/vayu/selectByDate.py b/vayu/selectByDate.py index 14aa415..4553238 100644 --- a/vayu/selectByDate.py +++ b/vayu/selectByDate.py @@ -1,28 +1,47 @@ -def selectByDate(df, year): - """ - Utility function to cut given dataframe by the year - and find the average value of each day +def selectByDate(df, year, group=None, time_period='day'): + """ + Utility function to cut a given dataframe by year and find the average value + of each day, month, or year. Optionally, data can be grouped by specified columns. Parameters ---------- df: data frame - a data frame containing a date field + A data frame containing a date field and optional grouping columns. year: type string - a year to select to cut data + A year to select and filter the data. + group: list, optional + A list of columns to group the data by. Default is None (no grouping). + time_period: {'day', 'month', 'year'}, optional + The time period to compute the average value. Default is 'day'. + + Returns + ------- + data frame + A data frame with the average value of each day, month, or year. + If group is specified, the data will be grouped accordingly. """ import pandas as pd import numpy as np - - df.index = pd.to_datetime(df.date) - df = df.drop("date", axis=1) - df_n = df[year].resample("1D").mean() - df_n = df_n.fillna(method="ffill") - df_n["month"] = df_n.index.month - df_n.index.dayofweek - print(df_n) + + df['date'] = pd.to_datetime(df['date']) + df_year = df[df['date'].dt.year == int(year)] + + if group: + df_grouped = df_year.groupby(group).resample(time_period[0], on='date').mean(numeric_only=True) + return df_grouped + + if time_period == 'month': + df_month = df_year.resample('M', on='date').mean(numeric_only=True) + return df_month + elif time_period == 'year': + df_yearly = df_year.resample('Y', on='date').mean(numeric_only=True) + return df_yearly + + df_day = df_year.resample('D', on='date').mean(numeric_only=True) + return df_day # ============================================================================= # df = pd.read_csv("mydata.csv") -# selectByDate(df,'2003') +#selectByDate(df1,'2022',group=['latitude','longitude','station'], time_period='month') # =============================================================================