diff --git a/vayu/googleMaps.py b/vayu/googleMaps.py index 4d9a694..a06dd4e 100644 --- a/vayu/googleMaps.py +++ b/vayu/googleMaps.py @@ -1,4 +1,4 @@ -def googleMaps(df, lat, long, pollutant, dataLoc): +def googleMaps(df, lat, long, pollutant, date, markersize,zoom): """Plots a geographical plot. Plots a folium plot of longitude and latitude points @@ -15,9 +15,13 @@ def googleMaps(df, lat, long, pollutant, dataLoc): long: str Name of column in df of where longitude points are pollutant: str - Name of pollutant - dataLoc: str - Name of df column where pollutanat values are stored + Name of pollutant where values of that pollutant is stored. + date: str + visualizing the pollutant of a specific date. + markersize: int + The int by which the value of pollutant will be multiplied. + zoom: int + The int by which you want to zoom in the plot """ import folium @@ -26,56 +30,28 @@ def googleMaps(df, lat, long, pollutant, dataLoc): import matplotlib.pyplot as plt import numpy as np import pandas as pd - - latitude = 37.0902 - longitude = -95.7129 - Arithmetic_Mean_map = folium.Map(location=[latitude, longitude], zoom_start=4) + + + df1 = df[df['date'] == date] # ============================================================================= # df = pd.read_csv('interpolData.csv') # ============================================================================= - some_value = pollutant - df = df.loc[df["Parameter Name"] == some_value] - - some_value = "2018-05-07" - df = df.loc[df["Date Local"] == some_value] - - df = df.sample(frac=1) + lat= df1[lat].values[0] + long=df1[long].values[0] + my_map4 = folium.Map(location = [lat, long], zoom_start = zoom) - # df_train, df_test = train_test_split(df, test_size=0.2) - df["Arithmetic Mean Q"] = pd.qcut(df[dataLoc], 4, labels=False) - colordict = {0: "lightblue", 1: "lightgreen", 2: "orange", 3: "red"} + for lat,long,pol,st in zip(df['latitude'],df['longitude'],df[pollutant],df['station']): + folium.CircleMarker([lat, long],radius=markersize * pol, popup=(str(st).capitalize()+"
"+ str(round(pol, 3))), fill=True, fill_opacity=0.7, color = 'red').add_to(my_map4) - for lat, lon, Arithmetic_Mean_Q, Arithmetic_Mean, city, AQI in zip( - df[lat], - df[long], - df["Arithmetic Mean Q"], - df[dataLoc], - df["City Name"], - df["AQI"], - ): - folium.CircleMarker( - [lat, lon], - radius=0.15 * AQI, - popup=( - "City: " - + str(city).capitalize() - + "
" - #'Bike score: ' + str(bike) + '
' - "Arithmetic_Mean level: " - + str(Arithmetic_Mean) - + "%" - ), - color="b", - key_on=Arithmetic_Mean_Q, - threshold_scale=[0, 1, 2, 3], - fill_color=colordict[Arithmetic_Mean_Q], - fill=True, - fill_opacity=0.7, - ).add_to(Arithmetic_Mean_map) - Arithmetic_Mean_map.save("mymap.html") + my_map4.save("googleMaps.html") + print('your map has been saved') + return my_map4 +#Example: # df = pd.read_csv('interpolData.csv') -# googleMaps(df,'Latitude','Longitude','Ozone','Arithmetic Mean') +# Call the function and display the map in Jupyter Notebook +# map_obj = googleMaps(df, 'latitude', 'longitude', 'pm25', '2022-02-23', 5,10) +# map_obj diff --git a/vayu/scatterPlot.py b/vayu/scatterPlot.py index 7cd2390..229214f 100644 --- a/vayu/scatterPlot.py +++ b/vayu/scatterPlot.py @@ -19,48 +19,28 @@ def scatterPlot(df, x, y, **kwargs): import matplotlib.cm as cm from math import pi - pm10 = df.pm10 - o3 = df.o3 - ws = df.ws - wd = df.wd - nox = df.nox - no2 = df.no2 - + ######################################### # converts wind data to randians - df = pd.DataFrame({"speed": ws, "direction": wd}) - df["speed_x"] = df["speed"] * np.sin(df["direction"] * pi / 180.0) - df["speed_y"] = df["speed"] * np.cos(df["direction"] * pi / 180.0) + #df1 = pd.DataFrame({"speed": ws, "direction": wd}) + df["speed"+str(x)] = df['ws'] * np.sin(df['wd'] * pi / 180.0) + df["speed"+str(y)] = df['ws'] * np.cos(df['wd'] * pi / 180.0) fig, ax = plt.subplots(figsize=(8, 8), dpi=80) x0, x1 = ax.get_xlim() y0, y1 = ax.get_ylim() - ax.set_aspect("equal") - _ = df.plot(kind="scatter", x="speed_x", y="speed_y", alpha=0.35, ax=ax) + #ax.set_aspect("equal") + _ = df.plot(kind="scatter", x="speed"+str(x), y="speed"+str(y), alpha=0.35, ax=ax) + plt.show() + #################################### # simple seaborn plot that shows how given variables relate with one another - if x == "nox": - x = nox - elif x == "no2": - x = no2 - elif x == "o3": - x = o3 - elif x == "pm10": - x = pm10 - if y == "nox": - y = nox - elif y == "no2": - y = no2 - elif y == "o3": - y = o3 - elif y == "pm10": - y = pm10 - - sns.jointplot(x=x, y=y, kind="hex") - + sns.jointplot(x=df[x].values, y=df[y].values, kind="hex") + plt.xlabel(x) + plt.ylabel(y) plt.show() - - + + # ============================================================================= # df = pd.read_csv("mydata.csv") # scatterPlot(df,'nox','no2') diff --git a/vayu/selectByDate.py b/vayu/selectByDate.py index 14aa415..a8080c3 100644 --- a/vayu/selectByDate.py +++ b/vayu/selectByDate.py @@ -1,28 +1,48 @@ -def selectByDate(df, year): - """ - Utility function to cut given dataframe by the year - and find the average value of each day +import pandas as pd +import numpy as np + +def select_by(df:pd.Dataframe, year:str, group:list=None, time_period:str='day'): + """ + Utility function to cut a given dataframe by year and find the average value + of each day, month, or year. Optionally, data can be grouped by specified columns. Parameters ---------- df: data frame - a data frame containing a date field + A data frame containing a date field and optional grouping columns. year: type string - a year to select to cut data + A year to select and filter the data. + group: list, optional + A list of columns to group the data by. Default is None (no grouping). + time_period: {'day', 'month', 'year'}, optional + The time period to compute the average value. Default is 'day'. + + Returns + ------- + data frame + A data frame with the average value of each day, month, or year. + If group is specified, the data will be grouped accordingly. """ - import pandas as pd - import numpy as np - - df.index = pd.to_datetime(df.date) - df = df.drop("date", axis=1) - df_n = df[year].resample("1D").mean() - df_n = df_n.fillna(method="ffill") - df_n["month"] = df_n.index.month - df_n.index.dayofweek - print(df_n) + + df['date'] = pd.to_datetime(df['date']) + df_year = df[df['date'].dt.year == int(year)] + + if group: + df_grouped = df_year.groupby(group).resample(time_period[0], on='date').mean(numeric_only=True) + return df_grouped + + if time_period == 'month': + df_month = df_year.resample('M', on='date').mean(numeric_only=True) + return df_month + elif time_period == 'year': + df_yearly = df_year.resample('Y', on='date').mean(numeric_only=True) + return df_yearly + + df_day = df_year.resample('D', on='date').mean(numeric_only=True) + return df_day # ============================================================================= # df = pd.read_csv("mydata.csv") -# selectByDate(df,'2003') +#select_by(df1,'2022',group=['latitude','longitude','station'], time_period='month') # ============================================================================= diff --git a/vayu/summary_plot.py b/vayu/summary_plot.py new file mode 100644 index 0000000..bcc9303 --- /dev/null +++ b/vayu/summary_plot.py @@ -0,0 +1,130 @@ +import datetime as dt +import matplotlib.pyplot as plt +import matplotlib as mpl +import numpy as np +import pandas as pd +from numpy import array +import matplotlib.patches as mpatches +import seaborn as sns +from matplotlib.pyplot import figure + +def summary_plot(df: pd.DataFrame): + """ Plots import summary of data frame given. Plots line plots + and histograms for each polutant as well as statiscs such as + mean,max,min,median, and 95th percentile + + Parameters + ---------- + df: data frame + data frame to be summarised. Must contain a date field + and at least one other parameter + """ + + # Initialize variables + pollutants = ["pm10", "pm25", "sox", "co", "o3", "nox", "pb", "nh3"] + categories = ["s", "m", "h"] + + counts = {pollutant: {category: 0 for category in categories} for pollutant in pollutants} + + + df.index = pd.to_datetime(df.date) + df = df.drop("date", axis=1) + df_all = df.resample("1D") + df_all = df.copy() + df_all = df_all.fillna(method="ffill") + #print(df_all.columns) + + # Calculate counts for each pollutant category + for pollutant in pollutants: + if pollutant in df_all.columns: + column_data = df_all[pollutant] + #print(df_all) + for _, data in column_data.iteritems(): + if pollutant in ["pm10", "pm25"]: + if data < 100: + counts[pollutant]["s"] += 1 + elif data < 250: + counts[pollutant]["m"] += 1 + else: + counts[pollutant]["h"] += 1 + elif pollutant == "co": + if data < 2: + counts[pollutant]["s"] += 1 + elif data < 10: + counts[pollutant]["m"] += 1 + else: + counts[pollutant]["h"] += 1 + elif pollutant == "sox": + if data <= 80: + counts[pollutant]["s"] += 1 + elif data <= 380: + counts[pollutant]["m"] += 1 + else: + counts[pollutant]["h"] += 1 + elif pollutant == "o3": + if data < 100: + counts[pollutant]["s"] += 1 + elif data < 168: + counts[pollutant]["m"] += 1 + else: + counts[pollutant]["h"] += 1 + elif pollutant == "nox": + if data < 80: + counts[pollutant]["s"] += 1 + elif data < 180: + counts[pollutant]["m"] += 1 + else: + counts[pollutant]["h"] += 1 + elif pollutant == "pb": + if data <= 1: + counts[pollutant]["s"] += 1 + elif data <= 2: + counts[pollutant]["m"] += 1 + else: + counts[pollutant]["h"] += 1 + elif pollutant == "nh3": + if data <= 400: + counts[pollutant]["s"] += 1 + elif data <= 800: + counts[pollutant]["m"] += 1 + else: + counts[pollutant]["h"] += 1 + + + + # Plot line, histogram, and pie charts for each pollutant + fig, axes = plt.subplots(len(df_all.columns), 3, figsize=(25,25)) + + for i, pollutant in enumerate(df_all.columns): + ax_line = axes[i, 0] + ax_hist = axes[i, 1] + ax_pie = axes[i, 2] + + df_all[pollutant].plot.line(ax=ax_line, color="gold") + ax_line.axes.get_xaxis().set_visible(False) + ax_line.yaxis.set_label_position("left") + ax_line.set_ylabel(pollutant, fontsize=30, bbox=dict(facecolor="whitesmoke")) + + ax_hist.hist(df_all[pollutant], bins=50, color="green") + + labels = ["Safe", "Moderate", "High"] + sizes = [counts[pollutant][category] for category in categories] + explode = [0, 0, 1] + + ax_pie.pie(sizes, explode=explode, labels=labels, autopct="%1.1f%%", shadow=False, startangle=90) + ax_pie.axis("equal") + + ax_pie.set_xlabel("Statistics") + + print(f"{pollutant}\nmin = {df_all[pollutant].min():.2f}\nmax = {df_all[pollutant].max():.2f}\nmissing = {df_all[pollutant].isna().sum()}\nmean = {df_all[pollutant].mean():.2f}\nmedian = {df_all[pollutant].median():.2f}\n95th percentile = {df_all[pollutant].quantile(0.95):.2f}\n") + + plt.savefig("summary_plot.png", dpi=300, format="png") + plt.show() + print("your plots has also been saved") + plt.close() + + +# ============================================================================= +# df = pd.read_csv('mydata.csv') +# summary_plot(df) +# ============================================================================= diff --git a/vayu/timePlot.py b/vayu/timePlot.py index 4470099..581063a 100644 --- a/vayu/timePlot.py +++ b/vayu/timePlot.py @@ -1,5 +1,10 @@ -def timePlot(df, year, month, - pollutants=["ws", "nox", "o3", "pm25", "pm10"]): +import numpy as np +import pandas as pd +import matplotlib as mpl +import matplotlib.pyplot as plt +import plotly.graph_objects as go + +def time_plot(df:pd.DataFrame, year:str, pollutants:list=["pm25"]): """ Plot time series of pollutants for given month and year. @@ -10,46 +15,56 @@ def timePlot(df, year, month, and at least one variable to plot year: str year of which data will be cut - month: int - month of what plot will be graphed pollutants: list column names of pollutatnts to compare """ - import numpy as np - import pandas as pd - import matplotlib as mpl - import matplotlib.pyplot as plt - - # Cuts the df down to the month specified + + # Cuts the df down to the month specified df.index = pd.to_datetime(df.date) - df = df.drop("date", axis=1) - df_n = df[year] - df_n = df_n.fillna(method="ffill") - df_n["month"] = df_n.index.month - df_n.index.dayofweek - df_n_1 = df_n[df_n.month == month] - # New lists that have the value of the pollutant in the month specified - - color_list = ["red", "blue", "green", "purple", "orange"] - - plt.figure(1) - # series of `len(pollutants)` plots in one large plot that contains the - # time series of the polutants - - axs = [] - - for ix, pollutant in enumerate(pollutants): - values = df_n_1[pollutant] - color = color_list[ix % len(color_list)] - - # plotting - plt.subplot(f"{len(pollutants)}1{ix}") - a = values.plot.line(color=color) - a.axes.get_xaxis().set_visible(False) - a.yaxis.set_label_position("right") - axs.append(a) - plt.ylabel(pollutant) + df_n_1 = df[(df.index.year == int(year))] + #df_n_1 = df[(df.index.month == int(month)) & (df.index.year == int(year))] + + fig = go.Figure() + + for pollutant in pollutants: + if pollutant in df_n_1.columns: + values = df_n_1[pollutant] + + # Add trace for each selected pollutant + fig.add_trace(go.Scatter( + x=values.index, + y=values.values, + name=pollutant + )) + else: + print(f"Warning: {pollutant} data not found.") + + # Configure layout + fig.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, label="1d", step="day", stepmode="backward"), + dict(count=7, label="1w", step="day", stepmode="backward"), + dict(count=1, label="1m", step="month", stepmode="backward"), + dict(count=6, label="6m", step="month", stepmode="backward"), + dict(count=1, label="YTD", step="year", stepmode="todate"), + dict(count=1, label="1y", step="year", stepmode="backward"), + dict(step="all") + ]) + #active=2 + ), + rangeslider=dict( + visible=True + ), + + type="date" + ) + ) + + fig.show() - # making dates visible. - axs[0].axes.get_xaxis().set_visible(True) - return axs +#Example: +#time_plot(df, 2022, pollutants=['pm25','pm10','ws','wd'...and so on]) +#-------------------- + \ No newline at end of file