diff --git a/vayu/googleMaps.py b/vayu/googleMaps.py index 4d9a694..a06dd4e 100644 --- a/vayu/googleMaps.py +++ b/vayu/googleMaps.py @@ -1,4 +1,4 @@ -def googleMaps(df, lat, long, pollutant, dataLoc): +def googleMaps(df, lat, long, pollutant, date, markersize,zoom): """Plots a geographical plot. Plots a folium plot of longitude and latitude points @@ -15,9 +15,13 @@ def googleMaps(df, lat, long, pollutant, dataLoc): long: str Name of column in df of where longitude points are pollutant: str - Name of pollutant - dataLoc: str - Name of df column where pollutanat values are stored + Name of pollutant where values of that pollutant is stored. + date: str + visualizing the pollutant of a specific date. + markersize: int + The int by which the value of pollutant will be multiplied. + zoom: int + The int by which you want to zoom in the plot """ import folium @@ -26,56 +30,28 @@ def googleMaps(df, lat, long, pollutant, dataLoc): import matplotlib.pyplot as plt import numpy as np import pandas as pd - - latitude = 37.0902 - longitude = -95.7129 - Arithmetic_Mean_map = folium.Map(location=[latitude, longitude], zoom_start=4) + + + df1 = df[df['date'] == date] # ============================================================================= # df = pd.read_csv('interpolData.csv') # ============================================================================= - some_value = pollutant - df = df.loc[df["Parameter Name"] == some_value] - - some_value = "2018-05-07" - df = df.loc[df["Date Local"] == some_value] - - df = df.sample(frac=1) + lat= df1[lat].values[0] + long=df1[long].values[0] + my_map4 = folium.Map(location = [lat, long], zoom_start = zoom) - # df_train, df_test = train_test_split(df, test_size=0.2) - df["Arithmetic Mean Q"] = pd.qcut(df[dataLoc], 4, labels=False) - colordict = {0: "lightblue", 1: "lightgreen", 2: "orange", 3: "red"} + for lat,long,pol,st in zip(df['latitude'],df['longitude'],df[pollutant],df['station']): + folium.CircleMarker([lat, long],radius=markersize * pol, popup=(str(st).capitalize()+"
"+ str(round(pol, 3))), fill=True, fill_opacity=0.7, color = 'red').add_to(my_map4) - for lat, lon, Arithmetic_Mean_Q, Arithmetic_Mean, city, AQI in zip( - df[lat], - df[long], - df["Arithmetic Mean Q"], - df[dataLoc], - df["City Name"], - df["AQI"], - ): - folium.CircleMarker( - [lat, lon], - radius=0.15 * AQI, - popup=( - "City: " - + str(city).capitalize() - + "
" - #'Bike score: ' + str(bike) + '
' - "Arithmetic_Mean level: " - + str(Arithmetic_Mean) - + "%" - ), - color="b", - key_on=Arithmetic_Mean_Q, - threshold_scale=[0, 1, 2, 3], - fill_color=colordict[Arithmetic_Mean_Q], - fill=True, - fill_opacity=0.7, - ).add_to(Arithmetic_Mean_map) - Arithmetic_Mean_map.save("mymap.html") + my_map4.save("googleMaps.html") + print('your map has been saved') + return my_map4 +#Example: # df = pd.read_csv('interpolData.csv') -# googleMaps(df,'Latitude','Longitude','Ozone','Arithmetic Mean') +# Call the function and display the map in Jupyter Notebook +# map_obj = googleMaps(df, 'latitude', 'longitude', 'pm25', '2022-02-23', 5,10) +# map_obj diff --git a/vayu/scatterPlot.py b/vayu/scatterPlot.py index 7cd2390..229214f 100644 --- a/vayu/scatterPlot.py +++ b/vayu/scatterPlot.py @@ -19,48 +19,28 @@ def scatterPlot(df, x, y, **kwargs): import matplotlib.cm as cm from math import pi - pm10 = df.pm10 - o3 = df.o3 - ws = df.ws - wd = df.wd - nox = df.nox - no2 = df.no2 - + ######################################### # converts wind data to randians - df = pd.DataFrame({"speed": ws, "direction": wd}) - df["speed_x"] = df["speed"] * np.sin(df["direction"] * pi / 180.0) - df["speed_y"] = df["speed"] * np.cos(df["direction"] * pi / 180.0) + #df1 = pd.DataFrame({"speed": ws, "direction": wd}) + df["speed"+str(x)] = df['ws'] * np.sin(df['wd'] * pi / 180.0) + df["speed"+str(y)] = df['ws'] * np.cos(df['wd'] * pi / 180.0) fig, ax = plt.subplots(figsize=(8, 8), dpi=80) x0, x1 = ax.get_xlim() y0, y1 = ax.get_ylim() - ax.set_aspect("equal") - _ = df.plot(kind="scatter", x="speed_x", y="speed_y", alpha=0.35, ax=ax) + #ax.set_aspect("equal") + _ = df.plot(kind="scatter", x="speed"+str(x), y="speed"+str(y), alpha=0.35, ax=ax) + plt.show() + #################################### # simple seaborn plot that shows how given variables relate with one another - if x == "nox": - x = nox - elif x == "no2": - x = no2 - elif x == "o3": - x = o3 - elif x == "pm10": - x = pm10 - if y == "nox": - y = nox - elif y == "no2": - y = no2 - elif y == "o3": - y = o3 - elif y == "pm10": - y = pm10 - - sns.jointplot(x=x, y=y, kind="hex") - + sns.jointplot(x=df[x].values, y=df[y].values, kind="hex") + plt.xlabel(x) + plt.ylabel(y) plt.show() - - + + # ============================================================================= # df = pd.read_csv("mydata.csv") # scatterPlot(df,'nox','no2') diff --git a/vayu/selectByDate.py b/vayu/selectByDate.py index 14aa415..4553238 100644 --- a/vayu/selectByDate.py +++ b/vayu/selectByDate.py @@ -1,28 +1,47 @@ -def selectByDate(df, year): - """ - Utility function to cut given dataframe by the year - and find the average value of each day +def selectByDate(df, year, group=None, time_period='day'): + """ + Utility function to cut a given dataframe by year and find the average value + of each day, month, or year. Optionally, data can be grouped by specified columns. Parameters ---------- df: data frame - a data frame containing a date field + A data frame containing a date field and optional grouping columns. year: type string - a year to select to cut data + A year to select and filter the data. + group: list, optional + A list of columns to group the data by. Default is None (no grouping). + time_period: {'day', 'month', 'year'}, optional + The time period to compute the average value. Default is 'day'. + + Returns + ------- + data frame + A data frame with the average value of each day, month, or year. + If group is specified, the data will be grouped accordingly. """ import pandas as pd import numpy as np - - df.index = pd.to_datetime(df.date) - df = df.drop("date", axis=1) - df_n = df[year].resample("1D").mean() - df_n = df_n.fillna(method="ffill") - df_n["month"] = df_n.index.month - df_n.index.dayofweek - print(df_n) + + df['date'] = pd.to_datetime(df['date']) + df_year = df[df['date'].dt.year == int(year)] + + if group: + df_grouped = df_year.groupby(group).resample(time_period[0], on='date').mean(numeric_only=True) + return df_grouped + + if time_period == 'month': + df_month = df_year.resample('M', on='date').mean(numeric_only=True) + return df_month + elif time_period == 'year': + df_yearly = df_year.resample('Y', on='date').mean(numeric_only=True) + return df_yearly + + df_day = df_year.resample('D', on='date').mean(numeric_only=True) + return df_day # ============================================================================= # df = pd.read_csv("mydata.csv") -# selectByDate(df,'2003') +#selectByDate(df1,'2022',group=['latitude','longitude','station'], time_period='month') # ============================================================================= diff --git a/vayu/timePlot.py b/vayu/timePlot.py index 4470099..052091b 100644 --- a/vayu/timePlot.py +++ b/vayu/timePlot.py @@ -1,5 +1,4 @@ -def timePlot(df, year, month, - pollutants=["ws", "nox", "o3", "pm25", "pm10"]): +def timePlot(df, year, pollutants=["pm25"]): """ Plot time series of pollutants for given month and year. @@ -10,8 +9,6 @@ def timePlot(df, year, month, and at least one variable to plot year: str year of which data will be cut - month: int - month of what plot will be graphed pollutants: list column names of pollutatnts to compare """ @@ -19,37 +16,54 @@ def timePlot(df, year, month, import pandas as pd import matplotlib as mpl import matplotlib.pyplot as plt + import plotly.graph_objects as go - # Cuts the df down to the month specified + # Cuts the df down to the month specified df.index = pd.to_datetime(df.date) - df = df.drop("date", axis=1) - df_n = df[year] - df_n = df_n.fillna(method="ffill") - df_n["month"] = df_n.index.month - df_n.index.dayofweek - df_n_1 = df_n[df_n.month == month] - # New lists that have the value of the pollutant in the month specified - - color_list = ["red", "blue", "green", "purple", "orange"] - - plt.figure(1) - # series of `len(pollutants)` plots in one large plot that contains the - # time series of the polutants - - axs = [] - - for ix, pollutant in enumerate(pollutants): - values = df_n_1[pollutant] - color = color_list[ix % len(color_list)] - - # plotting - plt.subplot(f"{len(pollutants)}1{ix}") - a = values.plot.line(color=color) - a.axes.get_xaxis().set_visible(False) - a.yaxis.set_label_position("right") - axs.append(a) - plt.ylabel(pollutant) + df_n_1 = df[(df.index.year == int(year))] + #df_n_1 = df[(df.index.month == int(month)) & (df.index.year == int(year))] + + fig = go.Figure() + + for pollutant in pollutants: + if pollutant in df_n_1.columns: + values = df_n_1[pollutant] + + # Add trace for each selected pollutant + fig.add_trace(go.Scatter( + x=values.index, + y=values.values, + name=pollutant + )) + else: + print(f"Warning: {pollutant} data not found.") + + # Configure layout + fig.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, label="1d", step="day", stepmode="backward"), + dict(count=7, label="1w", step="day", stepmode="backward"), + dict(count=1, label="1m", step="month", stepmode="backward"), + dict(count=6, label="6m", step="month", stepmode="backward"), + dict(count=1, label="YTD", step="year", stepmode="todate"), + dict(count=1, label="1y", step="year", stepmode="backward"), + dict(step="all") + ]) + #active=2 + ), + rangeslider=dict( + visible=True + ), + + type="date" + ) + ) + + fig.show() - # making dates visible. - axs[0].axes.get_xaxis().set_visible(True) - return axs +#Example: +#timePlot(df, 2022, pollutants=['pm25','pm10','ws','wd'...and so on]) +#-------------------- + \ No newline at end of file