sustainability-lab · Tanvi-Jain01 · Jun 30, 2023 · Jun 30, 2023 · Jun 30, 2023 · Jun 30, 2023
diff --git a/vayu/googleMaps.py b/vayu/googleMaps.py
@@ -1,4 +1,4 @@
-def googleMaps(df, lat, long, pollutant, dataLoc):
+def googleMaps(df, lat, long, pollutant, date, markersize,zoom):
     """Plots a geographical plot.
 
     Plots a folium plot of longitude and latitude points 
@@ -15,9 +15,13 @@ def googleMaps(df, lat, long, pollutant, dataLoc):
     long: str
         Name of column in df of where longitude points are
     pollutant: str
-        Name of pollutant 
-    dataLoc: str
-        Name of df column where pollutanat values are stored
+        Name of the column in df where the pollutant values are stored.
+    date: str
+        Date to visualize, matched against the 'date' column of df.
+    markersize: int
+        Factor by which each pollutant value is multiplied to get the marker radius.
+    zoom: int
+        Initial zoom level of the map.
 
     """
     import folium
@@ -26,56 +30,28 @@ def googleMaps(df, lat, long, pollutant, dataLoc):
     import matplotlib.pyplot as plt
     import numpy as np
     import pandas as pd
-
-    latitude = 37.0902
-    longitude = -95.7129
-    Arithmetic_Mean_map = folium.Map(location=[latitude, longitude], zoom_start=4)
+
+
+    df1 = df[df['date'] == date]
 
     # =============================================================================
     # df = pd.read_csv('interpolData.csv')
     # =============================================================================
 
-    some_value = pollutant
-    df = df.loc[df["Parameter Name"] == some_value]
-
-    some_value = "2018-05-07"
-    df = df.loc[df["Date Local"] == some_value]
-
-    df = df.sample(frac=1)
+    lat= df1[lat].values[0] 
+    long=df1[long].values[0] 
+    my_map4 = folium.Map(location = [lat, long], zoom_start = zoom)
 
-    # df_train, df_test = train_test_split(df, test_size=0.2)
-    df["Arithmetic Mean Q"] = pd.qcut(df[dataLoc], 4, labels=False)
-    colordict = {0: "lightblue", 1: "lightgreen", 2: "orange", 3: "red"}
+    for lat,long,pol,st in zip(df['latitude'],df['longitude'],df[pollutant],df['station']):
+        folium.CircleMarker([lat, long],radius=markersize * pol, popup=(str(st).capitalize()+"<br>"+ str(round(pol, 3))), fill=True, fill_opacity=0.7, color = 'red').add_to(my_map4)
 
-    for lat, lon, Arithmetic_Mean_Q, Arithmetic_Mean, city, AQI in zip(
-        df[lat],
-        df[long],
-        df["Arithmetic Mean Q"],
-        df[dataLoc],
-        df["City Name"],
-        df["AQI"],
-    ):
-        folium.CircleMarker(
-            [lat, lon],
-            radius=0.15 * AQI,
-            popup=(
-                "City: "
-                + str(city).capitalize()
-                + "<br>"
-                #'Bike score: ' + str(bike) + '<br>'
-                "Arithmetic_Mean level: "
-                + str(Arithmetic_Mean)
-                + "%"
-            ),
-            color="b",
-            key_on=Arithmetic_Mean_Q,
-            threshold_scale=[0, 1, 2, 3],
-            fill_color=colordict[Arithmetic_Mean_Q],
-            fill=True,
-            fill_opacity=0.7,
-        ).add_to(Arithmetic_Mean_map)
-    Arithmetic_Mean_map.save("mymap.html")
+    my_map4.save("googleMaps.html")
+    print('your map has been saved')
+    return my_map4
 
 
+#Example:
 # df = pd.read_csv('interpolData.csv')
-# googleMaps(df,'Latitude','Longitude','Ozone','Arithmetic Mean')
+# Call the function and display the map in Jupyter Notebook
+# map_obj = googleMaps(df, 'latitude', 'longitude', 'pm25', '2022-02-23', 5,10)
+# map_obj
diff --git a/vayu/scatterPlot.py b/vayu/scatterPlot.py
@@ -19,48 +19,28 @@ def scatterPlot(df, x, y, **kwargs):
     import matplotlib.cm as cm
     from math import pi
 
-    pm10 = df.pm10
-    o3 = df.o3
-    ws = df.ws
-    wd = df.wd
-    nox = df.nox
-    no2 = df.no2
-
+
     #########################################
     # converts wind data to randians
-    df = pd.DataFrame({"speed": ws, "direction": wd})
-    df["speed_x"] = df["speed"] * np.sin(df["direction"] * pi / 180.0)
-    df["speed_y"] = df["speed"] * np.cos(df["direction"] * pi / 180.0)
+    #df1 = pd.DataFrame({"speed": ws, "direction": wd})
+    df["speed"+str(x)] = df['ws'] * np.sin(df['wd'] * pi / 180.0)
+    df["speed"+str(y)] = df['ws'] * np.cos(df['wd'] * pi / 180.0)   
     fig, ax = plt.subplots(figsize=(8, 8), dpi=80)
     x0, x1 = ax.get_xlim()
     y0, y1 = ax.get_ylim()
-    ax.set_aspect("equal")
-    _ = df.plot(kind="scatter", x="speed_x", y="speed_y", alpha=0.35, ax=ax)
+    #ax.set_aspect("equal")
+    _ = df.plot(kind="scatter", x="speed"+str(x), y="speed"+str(y), alpha=0.35, ax=ax)
+    plt.show()
+
 
     ####################################
     # simple seaborn plot that shows how given variables relate with one another
-    if x == "nox":
-        x = nox
-    elif x == "no2":
-        x = no2
-    elif x == "o3":
-        x = o3
-    elif x == "pm10":
-        x = pm10
-    if y == "nox":
-        y = nox
-    elif y == "no2":
-        y = no2
-    elif y == "o3":
-        y = o3
-    elif y == "pm10":
-        y = pm10
-
-    sns.jointplot(x=x, y=y, kind="hex")
-
+    sns.jointplot(x=df[x].values, y=df[y].values, kind="hex")
+    plt.xlabel(x)
+    plt.ylabel(y)
     plt.show()
-
-
+    
+    
 # =============================================================================
 # df = pd.read_csv("mydata.csv")
 # scatterPlot(df,'nox','no2')

diff --git a/vayu/selectByDate.py b/vayu/selectByDate.py
@@ -1,28 +1,47 @@
-def selectByDate(df, year):
-    """ 
-    Utility function to cut given dataframe by the year 
-    and find the average value of each day 
+def selectByDate(df, year, group=None, time_period='day'):
+    """
+    Utility function to cut a given dataframe by year and find the average value
+    of each day, month, or year. Optionally, data can be grouped by specified columns.
 
     Parameters
     ----------
     df: data frame
-        a data frame containing a date field
+        A data frame containing a date field and optional grouping columns.
     year: type string
-        a year to select to cut data
+        A year to select and filter the data.
+    group: list, optional
+        A list of columns to group the data by. Default is None (no grouping).
+    time_period: {'day', 'month', 'year'}, optional
+        The time period to compute the average value. Default is 'day'.
+
+    Returns
+    -------
+    data frame
+        A data frame with the average value of each day, month, or year.
+        If group is specified, the data will be grouped accordingly.
     """
     import pandas as pd
     import numpy as np
-
-    df.index = pd.to_datetime(df.date)
-    df = df.drop("date", axis=1)
-    df_n = df[year].resample("1D").mean()
-    df_n = df_n.fillna(method="ffill")
-    df_n["month"] = df_n.index.month
-    df_n.index.dayofweek
-    print(df_n)
+
+    df['date'] = pd.to_datetime(df['date'])
+    df_year = df[df['date'].dt.year == int(year)]
+
+    if group:
+        df_grouped = df_year.groupby(group).resample(time_period[0], on='date').mean(numeric_only=True)
+        return df_grouped
+
+    if time_period == 'month':
+        df_month = df_year.resample('M', on='date').mean(numeric_only=True)
+        return df_month
+    elif time_period == 'year':
+        df_yearly = df_year.resample('Y', on='date').mean(numeric_only=True)
+        return df_yearly
+
+    df_day = df_year.resample('D', on='date').mean(numeric_only=True)
+    return df_day
 
 
 # =============================================================================
 # df = pd.read_csv("mydata.csv")
-# selectByDate(df,'2003')
+# selectByDate(df, '2022', group=['latitude', 'longitude', 'station'], time_period='month')
 # =============================================================================
diff --git a/vayu/timePlot.py b/vayu/timePlot.py
@@ -1,5 +1,4 @@
-def timePlot(df, year, month, 
-    pollutants=["ws", "nox", "o3", "pm25", "pm10"]):
+def timePlot(df, year, pollutants=["pm25"]):
     """
     Plot time series of pollutants for given month and year.
 
@@ -10,46 +9,61 @@ def timePlot(df, year, month,
         and at least one variable to plot
     year: str
         year of which data will be cut
-    month: int
-        month of what plot will be graphed
     pollutants: list
         column names of pollutatnts to compare
     """
     import numpy as np
     import pandas as pd
     import matplotlib as mpl
     import matplotlib.pyplot as plt
+    import plotly.graph_objects as go
 
-    # Cuts the df down to the month specified
+    # Cuts the df down to the year specified
     df.index = pd.to_datetime(df.date)
-    df = df.drop("date", axis=1)
-    df_n = df[year]
-    df_n = df_n.fillna(method="ffill")
-    df_n["month"] = df_n.index.month
-    df_n.index.dayofweek
-    df_n_1 = df_n[df_n.month == month]
-    # New lists that have the value of the pollutant in the month specified
-
-    color_list = ["red", "blue", "green", "purple", "orange"]
-
-    plt.figure(1)
-    # series of `len(pollutants)` plots in one large plot that contains the
-    # time series of the polutants
-
-    axs = []
-
-    for ix, pollutant in enumerate(pollutants):
-        values = df_n_1[pollutant]
-        color = color_list[ix % len(color_list)]
-
-        # plotting
-        plt.subplot(f"{len(pollutants)}1{ix}")
-        a = values.plot.line(color=color)
-        a.axes.get_xaxis().set_visible(False)
-        a.yaxis.set_label_position("right")
-        axs.append(a)
-        plt.ylabel(pollutant)
+    df_n_1 = df[(df.index.year == int(year))]
+    #df_n_1 = df[(df.index.month == int(month)) & (df.index.year == int(year))]
+
+    fig = go.Figure()
+
+    for pollutant in pollutants:
+        if pollutant in df_n_1.columns:
+            values = df_n_1[pollutant]
+
+            # Add trace for each selected pollutant
+            fig.add_trace(go.Scatter(
+                x=values.index,
+                y=values.values,
+                name=pollutant
+            ))
+        else:
+            print(f"Warning: {pollutant} data not found.")
+
+    # Configure layout
+    fig.update_layout(
+        xaxis=dict(
+            rangeselector=dict(
+                buttons=list([
+                    dict(count=1, label="1d", step="day", stepmode="backward"),
+                    dict(count=7, label="1w", step="day", stepmode="backward"),
+                    dict(count=1, label="1m", step="month", stepmode="backward"),
+                    dict(count=6, label="6m", step="month", stepmode="backward"),
+                    dict(count=1, label="YTD", step="year", stepmode="todate"),
+                    dict(count=1, label="1y", step="year", stepmode="backward"),
+                    dict(step="all")
+                ])
+                #active=2
+            ),
+            rangeslider=dict(
+                visible=True
+            ),
+
+            type="date"
+        )
+    )
+
+    fig.show()
 
-    # making dates visible.
-    axs[0].axes.get_xaxis().set_visible(True)
-    return axs
+#Example:
+# timePlot(df, 2022, pollutants=['pm25', 'pm10', 'ws', 'wd'])  # or any other columns of df
+#--------------------
+