From 03f403ac26975be604610c93378099174fafec23 Mon Sep 17 00:00:00 2001
From: Tanvi-Jain01 <tanvij.0072@gmail.com>
Date: Fri, 30 Jun 2023 08:59:09 +0530
Subject: [PATCH 01/11] enhanced code  of scatterPlot(refer issue #43)

---
 vayu/scatterPlot.py | 46 +++++++++++++--------------------------------
 1 file changed, 13 insertions(+), 33 deletions(-)

diff --git a/vayu/scatterPlot.py b/vayu/scatterPlot.py
index 7cd2390..229214f 100644
--- a/vayu/scatterPlot.py
+++ b/vayu/scatterPlot.py
@@ -19,48 +19,28 @@ def scatterPlot(df, x, y, **kwargs):
     import matplotlib.cm as cm
     from math import pi
 
-    pm10 = df.pm10
-    o3 = df.o3
-    ws = df.ws
-    wd = df.wd
-    nox = df.nox
-    no2 = df.no2
-
+   
     #########################################
     # converts wind data to randians
-    df = pd.DataFrame({"speed": ws, "direction": wd})
-    df["speed_x"] = df["speed"] * np.sin(df["direction"] * pi / 180.0)
-    df["speed_y"] = df["speed"] * np.cos(df["direction"] * pi / 180.0)
+    #df1 = pd.DataFrame({"speed": ws, "direction": wd})
+    df["speed"+str(x)] = df['ws'] * np.sin(df['wd'] * pi / 180.0)
+    df["speed"+str(y)] = df['ws'] * np.cos(df['wd'] * pi / 180.0)   
     fig, ax = plt.subplots(figsize=(8, 8), dpi=80)
     x0, x1 = ax.get_xlim()
     y0, y1 = ax.get_ylim()
-    ax.set_aspect("equal")
-    _ = df.plot(kind="scatter", x="speed_x", y="speed_y", alpha=0.35, ax=ax)
+    #ax.set_aspect("equal")
+    _ = df.plot(kind="scatter", x="speed"+str(x), y="speed"+str(y), alpha=0.35, ax=ax)
+    plt.show()
+    
 
     ####################################
     # simple seaborn plot that shows how given variables relate with one another
-    if x == "nox":
-        x = nox
-    elif x == "no2":
-        x = no2
-    elif x == "o3":
-        x = o3
-    elif x == "pm10":
-        x = pm10
-    if y == "nox":
-        y = nox
-    elif y == "no2":
-        y = no2
-    elif y == "o3":
-        y = o3
-    elif y == "pm10":
-        y = pm10
-
-    sns.jointplot(x=x, y=y, kind="hex")
-
+    sns.jointplot(x=df[x].values, y=df[y].values, kind="hex")
+    plt.xlabel(x)
+    plt.ylabel(y)
     plt.show()
-
-
+    
+    
 # =============================================================================
 # df = pd.read_csv("mydata.csv")
 # scatterPlot(df,'nox','no2')

From a2ea7c73c045beea12cc074cb8952467e17884ce Mon Sep 17 00:00:00 2001
From: Tanvi-Jain01 <tanvij.0072@gmail.com>
Date: Fri, 30 Jun 2023 15:34:52 +0530
Subject: [PATCH 02/11] timePlot: modify plots using plotly

---
 vayu/timePlot.py | 84 ++++++++++++++++++++++++++++--------------------
 1 file changed, 50 insertions(+), 34 deletions(-)

diff --git a/vayu/timePlot.py b/vayu/timePlot.py
index 4470099..7ab87db 100644
--- a/vayu/timePlot.py
+++ b/vayu/timePlot.py
@@ -1,5 +1,4 @@
-def timePlot(df, year, month, 
-    pollutants=["ws", "nox", "o3", "pm25", "pm10"]):
+def timePlot(df, year, pollutants=["pm25", "pm10"]):
     """
     Plot time series of pollutants for given month and year.
         
@@ -19,37 +18,54 @@ def timePlot(df, year, month,
     import pandas as pd
     import matplotlib as mpl
     import matplotlib.pyplot as plt
-
-    # Cuts the df down to the month specified
+    import plotly.graph_objects as go
+    
+     # Cuts the df down to the month specified
     df.index = pd.to_datetime(df.date)
-    df = df.drop("date", axis=1)
-    df_n = df[year]
-    df_n = df_n.fillna(method="ffill")
-    df_n["month"] = df_n.index.month
-    df_n.index.dayofweek
-    df_n_1 = df_n[df_n.month == month]
-    # New lists that have the value of the pollutant in the month specified
-
-    color_list = ["red", "blue", "green", "purple", "orange"]
-
-    plt.figure(1)
-    # series of `len(pollutants)` plots in one large plot that contains the
-    # time series of the polutants
-
-    axs = []
-
-    for ix, pollutant in enumerate(pollutants):
-        values = df_n_1[pollutant]
-        color = color_list[ix % len(color_list)]
-
-        # plotting
-        plt.subplot(f"{len(pollutants)}1{ix}")
-        a = values.plot.line(color=color)
-        a.axes.get_xaxis().set_visible(False)
-        a.yaxis.set_label_position("right")
-        axs.append(a)
-        plt.ylabel(pollutant)
+    df_n_1 = df[(df.index.year == int(year))]
+    #df_n_1 = df[(df.index.month == int(month)) & (df.index.year == int(year))]
+    
+    fig = go.Figure()
+    
+    for pollutant in pollutants:
+        if pollutant in df_n_1.columns:
+            values = df_n_1[pollutant]
+            
+            # Add trace for each selected pollutant
+            fig.add_trace(go.Scatter(
+                x=values.index,
+                y=values.values,
+                name=pollutant
+            ))
+        else:
+            print(f"Warning: {pollutant} data not found.")
+        
+    # Configure layout
+    fig.update_layout(
+        xaxis=dict(
+            rangeselector=dict(
+                buttons=list([
+                    dict(count=1, label="1d", step="day", stepmode="backward"),
+                    dict(count=7, label="1w", step="day", stepmode="backward"),
+                    dict(count=1, label="1m", step="month", stepmode="backward"),
+                    dict(count=6, label="6m", step="month", stepmode="backward"),
+                    dict(count=1, label="YTD", step="year", stepmode="todate"),
+                    dict(count=1, label="1y", step="year", stepmode="backward"),
+                    dict(step="all")
+                ])
+                #active=2
+            ),
+            rangeslider=dict(
+                visible=True
+            ),
+            
+            type="date"
+        )
+    )
+    
+    fig.show()
 
-    # making dates visible.
-    axs[0].axes.get_xaxis().set_visible(True)
-    return axs
+#Example:
+#timePlot(df, 2022, pollutants=['pm25','pm10','ws','wd'...and so on])
+#--------------------
+    
\ No newline at end of file

From d7d2e6bdfaacfa2b4e12b772fecaa701403eb497 Mon Sep 17 00:00:00 2001
From: Tanvi-Jain01 <tanvij.0072@gmail.com>
Date: Fri, 30 Jun 2023 15:39:37 +0530
Subject: [PATCH 03/11] Adding visualization using plotly

---
 vayu/timePlot.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vayu/timePlot.py b/vayu/timePlot.py
index 7ab87db..42a6ec5 100644
--- a/vayu/timePlot.py
+++ b/vayu/timePlot.py
@@ -19,7 +19,7 @@ def timePlot(df, year, pollutants=["pm25", "pm10"]):
     import matplotlib as mpl
     import matplotlib.pyplot as plt
     import plotly.graph_objects as go
-    
+
      # Cuts the df down to the month specified
     df.index = pd.to_datetime(df.date)
     df_n_1 = df[(df.index.year == int(year))]

From fd7c45cb673539d28898d679e6a10aea732339ee Mon Sep 17 00:00:00 2001
From: Tanvi-Jain01 <tanvij.0072@gmail.com>
Date: Fri, 30 Jun 2023 16:20:19 +0530
Subject: [PATCH 04/11] modifying the code of googleMaps

---
 vayu/googleMaps.py | 66 +++++++++++++++-------------------------------
 1 file changed, 21 insertions(+), 45 deletions(-)

diff --git a/vayu/googleMaps.py b/vayu/googleMaps.py
index 4d9a694..22474b8 100644
--- a/vayu/googleMaps.py
+++ b/vayu/googleMaps.py
@@ -15,9 +15,11 @@ def googleMaps(df, lat, long, pollutant, dataLoc):
     long: str
         Name of column in df of where longitude points are
     pollutant: str
-        Name of pollutant 
-    dataLoc: str
-        Name of df column where pollutanat values are stored
+        Name of pollutant where values of that pollutant is stored.
+    date: str
+        visualizing the pollutant of a specific date.
+    markersize: int
+        The int by which the value of pollutant will be multiplied.
 
     """
     import folium
@@ -26,56 +28,30 @@ def googleMaps(df, lat, long, pollutant, dataLoc):
     import matplotlib.pyplot as plt
     import numpy as np
     import pandas as pd
-
-    latitude = 37.0902
-    longitude = -95.7129
-    Arithmetic_Mean_map = folium.Map(location=[latitude, longitude], zoom_start=4)
+    
+    def googleMaps(df, lat, long, pollutant, date, markersize):
+        df1=df
+        print(date)
+        df1=df[df['date']==date]
+        print(df1)
+    
 
     # =============================================================================
     # df = pd.read_csv('interpolData.csv')
     # =============================================================================
 
-    some_value = pollutant
-    df = df.loc[df["Parameter Name"] == some_value]
-
-    some_value = "2018-05-07"
-    df = df.loc[df["Date Local"] == some_value]
-
-    df = df.sample(frac=1)
+        lat= df1[lat].values[0] 
+        long=df1[long].values[0] 
+        my_map4 = folium.Map(location = [lat, long], zoom_start = 10)
 
-    # df_train, df_test = train_test_split(df, test_size=0.2)
-    df["Arithmetic Mean Q"] = pd.qcut(df[dataLoc], 4, labels=False)
-    colordict = {0: "lightblue", 1: "lightgreen", 2: "orange", 3: "red"}
+        for lat,long,pol,st in zip(df['latitude'],df['longitude'],df[pollutant],df['station']):
+            
+            folium.CircleMarker([lat, long],radius=markersize * pol, popup=(str(st).capitalize()+"<br>"+ str(round(pol, 3))), fill=True, fill_opacity=0.7, color = 'red').add_to(my_map4)
 
-    for lat, lon, Arithmetic_Mean_Q, Arithmetic_Mean, city, AQI in zip(
-        df[lat],
-        df[long],
-        df["Arithmetic Mean Q"],
-        df[dataLoc],
-        df["City Name"],
-        df["AQI"],
-    ):
-        folium.CircleMarker(
-            [lat, lon],
-            radius=0.15 * AQI,
-            popup=(
-                "City: "
-                + str(city).capitalize()
-                + "<br>"
-                #'Bike score: ' + str(bike) + '<br>'
-                "Arithmetic_Mean level: "
-                + str(Arithmetic_Mean)
-                + "%"
-            ),
-            color="b",
-            key_on=Arithmetic_Mean_Q,
-            threshold_scale=[0, 1, 2, 3],
-            fill_color=colordict[Arithmetic_Mean_Q],
-            fill=True,
-            fill_opacity=0.7,
-        ).add_to(Arithmetic_Mean_map)
-    Arithmetic_Mean_map.save("mymap.html")
+        my_map4.save("googleMaps.html")
+        print('your map has been saved')
 
 
+#Example
 # df = pd.read_csv('interpolData.csv')
 # googleMaps(df,'Latitude','Longitude','Ozone','Arithmetic Mean')

From 5b48d3156e57af55485e58598092e323fe8c3feb Mon Sep 17 00:00:00 2001
From: Tanvi-Jain01 <tanvij.0072@gmail.com>
Date: Fri, 30 Jun 2023 16:25:04 +0530
Subject: [PATCH 05/11] modifying googlemaps #38

---
 vayu/googleMaps.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vayu/googleMaps.py b/vayu/googleMaps.py
index 22474b8..97a7e05 100644
--- a/vayu/googleMaps.py
+++ b/vayu/googleMaps.py
@@ -1,4 +1,4 @@
-def googleMaps(df, lat, long, pollutant, dataLoc):
+def googleMaps(df, lat, long, pollutant, date, markersize):
     """Plots a geographical plot.
 
     Plots a folium plot of longitude and latitude points 
@@ -54,4 +54,4 @@ def googleMaps(df, lat, long, pollutant, date, markersize):
 
 #Example
 # df = pd.read_csv('interpolData.csv')
-# googleMaps(df,'Latitude','Longitude','Ozone','Arithmetic Mean')
+#googleMaps(df, 'latitude', 'longitude', 'pm25', '2022-08-23', 5)

From 043b730917c62ba042dd3817cfcbd2ae15e99f1b Mon Sep 17 00:00:00 2001
From: Tanvi-Jain01 <tanvij.0072@gmail.com>
Date: Mon, 10 Jul 2023 12:00:18 +0530
Subject: [PATCH 06/11] timePlot: using plotly, subplot error solved

---
 vayu/timePlot.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/vayu/timePlot.py b/vayu/timePlot.py
index 42a6ec5..052091b 100644
--- a/vayu/timePlot.py
+++ b/vayu/timePlot.py
@@ -1,4 +1,4 @@
-def timePlot(df, year, pollutants=["pm25", "pm10"]):
+def timePlot(df, year, pollutants=["pm25"]):
     """
     Plot time series of pollutants for given month and year.
         
@@ -9,8 +9,6 @@ def timePlot(df, year, pollutants=["pm25", "pm10"]):
         and at least one variable to plot
     year: str
         year of which data will be cut
-    month: int
-        month of what plot will be graphed
     pollutants: list
         column names of pollutatnts to compare
     """

From 273342d38c18c192880e9c54430de37cb317b072 Mon Sep 17 00:00:00 2001
From: Tanvi-Jain01 <tanvij.0072@gmail.com>
Date: Mon, 10 Jul 2023 12:29:24 +0530
Subject: [PATCH 07/11] googleMaps code enhanced and errors solved

---
 vayu/googleMaps.py | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/vayu/googleMaps.py b/vayu/googleMaps.py
index 97a7e05..a06dd4e 100644
--- a/vayu/googleMaps.py
+++ b/vayu/googleMaps.py
@@ -1,4 +1,4 @@
-def googleMaps(df, lat, long, pollutant, date, markersize):
+def googleMaps(df, lat, long, pollutant, date, markersize,zoom):
     """Plots a geographical plot.
 
     Plots a folium plot of longitude and latitude points 
@@ -20,6 +20,8 @@ def googleMaps(df, lat, long, pollutant, date, markersize):
         visualizing the pollutant of a specific date.
     markersize: int
         The int by which the value of pollutant will be multiplied.
+    zoom: int
+        The int by which you want to zoom in the plot
 
     """
     import folium
@@ -29,29 +31,27 @@ def googleMaps(df, lat, long, pollutant, date, markersize):
     import numpy as np
     import pandas as pd
     
-    def googleMaps(df, lat, long, pollutant, date, markersize):
-        df1=df
-        print(date)
-        df1=df[df['date']==date]
-        print(df1)
-    
+   
+    df1 = df[df['date'] == date]
 
     # =============================================================================
     # df = pd.read_csv('interpolData.csv')
     # =============================================================================
 
-        lat= df1[lat].values[0] 
-        long=df1[long].values[0] 
-        my_map4 = folium.Map(location = [lat, long], zoom_start = 10)
+    # Centre the map on the first row of the selected date; lat/long stay column names.
+    centre = [df1[lat].values[0], df1[long].values[0]]
+    my_map4 = folium.Map(location = centre, zoom_start = zoom)
 
-        for lat,long,pol,st in zip(df['latitude'],df['longitude'],df[pollutant],df['station']):
-            
-            folium.CircleMarker([lat, long],radius=markersize * pol, popup=(str(st).capitalize()+"<br>"+ str(round(pol, 3))), fill=True, fill_opacity=0.7, color = 'red').add_to(my_map4)
+    for pt_lat, pt_long, pol, st in zip(df1[lat], df1[long], df1[pollutant], df1['station']):
+        folium.CircleMarker([pt_lat, pt_long],radius=markersize * pol, popup=(str(st).capitalize()+"<br>"+ str(round(pol, 3))), fill=True, fill_opacity=0.7, color = 'red').add_to(my_map4)
 
-        my_map4.save("googleMaps.html")
-        print('your map has been saved')
+    my_map4.save("googleMaps.html")
+    print('your map has been saved')
+    return my_map4
 
 
-#Example
+#Example:
 # df = pd.read_csv('interpolData.csv')
-#googleMaps(df, 'latitude', 'longitude', 'pm25', '2022-08-23', 5)
+# Call the function and display the map in Jupyter Notebook
+# map_obj = googleMaps(df, 'latitude', 'longitude', 'pm25', '2022-02-23', 5,10)
+# map_obj

From 4087590b6646067b330fdd80d0796f84acfff761 Mon Sep 17 00:00:00 2001
From: Tanvi-Jain01 <tanvij.0072@gmail.com>
Date: Mon, 10 Jul 2023 13:54:59 +0530
Subject: [PATCH 08/11] code extended with group and time_period

---
 vayu/selectByDate.py | 49 ++++++++++++++++++++++++++++++--------------
 1 file changed, 34 insertions(+), 15 deletions(-)

diff --git a/vayu/selectByDate.py b/vayu/selectByDate.py
index 14aa415..4553238 100644
--- a/vayu/selectByDate.py
+++ b/vayu/selectByDate.py
@@ -1,28 +1,47 @@
-def selectByDate(df, year):
-    """ 
-    Utility function to cut given dataframe by the year 
-    and find the average value of each day 
+def selectByDate(df, year, group=None, time_period='day'):
+    """
+    Utility function to cut a given dataframe by year and find the average value
+    of each day, month, or year. Optionally, data can be grouped by specified columns.
     
     Parameters
     ----------
     df: data frame
-        a data frame containing a date field
+        A data frame containing a date field and optional grouping columns.
     year: type string
-        a year to select to cut data
+        A year to select and filter the data.
+    group: list, optional
+        A list of columns to group the data by. Default is None (no grouping).
+    time_period: {'day', 'month', 'year'}, optional
+        The time period to compute the average value. Default is 'day'.
+    
+    Returns
+    -------
+    data frame
+        A data frame with the average value of each day, month, or year.
+        If group is specified, the data will be grouped accordingly.
     """
     import pandas as pd
     import numpy as np
-
-    df.index = pd.to_datetime(df.date)
-    df = df.drop("date", axis=1)
-    df_n = df[year].resample("1D").mean()
-    df_n = df_n.fillna(method="ffill")
-    df_n["month"] = df_n.index.month
-    df_n.index.dayofweek
-    print(df_n)
+    
+    df['date'] = pd.to_datetime(df['date'])
+    df_year = df[df['date'].dt.year == int(year)]
+    
+    if group:
+        df_grouped = df_year.groupby(group).resample(time_period[0], on='date').mean(numeric_only=True)
+        return df_grouped
+    
+    if time_period == 'month':
+        df_month = df_year.resample('M', on='date').mean(numeric_only=True)
+        return df_month
+    elif time_period == 'year':
+        df_yearly = df_year.resample('Y', on='date').mean(numeric_only=True)
+        return df_yearly
+    
+    df_day = df_year.resample('D', on='date').mean(numeric_only=True)
+    return df_day
 
 
 # =============================================================================
 # df = pd.read_csv("mydata.csv")
-# selectByDate(df,'2003')
+#selectByDate(df1,'2022',group=['latitude','longitude','station'], time_period='month')
 # =============================================================================

From b53edd15e0e671b6f57d5b3b0f13c5bcdff91a2f Mon Sep 17 00:00:00 2001
From: Tanvi-Jain01 <tanvij.0072@gmail.com>
Date: Mon, 10 Jul 2023 14:29:31 +0530
Subject: [PATCH 09/11] add summary_plot with type hints and snake_case naming

---
 vayu/summary_plot.py | 130 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 130 insertions(+)
 create mode 100644 vayu/summary_plot.py

diff --git a/vayu/summary_plot.py b/vayu/summary_plot.py
new file mode 100644
index 0000000..bcc9303
--- /dev/null
+++ b/vayu/summary_plot.py
@@ -0,0 +1,130 @@
+import datetime as dt
+import matplotlib.pyplot as plt
+import matplotlib as mpl
+import numpy as np
+import pandas as pd
+from numpy import array
+import matplotlib.patches as mpatches
+import seaborn as sns
+from matplotlib.pyplot import figure
+
+def _band_counts(values, safe_max, moderate_max, inclusive):
+    """Count the samples of one pollutant in the Safe/Moderate/High bands.
+
+    ``values`` is a numeric pandas Series. With ``inclusive`` false a sample
+    is Safe when ``value < safe_max`` and Moderate when it is not Safe and
+    ``value < moderate_max``; with ``inclusive`` true both tests use ``<=``.
+    Everything else is High. NaN samples are ignored: they compare false
+    against every limit and would otherwise all be reported as High.
+
+    Returns the list ``[safe, moderate, high]`` of integer counts.
+    """
+    values = values.dropna()
+    if inclusive:
+        safe = values <= safe_max
+        within_moderate = values <= moderate_max
+    else:
+        safe = values < safe_max
+        within_moderate = values < moderate_max
+    n_safe = int(safe.sum())
+    n_moderate = int((within_moderate & ~safe).sum())
+    return [n_safe, n_moderate, len(values) - n_safe - n_moderate]
+
+
+def summary_plot(df: pd.DataFrame):
+    """Plot and print a per-column summary of an air-quality data frame.
+
+    The ``date`` column becomes the index; every remaining column gets one
+    row of three panels: a line plot, a 50-bin histogram and a pie chart
+    showing the share of samples in the Safe / Moderate / High bands.
+    Min, max, missing count, mean, median and 95th percentile of each
+    column are printed, and the figure is written to ``summary_plot.png``
+    in the current working directory.
+
+    Parameters
+    ----------
+    df: data frame
+        data frame to be summarised. Must contain a date field
+        and at least one other parameter. Every non-date column is
+        plotted, so all of them are expected to be numeric. The frame
+        passed in is not modified.
+
+    Notes
+    -----
+    A pie chart is drawn only for columns that have band limits in
+    ``bands`` and at least one non-missing sample; for any other column
+    the pie panel is left blank.
+    """
+    # Band limits per pollutant: (safe limit, moderate limit, inclusive).
+    # inclusive=True means the limits themselves still belong to the lower
+    # band (<=); otherwise the comparison is strict (<).
+    bands = {
+        "pm10": (100, 250, False),
+        "pm25": (100, 250, False),
+        "sox": (80, 380, True),
+        "co": (2, 10, False),
+        "o3": (100, 168, False),
+        "nox": (80, 180, False),
+        "pb": (1, 2, True),
+        "nh3": (400, 800, True),
+    }
+    labels = ["Safe", "Moderate", "High"]
+    explode = [0, 0, 1]
+
+    # set_index/drop return new frames, so the caller's df keeps its index.
+    df_all = df.set_index(pd.to_datetime(df["date"])).drop(columns="date")
+    # Forward-fill gaps; values before the first valid sample stay NaN.
+    df_all = df_all.ffill()
+
+    # Band counts for every known pollutant that is present in the data.
+    counts = {
+        pollutant: _band_counts(df_all[pollutant], *limits)
+        for pollutant, limits in bands.items()
+        if pollutant in df_all.columns
+    }
+
+    # squeeze=False keeps axes two-dimensional even for a single column,
+    # so axes[i] is always a row of three panels.
+    fig, axes = plt.subplots(len(df_all.columns), 3, figsize=(25, 25), squeeze=False)
+
+    for i, pollutant in enumerate(df_all.columns):
+        ax_line, ax_hist, ax_pie = axes[i]
+        series = df_all[pollutant]
+
+        series.plot.line(ax=ax_line, color="gold")
+        ax_line.axes.get_xaxis().set_visible(False)
+        ax_line.yaxis.set_label_position("left")
+        ax_line.set_ylabel(pollutant, fontsize=30, bbox=dict(facecolor="whitesmoke"))
+
+        # The histogram cannot derive a finite range from NaN values.
+        ax_hist.hist(series.dropna(), bins=50, color="green")
+
+        sizes = counts.get(pollutant)
+        if sizes and sum(sizes) > 0:
+            ax_pie.pie(sizes, explode=explode, labels=labels, autopct="%1.1f%%", shadow=False, startangle=90)
+            ax_pie.axis("equal")
+            ax_pie.set_xlabel("Statistics")
+        else:
+            # No band limits for this column, or no valid samples.
+            ax_pie.axis("off")
+
+        print(
+            f"{pollutant}\n"
+            f"min = {series.min():.2f}\n"
+            f"max = {series.max():.2f}\n"
+            f"missing = {series.isna().sum()}\n"
+            f"mean = {series.mean():.2f}\n"
+            f"median = {series.median():.2f}\n"
+            f"95th percentile = {series.quantile(0.95):.2f}\n"
+        )
+
+    plt.savefig("summary_plot.png", dpi=300, format="png")
+    plt.show()
+    print("your plots has also been saved")
+    plt.close()
+
+
+# =============================================================================
+# df = pd.read_csv('mydata.csv')
+# summary_plot(df)
+# =============================================================================

From 9ef916ec188d7361a414a159b6055c252bea2e8c Mon Sep 17 00:00:00 2001
From: Tanvi-Jain01 <tanvij.0072@gmail.com>
Date: Mon, 10 Jul 2023 14:58:45 +0530
Subject: [PATCH 10/11] added typehints and snake_case

---
 vayu/timePlot.py | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/vayu/timePlot.py b/vayu/timePlot.py
index 052091b..581063a 100644
--- a/vayu/timePlot.py
+++ b/vayu/timePlot.py
@@ -1,4 +1,10 @@
-def timePlot(df, year, pollutants=["pm25"]):
+import numpy as np
+import pandas as pd
+import matplotlib as mpl
+import matplotlib.pyplot as plt
+import plotly.graph_objects as go
+
+def time_plot(df:pd.DataFrame, year:str, pollutants:list=["pm25"]):
     """
     Plot time series of pollutants for given month and year.
         
@@ -12,12 +18,7 @@ def timePlot(df, year, pollutants=["pm25"]):
     pollutants: list
         column names of pollutatnts to compare
     """
-    import numpy as np
-    import pandas as pd
-    import matplotlib as mpl
-    import matplotlib.pyplot as plt
-    import plotly.graph_objects as go
-
+    
      # Cuts the df down to the month specified
     df.index = pd.to_datetime(df.date)
     df_n_1 = df[(df.index.year == int(year))]
@@ -64,6 +65,6 @@ def timePlot(df, year, pollutants=["pm25"]):
     fig.show()
 
 #Example:
-#timePlot(df, 2022, pollutants=['pm25','pm10','ws','wd'...and so on])
+#time_plot(df, 2022, pollutants=['pm25','pm10','ws','wd'...and so on])
 #--------------------
     
\ No newline at end of file

From 4e556943e6e6eb4f3810a2f96c944559dd820fef Mon Sep 17 00:00:00 2001
From: Tanvi-Jain01 <tanvij.0072@gmail.com>
Date: Mon, 10 Jul 2023 15:12:10 +0530
Subject: [PATCH 11/11] typehints, snake_case

---
 vayu/selectByDate.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/vayu/selectByDate.py b/vayu/selectByDate.py
index 4553238..a8080c3 100644
--- a/vayu/selectByDate.py
+++ b/vayu/selectByDate.py
@@ -1,4 +1,7 @@
-def selectByDate(df, year, group=None, time_period='day'):
+import pandas as pd
+import numpy as np
+    
+def select_by(df:pd.DataFrame, year:str, group:list=None, time_period:str='day'):
     """
     Utility function to cut a given dataframe by year and find the average value
     of each day, month, or year. Optionally, data can be grouped by specified columns.
@@ -20,8 +23,6 @@ def selectByDate(df, year, group=None, time_period='day'):
         A data frame with the average value of each day, month, or year.
         If group is specified, the data will be grouped accordingly.
     """
-    import pandas as pd
-    import numpy as np
     
     df['date'] = pd.to_datetime(df['date'])
     df_year = df[df['date'].dt.year == int(year)]
@@ -43,5 +44,5 @@ def selectByDate(df, year, group=None, time_period='day'):
 
 # =============================================================================
 # df = pd.read_csv("mydata.csv")
-#selectByDate(df1,'2022',group=['latitude','longitude','station'], time_period='month')
+#select_by(df1,'2022',group=['latitude','longitude','station'], time_period='month')
 # =============================================================================