From eb56e457e6093b2a084603258a049c3f215fd00a Mon Sep 17 00:00:00 2001
From: Margaux Sleckman <msleckman@usgs.gov>
Date: Wed, 23 Mar 2022 13:01:08 -0700
Subject: [PATCH] cleaned before PR submission

---
 gridmet_aggregation_PRMS.py | 34 ++++++++++++++++++----------------
 gridmet_split_script.py     |  3 +--
 2 files changed, 19 insertions(+), 18 deletions(-)

diff --git a/gridmet_aggregation_PRMS.py b/gridmet_aggregation_PRMS.py
index fcb13e0..c4f7f7b 100644
--- a/gridmet_aggregation_PRMS.py
+++ b/gridmet_aggregation_PRMS.py
@@ -1,8 +1,10 @@
+# libraries
 import geopandas as gpd
-import geopandas.geodataframe
 import xarray as xr
+import time
 
-def ncdf_to_df(ncdf_path, shp, left_on = 'geomid', right_on_index = True, gpkg_layer = None):
+# ncdf_to_gdf() function converts ncdf to dataset and merged with shpfile information (geometry + area)
+def ncdf_to_gdf(ncdf_path, shp, left_on = 'geomid', right_on_index = True, gpkg_layer = None):
 
     """
     :param str ncdf_path: path to regridded ncdf file (output of g2shp_regridding())
@@ -13,26 +15,23 @@ def ncdf_to_df(ncdf_path, shp, left_on = 'geomid', right_on_index = True, gpkg_l
     :return: Geodataframe of the original ncdf file with geospatial information (e.g. geom)
     """
 
-    # Read in
+    ## Read in
     xr_mapped_df = xr.open_dataset(ncdf_path, decode_times=True).to_dataframe().reset_index()
-
-    if isinstance(shp, geopandas.geodataframe.GeoDataFrame):
+    if isinstance(shp, gpd.geodataframe.GeoDataFrame):
         gdf = shp
     elif os.path.isfile(shp):
         gdf_prms_path_edited = shp
         gdf = gpd.read_file(gdf_prms_path_edited, layer=gpkg_layer)
-
     else:
         print('shp must be path to geospatial file or a geodataframe')
 
-    ## Merge & convert to GeoDataFrame
+    ## Merge ncdf w/ shapefile (the shpfile has area info) & convert to GeoDataFrame
     gridmet_drb_df = xr_mapped_df.merge(gdf, how ='left', left_on = left_on, right_index = right_on_index)
-
     gridmet_drb_gdf = gpd.GeoDataFrame(gridmet_drb_df)
 
     return gridmet_drb_gdf
 
-## function #2
+# gridmet_prms_area_avg_agg() function aggregates to groupby_cols scale and outputs the aggregated dataset
 def gridmet_prms_area_avg_agg(df, groupby_cols, val_colnames, wgt_col, output_path = None):
     """
     :param pd.DataFrame or gpd.GeoDataFrame df:
@@ -48,11 +47,11 @@ def gridmet_prms_area_avg_agg(df, groupby_cols, val_colnames, wgt_col, output_pa
     start = time.perf_counter()
 
     df = df.copy()
-    # Calc value x weight col
+    ## 1. Calc value x weight col
     for col in val_colnames:
            df[f'{col}_x_area'] = df[col]*df[wgt_col]
 
-    # Create group by by suming new value cols weighted by are
+    ## 2. Create group by, by summing new value cols weighted by area
     df_grouped = df.groupby(groupby_cols).agg(area_m2_sum = (wgt_col, 'sum'),
                                               tmmx_wgtd_sum = ('tmmx_x_area', 'sum'),
                                               tmmn_wgtd_sum = ('tmmn_x_area', 'sum'),
@@ -63,7 +62,7 @@ def gridmet_prms_area_avg_agg(df, groupby_cols, val_colnames, wgt_col, output_pa
                                               rmin_wgtd_sum = ('rmin_x_area', 'sum'),
                                               sph_wgtd_sum = ('sph_x_area', 'sum')
                                               )
-    # Get weighted average
+    ## 3. Get weighted average
     for col in val_colnames:
         df_grouped[f'{col}_wgtd_avg'] = df_grouped[f'{col}_wgtd_sum'] / df_grouped['area_m2_sum']
 
@@ -76,20 +75,21 @@ def gridmet_prms_area_avg_agg(df, groupby_cols, val_colnames, wgt_col, output_pa
 
     end = time.perf_counter()
 
-    print('Time (sec) ellapsed:' + end - start)
+    print('Time (sec) ellapsed:', end - start)
 
     return df_final
 
-# Define variables and run functions
+# Define variables and run
 if __name__ =='__main__':
 
     ## Variable definitions
     gdf_prms_path_edited = './data/GFv1_catchments_edited.gpkg'
     gdf = gpd.read_file(gdf_prms_path_edited, layer = 'GFv1_catchments_edited')
+    gridmet_ncdf = './data/t_climate_2022_03_09.nc'
     data_vars_shrt_all = ['tmmx', 'tmmn', 'pr', 'srad', 'vs', 'rmax', 'rmin', 'sph']
 
     ## Create dataframe and merge with shapefile information
-    gridmet_drb_gdf = ncdf_to_df(ncdf_path='./data/t_climate_2022_03_09.nc',
+    gridmet_drb_gdf = ncdf_to_gdf(ncdf_path=gridmet_ncdf,
                                  shp = gdf,
                                  left_on = 'geomid',
                                  right_on_index = True)
@@ -99,5 +99,7 @@ def gridmet_prms_area_avg_agg(df, groupby_cols, val_colnames, wgt_col, output_pa
                                          groupby_cols = ['PRMS_segid',"time"],
                                          val_colnames = data_vars_shrt_all,
                                          wgt_col='hru_area_m2',
-                                         output_path= 'data/gridmet_drb_agg.csv')
+                                         output_path= None)
 
+    ## Uncomment to run
+    # df_agg.reset_index().to_csv('../drb-inland-salinity-ml/1_fetch/in/grdmet_drb_agg_032321.csv', index = False)
\ No newline at end of file
diff --git a/gridmet_split_script.py b/gridmet_split_script.py
index d010172..a7b918a 100644
--- a/gridmet_split_script.py
+++ b/gridmet_split_script.py
@@ -2,7 +2,6 @@
 import pickle
 import time
 import geopandas as gpd
-import geopandas.geodataframe
 import grd2shp_xagg
 import xagg as xa
 import xarray as xr
@@ -26,7 +25,7 @@ def get_gridmet_datasets(variable, start_date, end_date, polygon_for_bbox = None
 
     ## check/define bounds for data slicing
     if polygon_for_bbox is not None:
-        if isinstance(polygon_for_bbox, geopandas.geodataframe.GeoDataFrame):
+        if isinstance(polygon_for_bbox, gpd.geodataframe.GeoDataFrame):
             print('polygon is geodataframe')
             print(polygon_for_bbox.total_bounds)
             pass