From eb56e457e6093b2a084603258a049c3f215fd00a Mon Sep 17 00:00:00 2001 From: Margaux Sleckman Date: Wed, 23 Mar 2022 13:01:08 -0700 Subject: [PATCH] cleaned before PR submission --- gridmet_aggregation_PRMS.py | 34 ++++++++++++++++++---------------- gridmet_split_script.py | 3 +-- 2 files changed, 19 insertions(+), 18 deletions(-) diff --git a/gridmet_aggregation_PRMS.py b/gridmet_aggregation_PRMS.py index fcb13e0..c4f7f7b 100644 --- a/gridmet_aggregation_PRMS.py +++ b/gridmet_aggregation_PRMS.py @@ -1,8 +1,10 @@ +# libraries import geopandas as gpd -import geopandas.geodataframe import xarray as xr +import time -def ncdf_to_df(ncdf_path, shp, left_on = 'geomid', right_on_index = True, gpkg_layer = None): +# ncdf_to_gdf() function converts ncdf to dataset and merged with shpfile information (geometry + area) +def ncdf_to_gdf(ncdf_path, shp, left_on = 'geomid', right_on_index = True, gpkg_layer = None): """ :param str ncdf_path: path to regridded ncdf file (output of g2shp_regridding()) @@ -13,26 +15,23 @@ def ncdf_to_df(ncdf_path, shp, left_on = 'geomid', right_on_index = True, gpkg_l :return: Geodataframe of the original ncdf file with geospatial information (e.g. geom) """ - # Read in + ## Read in xr_mapped_df = xr.open_dataset(ncdf_path, decode_times=True).to_dataframe().reset_index() - - if isinstance(shp, geopandas.geodataframe.GeoDataFrame): + if isinstance(shp, gpd.geodataframe.GeoDataFrame): gdf = shp elif os.path.isfile(shp): gdf_prms_path_edited = shp gdf = gpd.read_file(gdf_prms_path_edited, layer=gpkg_layer) - else: print('shp must be path to geospatial file or a geodataframe') - ## Merge & convert to GeoDataFrame + ## Merge ncdf w/ shapefile (the shpfile has area info) & convert to GeoDataFrame gridmet_drb_df = xr_mapped_df.merge(gdf, how ='left', left_on = left_on, right_index = right_on_index) - gridmet_drb_gdf = gpd.GeoDataFrame(gridmet_drb_df) return gridmet_drb_gdf -## function #2 +# gridmet_prms_area_avg_agg() function aggregates to groupby_cols scale and outputs the aggregated dataset def gridmet_prms_area_avg_agg(df, groupby_cols, val_colnames, wgt_col, output_path = None): """ :param pd.DataFrame or gpd.GeoDataFrame df: @@ -48,11 +47,11 @@ def gridmet_prms_area_avg_agg(df, groupby_cols, val_colnames, wgt_col, output_pa start = time.perf_counter() df = df.copy() - # Calc value x weight col + ## 1. Calc value x weight col for col in val_colnames: df[f'{col}_x_area'] = df[col]*df[wgt_col] - # Create group by by suming new value cols weighted by are + ## 2. Create group by, by summing new value cols weighted by area df_grouped = df.groupby(groupby_cols).agg(area_m2_sum = (wgt_col, 'sum'), tmmx_wgtd_sum = ('tmmx_x_area', 'sum'), tmmn_wgtd_sum = ('tmmn_x_area', 'sum'), @@ -63,7 +62,7 @@ def gridmet_prms_area_avg_agg(df, groupby_cols, val_colnames, wgt_col, output_pa rmin_wgtd_sum = ('rmin_x_area', 'sum'), sph_wgtd_sum = ('sph_x_area', 'sum') ) - # Get weighted average + ## 3. Get weighted average for col in val_colnames: df_grouped[f'{col}_wgtd_avg'] = df_grouped[f'{col}_wgtd_sum'] / df_grouped['area_m2_sum'] @@ -76,20 +75,21 @@ def gridmet_prms_area_avg_agg(df, groupby_cols, val_colnames, wgt_col, output_pa end = time.perf_counter() - print('Time (sec) ellapsed:' + end - start) + print('Time (sec) ellapsed:', end - start) return df_final -# Define variables and run functions +# Define variables and run if __name__ =='__main__': ## Variable definitions gdf_prms_path_edited = './data/GFv1_catchments_edited.gpkg' gdf = gpd.read_file(gdf_prms_path_edited, layer = 'GFv1_catchments_edited') + gridmet_ncdf = './data/t_climate_2022_03_09.nc' data_vars_shrt_all = ['tmmx', 'tmmn', 'pr', 'srad', 'vs', 'rmax', 'rmin', 'sph'] ## Create dataframe and merge with shapefile information - gridmet_drb_gdf = ncdf_to_df(ncdf_path='./data/t_climate_2022_03_09.nc', + gridmet_drb_gdf = ncdf_to_gdf(ncdf_path=gridmet_ncdf, shp = gdf, left_on = 'geomid', right_on_index = True) @@ -99,5 +99,7 @@ def gridmet_prms_area_avg_agg(df, groupby_cols, val_colnames, wgt_col, output_pa groupby_cols = ['PRMS_segid',"time"], val_colnames = data_vars_shrt_all, wgt_col='hru_area_m2', - output_path= 'data/gridmet_drb_agg.csv') + output_path= None) + ## Uncomment to run + # df_agg.reset_index().to_csv('../drb-inland-salinity-ml/1_fetch/in/grdmet_drb_agg_032321.csv', index = False) \ No newline at end of file diff --git a/gridmet_split_script.py b/gridmet_split_script.py index d010172..a7b918a 100644 --- a/gridmet_split_script.py +++ b/gridmet_split_script.py @@ -2,7 +2,6 @@ import pickle import time import geopandas as gpd -import geopandas.geodataframe import grd2shp_xagg import xagg as xa import xarray as xr @@ -26,7 +25,7 @@ def get_gridmet_datasets(variable, start_date, end_date, polygon_for_bbox = None ## check/define bounds for data slicing if polygon_for_bbox is not None: - if isinstance(polygon_for_bbox, geopandas.geodataframe.GeoDataFrame): + if isinstance(polygon_for_bbox, gpd.geodataframe.GeoDataFrame): print('polygon is geodataframe') print(polygon_for_bbox.total_bounds) pass