Skip to content

Commit

Permalink
cleaned before PR submission
Browse files Browse the repository at this point in the history
  • Loading branch information
msleckman committed Mar 23, 2022
1 parent fdf31a3 commit eb56e45
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 18 deletions.
34 changes: 18 additions & 16 deletions gridmet_aggregation_PRMS.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# libraries
import geopandas as gpd
import geopandas.geodataframe
import xarray as xr
import time

def ncdf_to_df(ncdf_path, shp, left_on = 'geomid', right_on_index = True, gpkg_layer = None):
# ncdf_to_gdf() function converts ncdf to dataset and merged with shpfile information (geometry + area)
def ncdf_to_gdf(ncdf_path, shp, left_on = 'geomid', right_on_index = True, gpkg_layer = None):

"""
:param str ncdf_path: path to regridded ncdf file (output of g2shp_regridding())
Expand All @@ -13,26 +15,23 @@ def ncdf_to_df(ncdf_path, shp, left_on = 'geomid', right_on_index = True, gpkg_l
:return: Geodataframe of the original ncdf file with geospatial information (e.g. geom)
"""

# Read in
## Read in
xr_mapped_df = xr.open_dataset(ncdf_path, decode_times=True).to_dataframe().reset_index()

if isinstance(shp, geopandas.geodataframe.GeoDataFrame):
if isinstance(shp, gpd.geodataframe.GeoDataFrame):
gdf = shp
elif os.path.isfile(shp):
gdf_prms_path_edited = shp
gdf = gpd.read_file(gdf_prms_path_edited, layer=gpkg_layer)

else:
print('shp must be path to geospatial file or a geodataframe')

## Merge & convert to GeoDataFrame
## Merge ncdf w/ shapefile (the shpfile has area info) & convert to GeoDataFrame
gridmet_drb_df = xr_mapped_df.merge(gdf, how ='left', left_on = left_on, right_index = right_on_index)

gridmet_drb_gdf = gpd.GeoDataFrame(gridmet_drb_df)

return gridmet_drb_gdf

## function #2
# gridmet_prms_area_avg_agg() function aggregates to groupby_cols scale and outputs the aggregated dataset
def gridmet_prms_area_avg_agg(df, groupby_cols, val_colnames, wgt_col, output_path = None):
"""
:param pd.DataFrame or gpd.GeoDataFrame df:
Expand All @@ -48,11 +47,11 @@ def gridmet_prms_area_avg_agg(df, groupby_cols, val_colnames, wgt_col, output_pa
start = time.perf_counter()

df = df.copy()
# Calc value x weight col
## 1. Calc value x weight col
for col in val_colnames:
df[f'{col}_x_area'] = df[col]*df[wgt_col]

# Create group by by suming new value cols weighted by are
## 2. Create group by, by summing new value cols weighted by area
df_grouped = df.groupby(groupby_cols).agg(area_m2_sum = (wgt_col, 'sum'),
tmmx_wgtd_sum = ('tmmx_x_area', 'sum'),
tmmn_wgtd_sum = ('tmmn_x_area', 'sum'),
Expand All @@ -63,7 +62,7 @@ def gridmet_prms_area_avg_agg(df, groupby_cols, val_colnames, wgt_col, output_pa
rmin_wgtd_sum = ('rmin_x_area', 'sum'),
sph_wgtd_sum = ('sph_x_area', 'sum')
)
# Get weighted average
## 3. Get weighted average
for col in val_colnames:
df_grouped[f'{col}_wgtd_avg'] = df_grouped[f'{col}_wgtd_sum'] / df_grouped['area_m2_sum']

Expand All @@ -76,20 +75,21 @@ def gridmet_prms_area_avg_agg(df, groupby_cols, val_colnames, wgt_col, output_pa

end = time.perf_counter()

print('Time (sec) ellapsed:' + end - start)
print('Time (sec) ellapsed:', end - start)

return df_final

# Define variables and run functions
# Define variables and run
if __name__ =='__main__':

## Variable definitions
gdf_prms_path_edited = './data/GFv1_catchments_edited.gpkg'
gdf = gpd.read_file(gdf_prms_path_edited, layer = 'GFv1_catchments_edited')
gridmet_ncdf = './data/t_climate_2022_03_09.nc'
data_vars_shrt_all = ['tmmx', 'tmmn', 'pr', 'srad', 'vs', 'rmax', 'rmin', 'sph']

## Create dataframe and merge with shapefile information
gridmet_drb_gdf = ncdf_to_df(ncdf_path='./data/t_climate_2022_03_09.nc',
gridmet_drb_gdf = ncdf_to_gdf(ncdf_path=gridmet_ncdf,
shp = gdf,
left_on = 'geomid',
right_on_index = True)
Expand All @@ -99,5 +99,7 @@ def gridmet_prms_area_avg_agg(df, groupby_cols, val_colnames, wgt_col, output_pa
groupby_cols = ['PRMS_segid',"time"],
val_colnames = data_vars_shrt_all,
wgt_col='hru_area_m2',
output_path= 'data/gridmet_drb_agg.csv')
output_path= None)

## Uncomment to run
# df_agg.reset_index().to_csv('../drb-inland-salinity-ml/1_fetch/in/grdmet_drb_agg_032321.csv', index = False)
3 changes: 1 addition & 2 deletions gridmet_split_script.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
import pickle
import time
import geopandas as gpd
import geopandas.geodataframe
import grd2shp_xagg
import xagg as xa
import xarray as xr
Expand All @@ -26,7 +25,7 @@ def get_gridmet_datasets(variable, start_date, end_date, polygon_for_bbox = None

## check/define bounds for data slicing
if polygon_for_bbox is not None:
if isinstance(polygon_for_bbox, geopandas.geodataframe.GeoDataFrame):
if isinstance(polygon_for_bbox, gpd.geodataframe.GeoDataFrame):
print('polygon is geodataframe')
print(polygon_for_bbox.total_bounds)
pass
Expand Down

0 comments on commit eb56e45

Please sign in to comment.