Merge pull request #1345 from emlys/feature/1328
Read CSVs using info from `MODEL_SPEC`
phargogh authored Aug 15, 2023
2 parents 3c0f155 + 24a67b6 commit 6db2256
Showing 34 changed files with 1,214 additions and 1,248 deletions.
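The pattern repeated across these files: `utils.read_csv_to_dataframe` is now called with the table's `MODEL_SPEC` entry rather than a bare index-column name, and callers work with the returned DataFrame directly instead of converting it to a nested dict. A minimal runnable sketch of the idea, using a hypothetical stand-in for a `MODEL_SPEC` entry and a simplified reader (not the actual InVEST helper):

import io
import pandas as pd

# Hypothetical stand-in for one MODEL_SPEC['args'] entry; the real spec
# carries much more (column types, units, about text).
spec = {'index_col': 'lucode'}

def read_csv_to_dataframe(path_or_buffer, arg_spec):
    # Sketch of the new call shape: the spec, not the call site,
    # names the index column.
    return pd.read_csv(path_or_buffer).set_index(arg_spec['index_col'])

csv_text = io.StringIO('lucode,kc\n1,0.65\n2,1.0\n')
df = read_csv_to_dataframe(csv_text, spec)
print(df.loc[1, 'kc'])  # 0.65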
2 changes: 1 addition & 1 deletion Makefile
@@ -6,7 +6,7 @@ GIT_SAMPLE_DATA_REPO_REV := a58b9c7bdd8a31cab469ea919fe0ebf23a6c668e

 GIT_TEST_DATA_REPO := https://bitbucket.org/natcap/invest-test-data.git
 GIT_TEST_DATA_REPO_PATH := $(DATA_DIR)/invest-test-data
-GIT_TEST_DATA_REPO_REV := a89253d83d5f70a8ea2d8a951b2d47d603505f14
+GIT_TEST_DATA_REPO_REV := 2749f8e984c9030ae30ab6d43e7075b7a2d27cf8

 GIT_UG_REPO := https://github.com/natcap/invest.users-guide
 GIT_UG_REPO_PATH := doc/users-guide
88 changes: 47 additions & 41 deletions src/natcap/invest/annual_water_yield.py
@@ -87,10 +87,19 @@
     }
 }
 SUBWATERSHED_OUTPUT_FIELDS = {
+    "subws_id": {
+        "type": "integer",
+        "about": gettext("Unique identifier for each subwatershed.")
+    },
     **BASE_OUTPUT_FIELDS,
-    **SCARCITY_OUTPUT_FIELDS
+    **SCARCITY_OUTPUT_FIELDS,
+
 }
 WATERSHED_OUTPUT_FIELDS = {
+    "ws_id": {
+        "type": "integer",
+        "about": gettext("Unique identifier for each watershed.")
+    },
     **BASE_OUTPUT_FIELDS,
     **SCARCITY_OUTPUT_FIELDS,
     **VALUATION_OUTPUT_FIELDS
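For context, the `**` unpacking in the hunk above merges the shared field specs into each table's spec, with the table-specific id column listed first. A toy illustration with abbreviated stand-ins (not the real specs):

# Toy stand-ins for the shared output field specs.
BASE_OUTPUT_FIELDS = {'precip_mn': {'type': 'number'}}
SCARCITY_OUTPUT_FIELDS = {'consum_vol': {'type': 'number'}}

WATERSHED_OUTPUT_FIELDS = {
    'ws_id': {'type': 'integer'},  # id column added by this commit
    **BASE_OUTPUT_FIELDS,
    **SCARCITY_OUTPUT_FIELDS,
}
print(list(WATERSHED_OUTPUT_FIELDS))
# ['ws_id', 'precip_mn', 'consum_vol']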
@@ -209,6 +218,7 @@
                     "units": u.none,
                     "about": gettext("Crop coefficient for this LULC class.")}
             },
+            "index_col": "lucode",
             "about": gettext(
                 "Table of biophysical parameters for each LULC class. All "
                 "values in the LULC raster must have corresponding entries "
@@ -239,6 +249,7 @@
                     "units": u.meter**3/u.year/u.pixel
                 }
             },
+            "index_col": "lucode",
             "required": False,
             "about": gettext(
                 "A table of water demand for each LULC class. Each LULC code "
@@ -310,6 +321,7 @@
                     "the time span.")
                 }
             },
+            "index_col": "ws_id",
             "required": False,
             "about": gettext(
                 "A table mapping each watershed to the associated valuation "
@@ -328,6 +340,7 @@
         },
         "watershed_results_wyield.csv": {
             "columns": {**WATERSHED_OUTPUT_FIELDS},
+            "index_col": "ws_id",
             "about": "Table containing biophysical output values per watershed."
         },
         "subwatershed_results_wyield.shp": {
@@ -337,6 +350,7 @@
         },
         "subwatershed_results_wyield.csv": {
             "columns": {**SUBWATERSHED_OUTPUT_FIELDS},
+            "index_col": "subws_id",
             "about": "Table containing biophysical output values per subwatershed."
         },
         "per_pixel": {
@@ -509,23 +523,23 @@ def execute(args):
     if invalid_parameters:
         raise ValueError(f'Invalid parameters passed: {invalid_parameters}')

-    # valuation_params is passed to create_vector_output()
-    # which computes valuation if valuation_params is not None.
-    valuation_params = None
+    # valuation_df is passed to create_vector_output()
+    # which computes valuation if valuation_df is not None.
+    valuation_df = None
     if 'valuation_table_path' in args and args['valuation_table_path'] != '':
         LOGGER.info(
             'Checking that watersheds have entries for every `ws_id` in the '
             'valuation table.')
         # Open/read in valuation parameters from CSV file
-        valuation_params = utils.read_csv_to_dataframe(
-            args['valuation_table_path'], 'ws_id').to_dict(orient='index')
+        valuation_df = utils.read_csv_to_dataframe(
+            args['valuation_table_path'], MODEL_SPEC['args']['valuation_table_path'])
         watershed_vector = gdal.OpenEx(
             args['watersheds_path'], gdal.OF_VECTOR)
         watershed_layer = watershed_vector.GetLayer()
         missing_ws_ids = []
         for watershed_feature in watershed_layer:
             watershed_ws_id = watershed_feature.GetField('ws_id')
-            if watershed_ws_id not in valuation_params:
+            if watershed_ws_id not in valuation_df.index:
                 missing_ws_ids.append(watershed_ws_id)
         watershed_feature = None
         watershed_layer = None
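Note the membership test above: with a dict, `watershed_ws_id not in valuation_params` checked the keys, but `in` on a DataFrame tests column labels, so the DataFrame version must check `.index`. A minimal sketch with made-up values:

import pandas as pd

valuation_df = pd.DataFrame(
    {'efficiency': [0.9, 0.85]}, index=pd.Index([1, 2], name='ws_id'))

print(1 in valuation_df)        # False -- 'in' tests column labels
print(1 in valuation_df.index)  # True  -- this is what the code needs

watershed_ids = [1, 2, 3]  # hypothetical ids read from the watershed vector
missing = [ws for ws in watershed_ids if ws not in valuation_df.index]
print(missing)  # [3]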
@@ -636,48 +650,43 @@ def execute(args):
         'lulc': pygeoprocessing.get_raster_info(clipped_lulc_path)['nodata'][0]}

     # Open/read in the csv file into a dictionary and add to arguments
-    bio_dict = utils.read_csv_to_dataframe(
-        args['biophysical_table_path'], 'lucode').to_dict(orient='index')
-    bio_lucodes = set(bio_dict.keys())
+    bio_df = utils.read_csv_to_dataframe(args['biophysical_table_path'],
+        MODEL_SPEC['args']['biophysical_table_path'])
+    bio_lucodes = set(bio_df.index.values)
     bio_lucodes.add(nodata_dict['lulc'])
     LOGGER.debug(f'bio_lucodes: {bio_lucodes}')

     if 'demand_table_path' in args and args['demand_table_path'] != '':
-        demand_dict = utils.read_csv_to_dataframe(
-            args['demand_table_path'], 'lucode').to_dict(orient='index')
+        demand_df = utils.read_csv_to_dataframe(
+            args['demand_table_path'], MODEL_SPEC['args']['demand_table_path'])
         demand_reclassify_dict = dict(
-            [(lucode, demand_dict[lucode]['demand'])
-             for lucode in demand_dict])
-        demand_lucodes = set(demand_dict.keys())
+            [(lucode, row['demand']) for lucode, row in demand_df.iterrows()])
+        demand_lucodes = set(demand_df.index.values)
         demand_lucodes.add(nodata_dict['lulc'])
         LOGGER.debug(f'demand_lucodes: {demand_lucodes}', )
     else:
         demand_lucodes = None

-    # Break the bio_dict into three separate dictionaries based on
+    # Break the bio_df into three separate dictionaries based on
     # Kc, root_depth, and LULC_veg fields to use for reclassifying
     Kc_dict = {}
     root_dict = {}
     vegetated_dict = {}

-    for lulc_code in bio_dict:
-        Kc_dict[lulc_code] = bio_dict[lulc_code]['kc']
+    for lulc_code, row in bio_df.iterrows():
+        Kc_dict[lulc_code] = row['kc']

-        # Catch invalid LULC_veg values with an informative error.
-        lulc_veg_value = bio_dict[lulc_code]['lulc_veg']
-        try:
-            vegetated_dict[lulc_code] = int(lulc_veg_value)
-            if vegetated_dict[lulc_code] not in set([0, 1]):
-                raise ValueError()
-        except ValueError:
+        if row['lulc_veg'] not in set([0, 1]):
+            # If the user provided an invalid LULC_veg value, raise an
+            # informative error.
             raise ValueError(
-                f'LULC_veg value must be either 1 or 0, not {lulc_veg_value}')
+                f'LULC_veg value must be either 1 or 0, not {row["lulc_veg"]}')
+        vegetated_dict[lulc_code] = row['lulc_veg']

         # If LULC_veg value is 1 get root depth value
         if vegetated_dict[lulc_code] == 1:
-            root_dict[lulc_code] = bio_dict[lulc_code]['root_depth']
+            root_dict[lulc_code] = row['root_depth']
         # If LULC_veg value is 0 then we do not care about root
         # depth value so will just substitute in a 1. This
         # value will not end up being used.
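The loop rewrite above swaps nested-dict lookups for `DataFrame.iterrows()`, which yields `(index_value, row)` pairs where each row is a Series keyed by column name. A self-contained sketch of the same reclassification logic with made-up table values:

import pandas as pd

bio_df = pd.DataFrame(
    {'kc': [0.65, 1.0], 'lulc_veg': [1, 0], 'root_depth': [1500.0, 10.0]},
    index=pd.Index([1, 2], name='lucode'))

Kc_dict, root_dict, vegetated_dict = {}, {}, {}
for lulc_code, row in bio_df.iterrows():
    Kc_dict[lulc_code] = row['kc']
    if row['lulc_veg'] not in (0, 1):
        raise ValueError(
            f'LULC_veg value must be either 1 or 0, not {row["lulc_veg"]}')
    vegetated_dict[lulc_code] = row['lulc_veg']
    # Root depth only matters for vegetated classes; 1.0 is a stand-in
    # value that is never used downstream.
    root_dict[lulc_code] = row['root_depth'] if row['lulc_veg'] == 1 else 1.0

print(Kc_dict)  # {1: 0.65, 2: 1.0}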
@@ -843,7 +852,7 @@ def execute(args):
     write_output_vector_attributes_task = graph.add_task(
         func=write_output_vector_attributes,
         args=(target_ws_path, ws_id_name, zonal_stats_pickle_list,
-              valuation_params),
+              valuation_df),
         target_path_list=[target_ws_path],
         dependent_task_list=[
             *zonal_stats_task_list, copy_watersheds_vector_task],
@@ -879,7 +888,7 @@ def copy_vector(base_vector_path, target_vector_path):


 def write_output_vector_attributes(target_vector_path, ws_id_name,
-                                   stats_path_list, valuation_params):
+                                   stats_path_list, valuation_df):
     """Add data attributes to the vector outputs of this model.

     Join results of zonal stats to copies of the watershed shapefiles.
@@ -893,7 +902,7 @@ def write_output_vector_attributes(target_vector_path, ws_id_name,
             represent watersheds or subwatersheds.
         stats_path_list (list): List of file paths to pickles storing the zonal
             stats results.
-        valuation_params (dict): The dictionary built from
+        valuation_df (pandas.DataFrame): dataframe built from
             args['valuation_table_path']. Or None if valuation table was not
             provided.
@@ -929,10 +938,10 @@ def write_output_vector_attributes(target_vector_path, ws_id_name,
         _add_zonal_stats_dict_to_shape(
             target_vector_path, ws_stats_dict, key_name, 'mean')

-    if valuation_params:
+    if valuation_df is not None:
         # only do valuation for watersheds, not subwatersheds
         if ws_id_name == 'ws_id':
-            compute_watershed_valuation(target_vector_path, valuation_params)
+            compute_watershed_valuation(target_vector_path, valuation_df)


 def convert_vector_to_csv(base_vector_path, target_csv_path):
@@ -1141,14 +1150,14 @@ def pet_op(eto_pix, Kc_pix, eto_nodata, output_nodata):
     return result


-def compute_watershed_valuation(watershed_results_vector_path, val_dict):
+def compute_watershed_valuation(watershed_results_vector_path, val_df):
     """Compute net present value and energy for the watersheds.

     Args:
         watershed_results_vector_path (string):
             Path to an OGR shapefile for the watershed results.
             Where the results will be added.
-        val_dict (dict): a python dictionary that has all the valuation
+        val_df (pandas.DataFrame): a dataframe that has all the valuation
             parameters for each watershed.

     Returns:
@@ -1183,26 +1192,23 @@ def compute_watershed_valuation(watershed_results_vector_path, val_dict):
         # there won't be a rsupply_vl value if the polygon feature only
         # covers nodata raster values, so check before doing math.
         if rsupply_vl is not None:
-            # Get the valuation parameters for watershed 'ws_id'
-            val_row = val_dict[ws_id]
-
             # Compute hydropower energy production (KWH)
             # This is from the equation given in the Users' Guide
             energy = (
-                val_row['efficiency'] * val_row['fraction'] *
-                val_row['height'] * rsupply_vl * 0.00272)
+                val_df['efficiency'][ws_id] * val_df['fraction'][ws_id] *
+                val_df['height'][ws_id] * rsupply_vl * 0.00272)

             dsum = 0
             # Divide by 100 because it is input at a percent and we need
             # decimal value
-            disc = val_row['discount'] / 100
+            disc = val_df['discount'][ws_id] / 100
             # To calculate the summation of the discount rate term over the life
             # span of the dam we can use a geometric series
             ratio = 1 / (1 + disc)
             if ratio != 1:
-                dsum = (1 - math.pow(ratio, val_row['time_span'])) / (1 - ratio)
+                dsum = (1 - math.pow(ratio, val_df['time_span'][ws_id])) / (1 - ratio)

-            npv = ((val_row['kw_price'] * energy) - val_row['cost']) * dsum
+            npv = ((val_df['kw_price'][ws_id] * energy) - val_df['cost'][ws_id]) * dsum

             # Get the volume field index and add value
             ws_feat.SetField(energy_field, energy)
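In the valuation math above, `val_df['efficiency'][ws_id]` selects the `efficiency` column (a Series) and then indexes it by `ws_id`; `val_df.loc[ws_id, 'efficiency']` would be the equivalent single-step lookup. A runnable sketch of the same arithmetic with a one-row table of invented numbers:

import math

import pandas as pd

val_df = pd.DataFrame(
    {'efficiency': [0.9], 'fraction': [0.6], 'height': [25.0],
     'kw_price': [0.07], 'cost': [0.0], 'discount': [5.0],
     'time_span': [100]},
    index=pd.Index([1], name='ws_id'))

ws_id = 1
rsupply_vl = 1e6  # hypothetical realized water supply volume

energy = (val_df['efficiency'][ws_id] * val_df['fraction'][ws_id] *
          val_df['height'][ws_id] * rsupply_vl * 0.00272)
disc = val_df['discount'][ws_id] / 100  # percent -> decimal
ratio = 1 / (1 + disc)
dsum = (1 - math.pow(ratio, val_df['time_span'][ws_id])) / (1 - ratio)
npv = ((val_df['kw_price'][ws_id] * energy) - val_df['cost'][ws_id]) * dsum
print(round(energy, 1), round(npv, 1))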
9 changes: 4 additions & 5 deletions src/natcap/invest/carbon.py
@@ -130,6 +130,7 @@
                     "units": u.metric_ton/u.hectare,
                     "about": gettext("Carbon density of dead matter.")}
             },
+            "index_col": "lucode",
             "about": gettext(
                 "A table that maps each LULC code to carbon pool data for "
                 "that LULC type."),
@@ -366,8 +367,8 @@ def execute(args):
         (_INTERMEDIATE_BASE_FILES, intermediate_output_dir),
         (_TMP_BASE_FILES, output_dir)], file_suffix)

-    carbon_pool_table = utils.read_csv_to_dataframe(
-        args['carbon_pools_path'], 'lucode').to_dict(orient='index')
+    carbon_pool_df = utils.read_csv_to_dataframe(
+        args['carbon_pools_path'], MODEL_SPEC['args']['carbon_pools_path'])

     work_token_dir = os.path.join(
         intermediate_output_dir, '_taskgraph_working_dir')
@@ -413,9 +414,7 @@
         carbon_map_task_lookup[scenario_type] = []
         storage_path_list = []
         for pool_type in ['c_above', 'c_below', 'c_soil', 'c_dead']:
-            carbon_pool_by_type = dict([
-                (lucode, float(carbon_pool_table[lucode][pool_type]))
-                for lucode in carbon_pool_table])
+            carbon_pool_by_type = carbon_pool_df[pool_type].to_dict()

             lulc_key = 'lulc_%s_path' % scenario_type
             storage_key = '%s_%s' % (pool_type, scenario_type)
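The one-liner above replaces a comprehension over the nested dict: selecting one pool column gives a Series whose `.to_dict()` maps each `lucode` index value to that pool's density. A quick sketch with invented densities:

import pandas as pd

carbon_pool_df = pd.DataFrame(
    {'c_above': [5.0, 2.0], 'c_below': [1.0, 0.5]},
    index=pd.Index([1, 2], name='lucode'))

carbon_pool_by_type = carbon_pool_df['c_above'].to_dict()
print(carbon_pool_by_type)  # {1: 5.0, 2: 2.0}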