
Commit 3f44e18

Merge pull request #1375 from emlys/task/1373
Move validation out of execute methods
davemfish authored Sep 7, 2023
2 parents c33744e + f693dc4 commit 3f44e18
Showing 18 changed files with 124 additions and 302 deletions.
4 changes: 4 additions & 0 deletions HISTORY.rst
@@ -54,6 +54,10 @@ Unreleased Changes
has been merged into ``utils.read_csv_to_dataframe``
(`#1319 <https://github.com/natcap/invest/issues/1319>`_),
(`#1327 <https://github.com/natcap/invest/issues/1327>`_)
* Standardized on keeping the ``execute`` and ``validate`` functions
orthogonal. Now no models call ``validate`` from ``execute``. This
affected AWY, CV, UFRM, Wave Energy, and Wind Energy.
(`#1373 <https://github.com/natcap/invest/issues/1373>`_)
* Improved the validation message that is returned when not all spatial
inputs overlap (`#502 <https://github.com/natcap/invest/issues/502>`_)
* Standardized the name and location of the taskgraph cache directory for
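The changelog entry above boils down to a small contract between the two entry points. Here is a minimal sketch of that contract for a hypothetical model module; the ``validation.validate(args, MODEL_SPEC['args'])`` call mirrors the one visible in recmodel_client.py below, but the spec contents are invented.

from natcap.invest import validation

# Hypothetical, empty spec for illustration; real models fill this in.
MODEL_SPEC = {'args': {}}


def validate(args, limit_to=None):
    # Report problems as (keys, message) tuples; an empty list means valid.
    return validation.validate(args, MODEL_SPEC['args'])


def execute(args):
    # Assumes the caller (UI or script) already ran validate(args);
    # no validate() call in here anymore.
    ...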
5 changes: 0 additions & 5 deletions src/natcap/invest/annual_water_yield.py
@@ -518,11 +518,6 @@ def execute(args):
None
"""
LOGGER.info('Validating arguments')
invalid_parameters = validate(args)
if invalid_parameters:
raise ValueError(f'Invalid parameters passed: {invalid_parameters}')

# valuation_df is passed to create_vector_output()
# which computes valuation if valuation_df is not None.
valuation_df = None
4 changes: 0 additions & 4 deletions src/natcap/invest/coastal_vulnerability.py
@@ -766,10 +766,6 @@ def execute(args):
None
"""
LOGGER.info('Validating arguments')
invalid_parameters = validate(args)
if invalid_parameters:
raise ValueError(f"Invalid parameters passed: {invalid_parameters}")
_validate_habitat_table_paths(args['habitat_table_path'])

output_dir = os.path.join(args['workspace_dir'])
28 changes: 11 additions & 17 deletions src/natcap/invest/forest_carbon_edge_effect.py
@@ -351,24 +351,18 @@ def execute(args):
# just check that the AOI exists since it wouldn't crash until the end of
# the whole model run if it didn't.
if 'aoi_vector_path' in args and args['aoi_vector_path'] != '':
-        aoi_vector = gdal.OpenEx(args['aoi_vector_path'], gdal.OF_VECTOR)
-        if not aoi_vector:
-            raise ValueError(
-                f"Unable to open aoi at: {args['aoi_vector_path']}")
-        else:
-            aoi_vector = None
-            lulc_raster_bb = pygeoprocessing.get_raster_info(
-                args['lulc_raster_path'])['bounding_box']
-            aoi_vector_bb = pygeoprocessing.get_vector_info(
-                args['aoi_vector_path'])['bounding_box']
-            try:
-                merged_bb = pygeoprocessing.merge_bounding_box_list(
-                    [lulc_raster_bb, aoi_vector_bb], 'intersection')
-                LOGGER.debug(f"merged bounding boxes: {merged_bb}")
-            except ValueError:
-                raise ValueError(
-                    f"The landcover raster {args['lulc_raster_path']} and AOI "
-                    f"{args['aoi_vector_path']} do not touch each other.")
+        lulc_raster_bb = pygeoprocessing.get_raster_info(
+            args['lulc_raster_path'])['bounding_box']
+        aoi_vector_bb = pygeoprocessing.get_vector_info(
+            args['aoi_vector_path'])['bounding_box']
+        try:
+            merged_bb = pygeoprocessing.merge_bounding_box_list(
+                [lulc_raster_bb, aoi_vector_bb], 'intersection')
+            LOGGER.debug(f"merged bounding boxes: {merged_bb}")
+        except ValueError:
+            raise ValueError(
+                f"The landcover raster {args['lulc_raster_path']} and AOI "
+                f"{args['aoi_vector_path']} do not touch each other.")

output_dir = args['workspace_dir']
intermediate_dir = os.path.join(
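The replacement code leans on pygeoprocessing.merge_bounding_box_list raising ValueError when the boxes share no area, which execute converts into the friendlier message above. A standalone sketch with invented coordinates:

import pygeoprocessing

lulc_bb = [0, 0, 10, 10]   # [minx, miny, maxx, maxy], invented
aoi_bb = [20, 20, 30, 30]  # deliberately disjoint from lulc_bb
try:
    pygeoprocessing.merge_bounding_box_list([lulc_bb, aoi_bb], 'intersection')
except ValueError:
    print('The landcover raster and AOI do not touch each other.')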
65 changes: 4 additions & 61 deletions src/natcap/invest/ndr/ndr.py
@@ -545,59 +545,6 @@ def execute(args):
None
"""
def _validate_inputs(nutrients_to_process, biophysical_df):
"""Validate common errors in inputs.
Args:
nutrients_to_process (list): list of 'n' and/or 'p'
biophysical_df (pandas.DataFrame): dataframe representation of
the input biophysical table. Used to validate the correct
columns are input
Returns:
None
Raises:
ValueError whenever a missing field in the parameter table is
detected along with a message describing every missing field.
"""
# Make sure all the nutrient inputs are good
if len(nutrients_to_process) == 0:
raise ValueError("Neither phosphorus nor nitrogen was selected"
" to be processed. Choose at least one.")

# Build up a list that'll let us iterate through all the input tables
# and check for the required rows, and report errors if something
# is missing.
row_header_table_list = []

lu_parameter_row = biophysical_df.columns.to_list()
row_header_table_list.append(
(lu_parameter_row, ['load_', 'eff_', 'crit_len_'],
args['biophysical_table_path']))

missing_headers = []
for row, header_prefixes, table_type in row_header_table_list:
for nutrient_id in nutrients_to_process:
for header_prefix in header_prefixes:
header = header_prefix + nutrient_id
if header not in row:
missing_headers.append(
"Missing header %s from %s" % (
header, table_type))

# proportion_subsurface_n is a special case in which phosphorus does
# not have an equivalent.
if ('n' in nutrients_to_process and
'proportion_subsurface_n' not in lu_parameter_row):
missing_headers.append(
"Missing header proportion_subsurface_n from " +
args['biophysical_table_path'])

if len(missing_headers) > 0:
raise ValueError('\n'.join(missing_headers))

# Load all the tables for preprocessing
output_dir = os.path.join(args['workspace_dir'])
intermediate_output_dir = os.path.join(
@@ -630,8 +577,6 @@ def _validate_inputs(nutrients_to_process, biophysical_df):
args['biophysical_table_path'],
MODEL_SPEC['args']['biophysical_table_path'])

_validate_inputs(nutrients_to_process, biophysical_df)

# these are used for aggregation in the last step
field_pickle_map = {}

@@ -1143,9 +1088,11 @@ def validate(args, limit_to=None):
LOGGER.debug('Starting logging for biophysical table')
if 'biophysical_table_path' not in invalid_keys:
# Check required fields given the state of ``calc_n`` and ``calc_p``
nutrient_required_fields = []
nutrient_required_fields = ['lucode']
nutrients_selected = set()
for nutrient_letter in ('n', 'p'):
if nutrient_letter == 'n':
nutrient_required_fields += ['proportion_subsurface_n']
do_nutrient_key = f'calc_{nutrient_letter}'
if do_nutrient_key in args and args[do_nutrient_key]:
nutrients_selected.add(do_nutrient_key)
@@ -1154,20 +1101,16 @@
f'eff_{nutrient_letter}',
f'crit_len_{nutrient_letter}'
]

if not nutrients_selected:
validation_warnings.append(
(['calc_n', 'calc_p'], MISSING_NUTRIENT_MSG))

LOGGER.debug('Required nutrient-specific keys in CSV: %s',
nutrient_required_fields)
# Check that these nutrient-specific keys are in the table
# validate has already checked all the other keys
error_msg = validation.check_csv(
args['biophysical_table_path'],
header_patterns=nutrient_required_fields)
columns={key: '' for key in nutrient_required_fields})
if error_msg:
LOGGER.debug('Error: %s', error_msg)
validation_warnings.append(
(['biophysical_table_path'], error_msg))

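As a worked example of the loop above (not code from the diff): with calc_n enabled and calc_p disabled, the required-column list comes out as follows, and check_csv reports any missing column as a warning instead of raising.

nutrient_required_fields = [
    'lucode',                          # always required
    'proportion_subsurface_n',         # nitrogen-only special case
    'load_n', 'eff_n', 'crit_len_n',   # per-nutrient prefixed columns
]
# validation.check_csv(args['biophysical_table_path'],
#                      columns={key: '' for key in nutrient_required_fields})
# returns an error message naming the missing columns, if any.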
113 changes: 55 additions & 58 deletions src/natcap/invest/recreation/recmodel_client.py
@@ -129,7 +129,7 @@
},
"start_year": {
"type": "number",
"expression": "value >= 2005",
"expression": "2005 <= value <= 2017",
"units": u.year_AD,
"about": gettext(
"Year at which to start photo user-day calculations. "
@@ -140,7 +140,7 @@
},
"end_year": {
"type": "number",
"expression": "value <= 2017",
"expression": "2005 <= value <= 2017",
"units": u.year_AD,
"about": gettext(
"Year at which to end photo user-day calculations. "
@@ -437,29 +437,10 @@ def execute(args):
None
"""
if ('predictor_table_path' in args and
args['predictor_table_path'] != ''):
_validate_same_id_lengths(args['predictor_table_path'])
_validate_same_projection(
args['aoi_path'], args['predictor_table_path'])
_validate_predictor_types(args['predictor_table_path'])

if ('predictor_table_path' in args and
'scenario_predictor_table_path' in args and
args['predictor_table_path'] != '' and
args['scenario_predictor_table_path'] != ''):
_validate_same_ids_and_types(
args['predictor_table_path'],
args['scenario_predictor_table_path'])
_validate_same_projection(
args['aoi_path'], args['scenario_predictor_table_path'])
_validate_predictor_types(args['scenario_predictor_table_path'])

if int(args['end_year']) < int(args['start_year']):
raise ValueError(
"Start year must be less than or equal to end year.\n"
f"start_year: {args['start_year']}\nend_year: {args['end_year']}")

# in case the user defines a hostname
if 'hostname' in args:
server_url = f"PYRO:natcap.invest.recreation@{args['hostname']}:{args['port']}"
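Two related year checks changed in this file: the spec expressions above now bound both ends of the 2005-2017 range, and the start/end comparison deleted from execute here reappears in validate() at the bottom of the diff. A hypothetical sketch of how a range expression such as '2005 <= value <= 2017' can be evaluated, assuming ``value`` is bound to the user's input (InVEST's real evaluator lives in its validation module):

def meets_expression(value, expression='2005 <= value <= 2017'):
    # Evaluate the spec expression with ``value`` bound and builtins off.
    return bool(eval(expression, {'__builtins__': {}}, {'value': value}))

assert meets_expression(2010) is True
assert meets_expression(2020) is False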
@@ -1545,9 +1526,8 @@ def _validate_same_id_lengths(table_path):
table_path (string): path to a csv table that has at least
the field 'id'
Raises:
ValueError if any of the fields in 'id' and 'type' don't match between
tables.
Returns:
string message if IDs are too long
"""
predictor_df = utils.read_csv_to_dataframe(
@@ -1557,9 +1537,8 @@
if len(p_id) > 10:
too_long.add(p_id)
if len(too_long) > 0:
raise ValueError(
"The following IDs are more than 10 characters long: "
f"{str(too_long)}")
return (
f'The following IDs are more than 10 characters long: {too_long}')
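The same raise-to-return refactor repeats for every helper in this file. A self-contained, simplified stand-in (not the real helper) showing the shape of the pattern:

def check_id_lengths(ids, max_len=10):
    # Return a message (or None) instead of raising, so the caller can
    # attach it to the offending args key as a validation warning.
    too_long = {i for i in ids if len(i) > max_len}
    if too_long:
        return (f'The following IDs are more than {max_len} '
                f'characters long: {too_long}')
    return None

assert check_id_lengths(['roads', 'parks']) is None
assert 'a_very_long_predictor_id' in check_id_lengths(['a_very_long_predictor_id'])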


def _validate_same_ids_and_types(
Expand All @@ -1577,12 +1556,8 @@ def _validate_same_ids_and_types(
at least the fields 'id' and 'type'
Returns:
None
Raises:
ValueError if any of the fields in 'id' and 'type' don't match between
tables.
string message if any of the fields in 'id' and 'type' don't match
between tables.
"""
predictor_df = utils.read_csv_to_dataframe(
predictor_table_path, MODEL_SPEC['args']['predictor_table_path'])
@@ -1596,10 +1571,8 @@
scenario_predictor_pairs = set([
(p_id, row['type']) for p_id, row in scenario_predictor_df.iterrows()])
if predictor_pairs != scenario_predictor_pairs:
raise ValueError('table pairs unequal.\n\t'
f'predictor: {predictor_pairs}\n\t'
f'scenario:{scenario_predictor_pairs}')
LOGGER.info('tables validate correctly')
return (f'table pairs unequal. predictor: {predictor_pairs} '
f'scenario: {scenario_predictor_pairs}')


def _validate_same_projection(base_vector_path, table_path):
Expand All @@ -1611,12 +1584,8 @@ def _validate_same_projection(base_vector_path, table_path):
the field 'path'
Returns:
None
Raises:
ValueError if the projections in each of the GIS types in the table
string message if the projections in each of the GIS types in the table
are not identical to the projection in base_vector_path
"""
# This will load the table as a list of paths which we can iterate through
# without bothering the rest of the table structure
@@ -1647,21 +1616,17 @@ def error_handler(err_level, err_no, err_msg):
else:
vector = gdal.OpenEx(path, gdal.OF_VECTOR)
if vector is None:
raise ValueError(f"{path} did not load")
return f"{path} did not load"
layer = vector.GetLayer()
ref = osr.SpatialReference(layer.GetSpatialRef().ExportToWkt())
layer = None
vector = None
if not base_ref.IsSame(ref):
LOGGER.warning(
f"{path} might have a different projection than the base AOI\n"
f"base:{base_ref.ExportToPrettyWkt()}\n"
f"current:{ref.ExportToPrettyWkt()}")
invalid_projections = True
if invalid_projections:
raise ValueError(
"One or more of the projections in the table did not match the "
"projection of the base vector")
return (
f"One or more of the projections in the table ({path}) did not "
f"match the projection of the base vector ({base_vector_path})")


def _validate_predictor_types(table_path):
@@ -1672,11 +1637,8 @@ def _validate_predictor_types(table_path):
the field 'type'
Returns:
None
Raises:
ValueError if any value in the ``type`` column does not match a valid
type, ignoring leading/trailing whitespace.
string message if any value in the ``type`` column does not match a
valid type, ignoring leading/trailing whitespace.
"""
df = utils.read_csv_to_dataframe(
table_path, MODEL_SPEC['args']['predictor_table_path'])
@@ -1687,8 +1649,8 @@
'polygon_area_coverage', 'polygon_percent_coverage'})
difference = set(df['type']).difference(valid_types)
if difference:
raise ValueError('The table contains invalid type value(s): '
f'{difference}. The allowed types are: {valid_types}')
return (f'The table contains invalid type value(s): {difference}. '
f'The allowed types are: {valid_types}')


def delay_op(last_time, time_delay, func):
@@ -1731,4 +1693,39 @@ def validate(args, limit_to=None):
be an empty list if validation succeeds.
"""
return validation.validate(args, MODEL_SPEC['args'])
validation_messages = validation.validate(args, MODEL_SPEC['args'])
sufficient_valid_keys = (validation.get_sufficient_keys(args) -
validation.get_invalid_keys(validation_messages))

validation_tuples = []
if 'predictor_table_path' in sufficient_valid_keys:
validation_tuples += [
(_validate_same_id_lengths, ['predictor_table_path']),
(_validate_predictor_types, ['predictor_table_path'])]
if 'aoi_path' in sufficient_valid_keys:
validation_tuples.append(
(_validate_same_projection, ['aoi_path', 'predictor_table_path']))
if 'scenario_predictor_table_path' in sufficient_valid_keys:
validation_tuples.append((
_validate_same_ids_and_types,
['predictor_table_path', 'scenario_predictor_table_path']))
if 'scenario_predictor_table_path' in sufficient_valid_keys:
validation_tuples.append((
_validate_predictor_types, ['scenario_predictor_table_path']))
if 'aoi_path' in sufficient_valid_keys:
validation_tuples.append((_validate_same_projection,
['aoi_path', 'scenario_predictor_table_path']))


for validate_func, key_list in validation_tuples:
msg = validate_func(*[args[key] for key in key_list])
if msg:
validation_messages.append((key_list, msg))

if 'start_year' in sufficient_valid_keys and 'end_year' in sufficient_valid_keys:
if int(args['end_year']) < int(args['start_year']):
validation_messages.append((
['start_year', 'end_year'],
"Start year must be less than or equal to end year."))

return validation_messages
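With the helpers wired into validate(), a caller now collects every problem in one pass instead of tripping over the first ValueError inside execute(). A hypothetical call (all paths and values invented):

warnings = validate({
    'workspace_dir': 'workspace',
    'aoi_path': 'aoi.shp',
    'predictor_table_path': 'predictors.csv',
    'start_year': '2017',
    'end_year': '2005',   # deliberately earlier than start_year
})
# (['start_year', 'end_year'],
#  'Start year must be less than or equal to end year.')
# should appear among the returned (keys, message) tuples.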
10 changes: 1 addition & 9 deletions src/natcap/invest/routedem.py
@@ -345,16 +345,8 @@ def execute(args):

if ('calculate_flow_direction' in args and
bool(args['calculate_flow_direction'])):
# All routing functions depend on this one task.
# Check the algorithm early so we can fail quickly, but only if we're
# doing some sort of hydrological routing
algorithm = args['algorithm'].upper()
try:
routing_funcs = _ROUTING_FUNCS[algorithm]
except KeyError:
raise RuntimeError(
'Invalid algorithm specified (%s). Must be one of %s' % (
args['algorithm'], ', '.join(sorted(_ROUTING_FUNCS.keys()))))
routing_funcs = _ROUTING_FUNCS[algorithm]

if 'dem_band_index' in args and args['dem_band_index'] not in (None, ''):
band_index = int(args['dem_band_index'])
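With the algorithm option checked up front by validate() against the model spec (the premise of this PR), execute can index the routing-function table directly. A simplified stand-in sketch (table values invented):

_ROUTING_FUNCS = {'D8': 'route_d8', 'MFD': 'route_mfd'}  # stand-in values

algorithm = 'd8'.upper()  # user input, case-normalized as above
routing_funcs = _ROUTING_FUNCS[algorithm]
# An unknown algorithm now surfaces as a validation warning before
# execute() runs, instead of the RuntimeError raised mid-setup.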