
Commit 3f44e18

Merge pull request #1375 from emlys/task/1373
Move validation out of execute methods
davemfish authored Sep 7, 2023
2 parents c33744e + f693dc4 commit 3f44e18
Showing 18 changed files with 124 additions and 302 deletions.
4 changes: 4 additions & 0 deletions HISTORY.rst
@@ -54,6 +54,10 @@ Unreleased Changes
has been merged into ``utils.read_csv_to_dataframe``
(`#1319 <https://github.com/natcap/invest/issues/1319>`_),
(`#1327 <https://github.com/natcap/invest/issues/1327>`_)
* Standardized on keeping the ``execute`` and ``validate`` functions
orthogonal. Now no models call ``validate`` from ``execute``. This
affected AWY, CV, UFRM, Wave Energy, and Wind Energy.
(`#1373 <https://github.com/natcap/invest/issues/1373>`_)
* Improved the validation message that is returned when not all spatial
inputs overlap (`#502 <https://github.com/natcap/invest/issues/502>`_)
* Standardized the name and location of the taskgraph cache directory for
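The changelog entry above boils down to a small contract between the two entry points. Here is a minimal sketch of that contract for a hypothetical model module; the ``validation.validate(args, MODEL_SPEC['args'])`` call mirrors the one visible in recmodel_client.py below, but the spec contents are invented.

from natcap.invest import validation

# Hypothetical, empty spec for illustration; real models fill this in.
MODEL_SPEC = {'args': {}}


def validate(args, limit_to=None):
    # Report problems as (keys, message) tuples; an empty list means valid.
    return validation.validate(args, MODEL_SPEC['args'])


def execute(args):
    # Assumes the caller (UI or script) already ran validate(args);
    # no validate() call in here anymore.
    ...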
5 changes: 0 additions & 5 deletions src/natcap/invest/annual_water_yield.py
@@ -518,11 +518,6 @@ def execute(args):
None
"""
LOGGER.info('Validating arguments')
invalid_parameters = validate(args)
if invalid_parameters:
raise ValueError(f'Invalid parameters passed: {invalid_parameters}')

# valuation_df is passed to create_vector_output()
# which computes valuation if valuation_df is not None.
valuation_df = None
4 changes: 0 additions & 4 deletions src/natcap/invest/coastal_vulnerability.py
@@ -766,10 +766,6 @@ def execute(args):
None
"""
LOGGER.info('Validating arguments')
invalid_parameters = validate(args)
if invalid_parameters:
raise ValueError(f"Invalid parameters passed: {invalid_parameters}")
_validate_habitat_table_paths(args['habitat_table_path'])

output_dir = os.path.join(args['workspace_dir'])
28 changes: 11 additions & 17 deletions src/natcap/invest/forest_carbon_edge_effect.py
@@ -351,24 +351,18 @@ def execute(args):
# just check that the AOI exists since it wouldn't crash until the end of
# the whole model run if it didn't.
if 'aoi_vector_path' in args and args['aoi_vector_path'] != '':
-        aoi_vector = gdal.OpenEx(args['aoi_vector_path'], gdal.OF_VECTOR)
-        if not aoi_vector:
-            raise ValueError(
-                f"Unable to open aoi at: {args['aoi_vector_path']}")
-        else:
-            aoi_vector = None
-            lulc_raster_bb = pygeoprocessing.get_raster_info(
-                args['lulc_raster_path'])['bounding_box']
-            aoi_vector_bb = pygeoprocessing.get_vector_info(
-                args['aoi_vector_path'])['bounding_box']
-            try:
-                merged_bb = pygeoprocessing.merge_bounding_box_list(
-                    [lulc_raster_bb, aoi_vector_bb], 'intersection')
-                LOGGER.debug(f"merged bounding boxes: {merged_bb}")
-            except ValueError:
-                raise ValueError(
-                    f"The landcover raster {args['lulc_raster_path']} and AOI "
-                    f"{args['aoi_vector_path']} do not touch each other.")
+        lulc_raster_bb = pygeoprocessing.get_raster_info(
+            args['lulc_raster_path'])['bounding_box']
+        aoi_vector_bb = pygeoprocessing.get_vector_info(
+            args['aoi_vector_path'])['bounding_box']
+        try:
+            merged_bb = pygeoprocessing.merge_bounding_box_list(
+                [lulc_raster_bb, aoi_vector_bb], 'intersection')
+            LOGGER.debug(f"merged bounding boxes: {merged_bb}")
+        except ValueError:
+            raise ValueError(
+                f"The landcover raster {args['lulc_raster_path']} and AOI "
+                f"{args['aoi_vector_path']} do not touch each other.")

output_dir = args['workspace_dir']
intermediate_dir = os.path.join(
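The replacement code leans on pygeoprocessing.merge_bounding_box_list raising ValueError when the boxes share no area, which execute converts into the friendlier message above. A standalone sketch with invented coordinates:

import pygeoprocessing

lulc_bb = [0, 0, 10, 10]   # [minx, miny, maxx, maxy], invented
aoi_bb = [20, 20, 30, 30]  # deliberately disjoint from lulc_bb
try:
    pygeoprocessing.merge_bounding_box_list([lulc_bb, aoi_bb], 'intersection')
except ValueError:
    print('The landcover raster and AOI do not touch each other.')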
65 changes: 4 additions & 61 deletions src/natcap/invest/ndr/ndr.py
@@ -545,59 +545,6 @@ def execute(args):
None
"""
def _validate_inputs(nutrients_to_process, biophysical_df):
"""Validate common errors in inputs.
Args:
nutrients_to_process (list): list of 'n' and/or 'p'
biophysical_df (pandas.DataFrame): dataframe representation of
the input biophysical table. Used to validate the correct
columns are input
Returns:
None
Raises:
ValueError whenever a missing field in the parameter table is
detected along with a message describing every missing field.
"""
# Make sure all the nutrient inputs are good
if len(nutrients_to_process) == 0:
raise ValueError("Neither phosphorus nor nitrogen was selected"
" to be processed. Choose at least one.")

# Build up a list that'll let us iterate through all the input tables
# and check for the required rows, and report errors if something
# is missing.
row_header_table_list = []

lu_parameter_row = biophysical_df.columns.to_list()
row_header_table_list.append(
(lu_parameter_row, ['load_', 'eff_', 'crit_len_'],
args['biophysical_table_path']))

missing_headers = []
for row, header_prefixes, table_type in row_header_table_list:
for nutrient_id in nutrients_to_process:
for header_prefix in header_prefixes:
header = header_prefix + nutrient_id
if header not in row:
missing_headers.append(
"Missing header %s from %s" % (
header, table_type))

# proportion_subsurface_n is a special case in which phosphorus does
# not have an equivalent.
if ('n' in nutrients_to_process and
'proportion_subsurface_n' not in lu_parameter_row):
missing_headers.append(
"Missing header proportion_subsurface_n from " +
args['biophysical_table_path'])

if len(missing_headers) > 0:
raise ValueError('\n'.join(missing_headers))

# Load all the tables for preprocessing
output_dir = os.path.join(args['workspace_dir'])
intermediate_output_dir = os.path.join(
@@ -630,8 +577,6 @@ def _validate_inputs(nutrients_to_process, biophysical_df):
args['biophysical_table_path'],
MODEL_SPEC['args']['biophysical_table_path'])

_validate_inputs(nutrients_to_process, biophysical_df)

# these are used for aggregation in the last step
field_pickle_map = {}

@@ -1143,9 +1088,11 @@ def validate(args, limit_to=None):
LOGGER.debug('Starting logging for biophysical table')
if 'biophysical_table_path' not in invalid_keys:
# Check required fields given the state of ``calc_n`` and ``calc_p``
nutrient_required_fields = []
nutrient_required_fields = ['lucode']
nutrients_selected = set()
for nutrient_letter in ('n', 'p'):
if nutrient_letter == 'n':
nutrient_required_fields += ['proportion_subsurface_n']
do_nutrient_key = f'calc_{nutrient_letter}'
if do_nutrient_key in args and args[do_nutrient_key]:
nutrients_selected.add(do_nutrient_key)
@@ -1154,20 +1101,16 @@
f'eff_{nutrient_letter}',
f'crit_len_{nutrient_letter}'
]

if not nutrients_selected:
validation_warnings.append(
(['calc_n', 'calc_p'], MISSING_NUTRIENT_MSG))

LOGGER.debug('Required nutrient-specific keys in CSV: %s',
nutrient_required_fields)
# Check that these nutrient-specific keys are in the table
# validate has already checked all the other keys
error_msg = validation.check_csv(
args['biophysical_table_path'],
header_patterns=nutrient_required_fields)
columns={key: '' for key in nutrient_required_fields})
if error_msg:
LOGGER.debug('Error: %s', error_msg)
validation_warnings.append(
(['biophysical_table_path'], error_msg))

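As a worked example of the loop above (not code from the diff): with calc_n enabled and calc_p disabled, the required-column list comes out as follows, and check_csv reports any missing column as a warning instead of raising.

nutrient_required_fields = [
    'lucode',                          # always required
    'proportion_subsurface_n',         # nitrogen-only special case
    'load_n', 'eff_n', 'crit_len_n',   # per-nutrient prefixed columns
]
# validation.check_csv(args['biophysical_table_path'],
#                      columns={key: '' for key in nutrient_required_fields})
# returns an error message naming the missing columns, if any.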
113 changes: 55 additions & 58 deletions src/natcap/invest/recreation/recmodel_client.py
@@ -129,7 +129,7 @@
},
"start_year": {
"type": "number",
"expression": "value >= 2005",
"expression": "2005 <= value <= 2017",
"units": u.year_AD,
"about": gettext(
"Year at which to start photo user-day calculations. "
@@ -140,7 +140,7 @@
},
"end_year": {
"type": "number",
"expression": "value <= 2017",
"expression": "2005 <= value <= 2017",
"units": u.year_AD,
"about": gettext(
"Year at which to end photo user-day calculations. "
@@ -437,29 +437,10 @@ def execute(args):
None
"""
if ('predictor_table_path' in args and
args['predictor_table_path'] != ''):
_validate_same_id_lengths(args['predictor_table_path'])
_validate_same_projection(
args['aoi_path'], args['predictor_table_path'])
_validate_predictor_types(args['predictor_table_path'])

if ('predictor_table_path' in args and
'scenario_predictor_table_path' in args and
args['predictor_table_path'] != '' and
args['scenario_predictor_table_path'] != ''):
_validate_same_ids_and_types(
args['predictor_table_path'],
args['scenario_predictor_table_path'])
_validate_same_projection(
args['aoi_path'], args['scenario_predictor_table_path'])
_validate_predictor_types(args['scenario_predictor_table_path'])

if int(args['end_year']) < int(args['start_year']):
raise ValueError(
"Start year must be less than or equal to end year.\n"
f"start_year: {args['start_year']}\nend_year: {args['end_year']}")

# in case the user defines a hostname
if 'hostname' in args:
server_url = f"PYRO:natcap.invest.recreation@{args['hostname']}:{args['port']}"
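Two related year checks changed in this file: the spec expressions above now bound both ends of the 2005-2017 range, and the start/end comparison deleted from execute here reappears in validate() at the bottom of the diff. A hypothetical sketch of how a range expression such as '2005 <= value <= 2017' can be evaluated, assuming ``value`` is bound to the user's input (InVEST's real evaluator lives in its validation module):

def meets_expression(value, expression='2005 <= value <= 2017'):
    # Evaluate the spec expression with ``value`` bound and builtins off.
    return bool(eval(expression, {'__builtins__': {}}, {'value': value}))

assert meets_expression(2010) is True
assert meets_expression(2020) is False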
@@ -1545,9 +1526,8 @@ def _validate_same_id_lengths(table_path):
table_path (string): path to a csv table that has at least
the field 'id'
Raises:
ValueError if any of the fields in 'id' and 'type' don't match between
tables.
Returns:
string message if IDs are too long
"""
predictor_df = utils.read_csv_to_dataframe(
@@ -1557,9 +1537,8 @@
if len(p_id) > 10:
too_long.add(p_id)
if len(too_long) > 0:
raise ValueError(
"The following IDs are more than 10 characters long: "
f"{str(too_long)}")
return (
f'The following IDs are more than 10 characters long: {too_long}')
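The same raise-to-return refactor repeats for every helper in this file. A self-contained, simplified stand-in (not the real helper) showing the shape of the pattern:

def check_id_lengths(ids, max_len=10):
    # Return a message (or None) instead of raising, so the caller can
    # attach it to the offending args key as a validation warning.
    too_long = {i for i in ids if len(i) > max_len}
    if too_long:
        return (f'The following IDs are more than {max_len} '
                f'characters long: {too_long}')
    return None

assert check_id_lengths(['roads', 'parks']) is None
assert 'a_very_long_predictor_id' in check_id_lengths(['a_very_long_predictor_id'])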


def _validate_same_ids_and_types(
Expand All @@ -1577,12 +1556,8 @@ def _validate_same_ids_and_types(
at least the fields 'id' and 'type'
Returns:
None
Raises:
ValueError if any of the fields in 'id' and 'type' don't match between
tables.
string message if any of the fields in 'id' and 'type' don't match
between tables.
"""
predictor_df = utils.read_csv_to_dataframe(
predictor_table_path, MODEL_SPEC['args']['predictor_table_path'])
@@ -1596,10 +1571,8 @@
scenario_predictor_pairs = set([
(p_id, row['type']) for p_id, row in scenario_predictor_df.iterrows()])
if predictor_pairs != scenario_predictor_pairs:
raise ValueError('table pairs unequal.\n\t'
f'predictor: {predictor_pairs}\n\t'
f'scenario:{scenario_predictor_pairs}')
LOGGER.info('tables validate correctly')
return (f'table pairs unequal. predictor: {predictor_pairs} '
f'scenario: {scenario_predictor_pairs}')


def _validate_same_projection(base_vector_path, table_path):
Expand All @@ -1611,12 +1584,8 @@ def _validate_same_projection(base_vector_path, table_path):
the field 'path'
Returns:
None
Raises:
ValueError if the projections in each of the GIS types in the table
string message if the projections in each of the GIS types in the table
are not identical to the projection in base_vector_path
"""
# This will load the table as a list of paths which we can iterate through
# without bothering the rest of the table structure
@@ -1647,21 +1616,17 @@ def error_handler(err_level, err_no, err_msg):
else:
vector = gdal.OpenEx(path, gdal.OF_VECTOR)
if vector is None:
raise ValueError(f"{path} did not load")
return f"{path} did not load"
layer = vector.GetLayer()
ref = osr.SpatialReference(layer.GetSpatialRef().ExportToWkt())
layer = None
vector = None
if not base_ref.IsSame(ref):
LOGGER.warning(
f"{path} might have a different projection than the base AOI\n"
f"base:{base_ref.ExportToPrettyWkt()}\n"
f"current:{ref.ExportToPrettyWkt()}")
invalid_projections = True
if invalid_projections:
raise ValueError(
"One or more of the projections in the table did not match the "
"projection of the base vector")
return (
f"One or more of the projections in the table ({path}) did not "
f"match the projection of the base vector ({base_vector_path})")


def _validate_predictor_types(table_path):
@@ -1672,11 +1637,8 @@ def _validate_predictor_types(table_path):
the field 'type'
Returns:
None
Raises:
ValueError if any value in the ``type`` column does not match a valid
type, ignoring leading/trailing whitespace.
string message if any value in the ``type`` column does not match a
valid type, ignoring leading/trailing whitespace.
"""
df = utils.read_csv_to_dataframe(
table_path, MODEL_SPEC['args']['predictor_table_path'])
@@ -1687,8 +1649,8 @@
'polygon_area_coverage', 'polygon_percent_coverage'})
difference = set(df['type']).difference(valid_types)
if difference:
raise ValueError('The table contains invalid type value(s): '
f'{difference}. The allowed types are: {valid_types}')
return (f'The table contains invalid type value(s): {difference}. '
f'The allowed types are: {valid_types}')


def delay_op(last_time, time_delay, func):
@@ -1731,4 +1693,39 @@ def validate(args, limit_to=None):
be an empty list if validation succeeds.
"""
return validation.validate(args, MODEL_SPEC['args'])
validation_messages = validation.validate(args, MODEL_SPEC['args'])
sufficient_valid_keys = (validation.get_sufficient_keys(args) -
validation.get_invalid_keys(validation_messages))

validation_tuples = []
if 'predictor_table_path' in sufficient_valid_keys:
validation_tuples += [
(_validate_same_id_lengths, ['predictor_table_path']),
(_validate_predictor_types, ['predictor_table_path'])]
if 'aoi_path' in sufficient_valid_keys:
validation_tuples.append(
(_validate_same_projection, ['aoi_path', 'predictor_table_path']))
if 'scenario_predictor_table_path' in sufficient_valid_keys:
validation_tuples.append((
_validate_same_ids_and_types,
['predictor_table_path', 'scenario_predictor_table_path']))
if 'scenario_predictor_table_path' in sufficient_valid_keys:
validation_tuples.append((
_validate_predictor_types, ['scenario_predictor_table_path']))
if 'aoi_path' in sufficient_valid_keys:
validation_tuples.append((_validate_same_projection,
['aoi_path', 'scenario_predictor_table_path']))


for validate_func, key_list in validation_tuples:
msg = validate_func(*[args[key] for key in key_list])
if msg:
validation_messages.append((key_list, msg))

if 'start_year' in sufficient_valid_keys and 'end_year' in sufficient_valid_keys:
if int(args['end_year']) < int(args['start_year']):
validation_messages.append((
['start_year', 'end_year'],
"Start year must be less than or equal to end year."))

return validation_messages
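With the helpers wired into validate(), a caller now collects every problem in one pass instead of tripping over the first ValueError inside execute(). A hypothetical call (all paths and values invented):

warnings = validate({
    'workspace_dir': 'workspace',
    'aoi_path': 'aoi.shp',
    'predictor_table_path': 'predictors.csv',
    'start_year': '2017',
    'end_year': '2005',   # deliberately earlier than start_year
})
# (['start_year', 'end_year'],
#  'Start year must be less than or equal to end year.')
# should appear among the returned (keys, message) tuples.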
10 changes: 1 addition & 9 deletions src/natcap/invest/routedem.py
@@ -345,16 +345,8 @@ def execute(args):

if ('calculate_flow_direction' in args and
bool(args['calculate_flow_direction'])):
# All routing functions depend on this one task.
# Check the algorithm early so we can fail quickly, but only if we're
# doing some sort of hydrological routing
algorithm = args['algorithm'].upper()
try:
routing_funcs = _ROUTING_FUNCS[algorithm]
except KeyError:
raise RuntimeError(
'Invalid algorithm specified (%s). Must be one of %s' % (
args['algorithm'], ', '.join(sorted(_ROUTING_FUNCS.keys()))))
routing_funcs = _ROUTING_FUNCS[algorithm]

if 'dem_band_index' in args and args['dem_band_index'] not in (None, ''):
band_index = int(args['dem_band_index'])
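With the algorithm option checked up front by validate() against the model spec (the premise of this PR), execute can index the routing-function table directly. A simplified stand-in sketch (table values invented):

_ROUTING_FUNCS = {'D8': 'route_d8', 'MFD': 'route_mfd'}  # stand-in values

algorithm = 'd8'.upper()  # user input, case-normalized as above
routing_funcs = _ROUTING_FUNCS[algorithm]
# An unknown algorithm now surfaces as a validation warning before
# execute() runs, instead of the RuntimeError raised mid-setup.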