diff --git a/HISTORY.rst b/HISTORY.rst index 3608338b4..4554c375c 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -49,6 +49,9 @@ Unreleased Changes (`#1374 `_) * Datastack archives will now be correctly extracted (`#1308 `_) + * Validation of tables has been improved and standardized, which should + result in more readable validation errors. + (`#1379 `_) * Updated to ``pygeoprocessing`` 2.4.2. This includes an update to ``pygeoprocessing.zonal_statistics``, which is now more correct on certain edge cases. Aggregated model results may change slightly. diff --git a/Makefile b/Makefile index 39514077a..95df9b5b3 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ GIT_TEST_DATA_REPO_REV := da013683e80ea094fbb2309197e2488c02794da8 GIT_UG_REPO := https://github.com/natcap/invest.users-guide GIT_UG_REPO_PATH := doc/users-guide -GIT_UG_REPO_REV := 1db6aa847e07b774700ad1432172c791c4729dde +GIT_UG_REPO_REV := 6d40e3c8e56cfb09e579c58312d653086e69d6c4 ENV = "./env" ifeq ($(OS),Windows_NT) diff --git a/src/natcap/invest/annual_water_yield.py b/src/natcap/invest/annual_water_yield.py index 314ddb815..b0ebf1e9d 100644 --- a/src/natcap/invest/annual_water_yield.py +++ b/src/natcap/invest/annual_water_yield.py @@ -526,8 +526,9 @@ def execute(args): 'Checking that watersheds have entries for every `ws_id` in the ' 'valuation table.') # Open/read in valuation parameters from CSV file - valuation_df = utils.read_csv_to_dataframe( - args['valuation_table_path'], MODEL_SPEC['args']['valuation_table_path']) + valuation_df = validation.get_validated_dataframe( + args['valuation_table_path'], + **MODEL_SPEC['args']['valuation_table_path']) watershed_vector = gdal.OpenEx( args['watersheds_path'], gdal.OF_VECTOR) watershed_layer = watershed_vector.GetLayer() @@ -645,15 +646,15 @@ def execute(args): 'lulc': pygeoprocessing.get_raster_info(clipped_lulc_path)['nodata'][0]} # Open/read in the csv file into a dictionary and add to arguments - bio_df = utils.read_csv_to_dataframe(args['biophysical_table_path'], - MODEL_SPEC['args']['biophysical_table_path']) + bio_df = validation.get_validated_dataframe(args['biophysical_table_path'], + **MODEL_SPEC['args']['biophysical_table_path']) bio_lucodes = set(bio_df.index.values) bio_lucodes.add(nodata_dict['lulc']) LOGGER.debug(f'bio_lucodes: {bio_lucodes}') if 'demand_table_path' in args and args['demand_table_path'] != '': - demand_df = utils.read_csv_to_dataframe( - args['demand_table_path'], MODEL_SPEC['args']['demand_table_path']) + demand_df = validation.get_validated_dataframe( + args['demand_table_path'], **MODEL_SPEC['args']['demand_table_path']) demand_reclassify_dict = dict( [(lucode, row['demand']) for lucode, row in demand_df.iterrows()]) demand_lucodes = set(demand_df.index.values) diff --git a/src/natcap/invest/carbon.py b/src/natcap/invest/carbon.py index 0ea0025a7..9752426c8 100644 --- a/src/natcap/invest/carbon.py +++ b/src/natcap/invest/carbon.py @@ -364,8 +364,8 @@ def execute(args): (_INTERMEDIATE_BASE_FILES, intermediate_output_dir), (_TMP_BASE_FILES, output_dir)], file_suffix) - carbon_pool_df = utils.read_csv_to_dataframe( - args['carbon_pools_path'], MODEL_SPEC['args']['carbon_pools_path']) + carbon_pool_df = validation.get_validated_dataframe( + args['carbon_pools_path'], **MODEL_SPEC['args']['carbon_pools_path']) try: n_workers = int(args['n_workers']) diff --git a/src/natcap/invest/coastal_blue_carbon/coastal_blue_carbon.py b/src/natcap/invest/coastal_blue_carbon/coastal_blue_carbon.py index e610aef97..f1f7fc67a 100644 --- 
a/src/natcap/invest/coastal_blue_carbon/coastal_blue_carbon.py +++ b/src/natcap/invest/coastal_blue_carbon/coastal_blue_carbon.py @@ -570,9 +570,9 @@ def execute(args): task_graph, n_workers, intermediate_dir, output_dir, suffix = ( _set_up_workspace(args)) - snapshots = utils.read_csv_to_dataframe( + snapshots = validation.get_validated_dataframe( args['landcover_snapshot_csv'], - MODEL_SPEC['args']['landcover_snapshot_csv'] + **MODEL_SPEC['args']['landcover_snapshot_csv'] )['raster_path'].to_dict() # Phase 1: alignment and preparation of inputs @@ -593,9 +593,9 @@ def execute(args): # We're assuming that the LULC initial variables and the carbon pool # transient table are combined into a single lookup table. - biophysical_df = utils.read_csv_to_dataframe( + biophysical_df = validation.get_validated_dataframe( args['biophysical_table_path'], - MODEL_SPEC['args']['biophysical_table_path']) + **MODEL_SPEC['args']['biophysical_table_path']) # LULC Classnames are critical to the transition mapping, so they must be # unique. This check is here in ``execute`` because it's possible that @@ -963,9 +963,9 @@ def execute(args): prices = None if args.get('do_economic_analysis', False): # Do if truthy if args.get('use_price_table', False): - prices = utils.read_csv_to_dataframe( + prices = validation.get_validated_dataframe( args['price_table_path'], - MODEL_SPEC['args']['price_table_path'] + **MODEL_SPEC['args']['price_table_path'] )['price'].to_dict() else: inflation_rate = float(args['inflation_rate']) * 0.01 @@ -1948,8 +1948,8 @@ def _read_transition_matrix(transition_csv_path, biophysical_df): landcover transition, and the second contains accumulation rates for the pool for the landcover transition. """ - table = utils.read_csv_to_dataframe( - transition_csv_path, MODEL_SPEC['args']['landcover_transitions_table'] + table = validation.get_validated_dataframe( + transition_csv_path, **MODEL_SPEC['args']['landcover_transitions_table'] ).reset_index() lulc_class_to_lucode = {} @@ -2172,9 +2172,9 @@ def validate(args, limit_to=None): if ("landcover_snapshot_csv" not in invalid_keys and "landcover_snapshot_csv" in sufficient_keys): - snapshots = utils.read_csv_to_dataframe( + snapshots = validation.get_validated_dataframe( args['landcover_snapshot_csv'], - MODEL_SPEC['args']['landcover_snapshot_csv'] + **MODEL_SPEC['args']['landcover_snapshot_csv'] )['raster_path'].to_dict() for snapshot_year, snapshot_raster_path in snapshots.items(): @@ -2204,8 +2204,8 @@ def validate(args, limit_to=None): transitions_spec['columns']['[LULC CODE]']['options'].keys()) # lowercase options since utils call will lowercase table values transition_options = [x.lower() for x in transition_options] - transitions_df = utils.read_csv_to_dataframe( - args['landcover_transitions_table'], transitions_spec) + transitions_df = validation.get_validated_dataframe( + args['landcover_transitions_table'], **transitions_spec) transitions_mask = ~transitions_df.isin(transition_options) & ~transitions_df.isna() if transitions_mask.any(axis=None): transition_numpy_mask = transitions_mask.values diff --git a/src/natcap/invest/coastal_blue_carbon/preprocessor.py b/src/natcap/invest/coastal_blue_carbon/preprocessor.py index 9cef76c8f..03fda7714 100644 --- a/src/natcap/invest/coastal_blue_carbon/preprocessor.py +++ b/src/natcap/invest/coastal_blue_carbon/preprocessor.py @@ -180,9 +180,9 @@ def execute(args): os.path.join(args['workspace_dir'], 'taskgraph_cache'), n_workers, reporting_interval=5.0) - snapshots_dict = 
utils.read_csv_to_dataframe( + snapshots_dict = validation.get_validated_dataframe( args['landcover_snapshot_csv'], - MODEL_SPEC['args']['landcover_snapshot_csv'] + **MODEL_SPEC['args']['landcover_snapshot_csv'] )['raster_path'].to_dict() # Align the raster stack for analyzing the various transitions. @@ -213,9 +213,9 @@ def execute(args): target_path_list=aligned_snapshot_paths, task_name='Align input landcover rasters') - landcover_df = utils.read_csv_to_dataframe( + landcover_df = validation.get_validated_dataframe( args['lulc_lookup_table_path'], - MODEL_SPEC['args']['lulc_lookup_table_path']) + **MODEL_SPEC['args']['lulc_lookup_table_path']) target_transition_table = os.path.join( output_dir, TRANSITION_TABLE.format(suffix=suffix)) diff --git a/src/natcap/invest/coastal_vulnerability.py b/src/natcap/invest/coastal_vulnerability.py index 3f591231c..ced65f9d7 100644 --- a/src/natcap/invest/coastal_vulnerability.py +++ b/src/natcap/invest/coastal_vulnerability.py @@ -461,10 +461,19 @@ "Shore points with associated habitat data"), "index_col": "shore_id", "columns": { + # shore_id and R_hab come first so that they get + # matched before [HABITAT], which matches everything "shore_id": { "type": "integer", "about": "Shore point ID" }, + "R_hab": { + "about": ( + "Overall habitat exposure rank, the " + "result of equation (15)"), + "type": "number", + "units": u.none + }, "[HABITAT]": { "about": ( "Habitat exposure rank for the given " @@ -477,13 +486,6 @@ "rank defined in the Habitats Table input."), "type": "number", "units": u.none - }, - "R_hab": { - "about": ( - "Overall habitat exposure rank, the " - "result of equation (15)"), - "type": "number", - "units": u.none } } } @@ -2302,8 +2304,8 @@ def _schedule_habitat_tasks( list of pickle file path strings """ - habitat_dataframe = utils.read_csv_to_dataframe( - habitat_table_path, MODEL_SPEC['args']['habitat_table_path'] + habitat_dataframe = validation.get_validated_dataframe( + habitat_table_path, **MODEL_SPEC['args']['habitat_table_path'] ).rename(columns={'protection distance (m)': 'distance'}) habitat_task_list = [] @@ -2831,8 +2833,8 @@ def assemble_results_and_calculate_exposure( with open(pickle_path, 'rb') as file: final_values_dict[var_name] = pickle.load(file) - habitat_df = utils.read_csv_to_dataframe( - habitat_protection_path, MODEL_SPEC['outputs']['intermediate'][ + habitat_df = validation.get_validated_dataframe( + habitat_protection_path, **MODEL_SPEC['outputs']['intermediate'][ 'contents']['habitats']['contents']['habitat_protection.csv'] ).rename(columns={'r_hab': 'R_hab'}) output_layer.StartTransaction() @@ -3459,8 +3461,8 @@ def _validate_habitat_table_paths(habitat_table_path): Raises: ValueError if any vector in the ``path`` column cannot be opened. """ - habitat_dataframe = utils.read_csv_to_dataframe( - habitat_table_path, MODEL_SPEC['args']['habitat_table_path']) + habitat_dataframe = validation.get_validated_dataframe( + habitat_table_path, **MODEL_SPEC['args']['habitat_table_path']) bad_paths = [] for habitat_row in habitat_dataframe.itertuples(): try: diff --git a/src/natcap/invest/crop_production_percentile.py b/src/natcap/invest/crop_production_percentile.py index c91fb5ae9..a2ad8e772 100644 --- a/src/natcap/invest/crop_production_percentile.py +++ b/src/natcap/invest/crop_production_percentile.py @@ -468,9 +468,9 @@ def execute(args): None. 
""" - crop_to_landcover_df = utils.read_csv_to_dataframe( + crop_to_landcover_df = validation.get_validated_dataframe( args['landcover_to_crop_table_path'], - MODEL_SPEC['args']['landcover_to_crop_table_path']) + **MODEL_SPEC['args']['landcover_to_crop_table_path']) bad_crop_name_list = [] for crop_name in crop_to_landcover_df.index: crop_climate_bin_raster_path = os.path.join( @@ -549,9 +549,9 @@ def execute(args): climate_percentile_yield_table_path = os.path.join( args['model_data_path'], _CLIMATE_PERCENTILE_TABLE_PATTERN % crop_name) - crop_climate_percentile_df = utils.read_csv_to_dataframe( + crop_climate_percentile_df = validation.get_validated_dataframe( climate_percentile_yield_table_path, - MODEL_SPEC['args']['model_data_path']['contents'][ + **MODEL_SPEC['args']['model_data_path']['contents'][ 'climate_percentile_yield_tables']['contents'][ '[CROP]_percentile_yield_table.csv']) yield_percentile_headers = [ @@ -707,9 +707,9 @@ def execute(args): # both 'crop_nutrient.csv' and 'crop' are known data/header values for # this model data. - nutrient_df = utils.read_csv_to_dataframe( + nutrient_df = validation.get_validated_dataframe( os.path.join(args['model_data_path'], 'crop_nutrient.csv'), - MODEL_SPEC['args']['model_data_path']['contents']['crop_nutrient.csv']) + **MODEL_SPEC['args']['model_data_path']['contents']['crop_nutrient.csv']) result_table_path = os.path.join( output_dir, 'result_table%s.csv' % file_suffix) diff --git a/src/natcap/invest/crop_production_regression.py b/src/natcap/invest/crop_production_regression.py index da29ec52f..4b10ffdd3 100644 --- a/src/natcap/invest/crop_production_regression.py +++ b/src/natcap/invest/crop_production_regression.py @@ -495,13 +495,13 @@ def execute(args): LOGGER.info( "Checking if the landcover raster is missing lucodes") - crop_to_landcover_df = utils.read_csv_to_dataframe( + crop_to_landcover_df = validation.get_validated_dataframe( args['landcover_to_crop_table_path'], - MODEL_SPEC['args']['landcover_to_crop_table_path']) + **MODEL_SPEC['args']['landcover_to_crop_table_path']) - crop_to_fertilization_rate_df = utils.read_csv_to_dataframe( + crop_to_fertilization_rate_df = validation.get_validated_dataframe( args['fertilization_rate_table_path'], - MODEL_SPEC['args']['fertilization_rate_table_path']) + **MODEL_SPEC['args']['fertilization_rate_table_path']) crop_lucodes = list(crop_to_landcover_df[_EXPECTED_LUCODE_TABLE_HEADER]) @@ -576,10 +576,10 @@ def execute(args): task_name='crop_climate_bin') dependent_task_list.append(crop_climate_bin_task) - crop_regression_df = utils.read_csv_to_dataframe( + crop_regression_df = validation.get_validated_dataframe( os.path.join(args['model_data_path'], _REGRESSION_TABLE_PATTERN % crop_name), - MODEL_SPEC['args']['model_data_path']['contents'][ + **MODEL_SPEC['args']['model_data_path']['contents'][ 'climate_regression_yield_tables']['contents'][ '[CROP]_regression_yield_table.csv']) for _, row in crop_regression_df.iterrows(): @@ -803,9 +803,9 @@ def execute(args): # both 'crop_nutrient.csv' and 'crop' are known data/header values for # this model data. 
- nutrient_df = utils.read_csv_to_dataframe( + nutrient_df = validation.get_validated_dataframe( os.path.join(args['model_data_path'], 'crop_nutrient.csv'), - MODEL_SPEC['args']['model_data_path']['contents']['crop_nutrient.csv']) + **MODEL_SPEC['args']['model_data_path']['contents']['crop_nutrient.csv']) LOGGER.info("Generating report table") crop_names = list(crop_to_landcover_df.index) diff --git a/src/natcap/invest/datastack.py b/src/natcap/invest/datastack.py index 4a9cd52d4..461a12d60 100644 --- a/src/natcap/invest/datastack.py +++ b/src/natcap/invest/datastack.py @@ -35,6 +35,7 @@ from osgeo import gdal from . import utils +from . import validation try: from . import __version__ @@ -333,8 +334,8 @@ def build_datastack_archive(args, model_name, datastack_path): contained_files_dir = os.path.join( data_dir, f'{key}_csv_data') - dataframe = utils.read_csv_to_dataframe( - source_path, args_spec[key]) + dataframe = validation.get_validated_dataframe( + source_path, **args_spec[key]) csv_source_dir = os.path.abspath(os.path.dirname(source_path)) for spatial_column_name in spatial_columns: # Iterate through the spatial columns, identify the set of diff --git a/src/natcap/invest/forest_carbon_edge_effect.py b/src/natcap/invest/forest_carbon_edge_effect.py index ee73f3ac4..bb0fb399d 100644 --- a/src/natcap/invest/forest_carbon_edge_effect.py +++ b/src/natcap/invest/forest_carbon_edge_effect.py @@ -413,9 +413,9 @@ def execute(args): # Map non-forest landcover codes to carbon biomasses LOGGER.info('Calculating direct mapped carbon stocks') carbon_maps = [] - biophysical_df = utils.read_csv_to_dataframe( + biophysical_df = validation.get_validated_dataframe( args['biophysical_table_path'], - MODEL_SPEC['args']['biophysical_table_path']) + **MODEL_SPEC['args']['biophysical_table_path']) pool_list = [('c_above', True)] if args['pools_to_calculate'] == 'all': pool_list.extend([ @@ -624,8 +624,8 @@ def _calculate_lulc_carbon_map( """ # classify forest pixels from lulc - biophysical_df = utils.read_csv_to_dataframe( - biophysical_table_path, MODEL_SPEC['args']['biophysical_table_path']) + biophysical_df = validation.get_validated_dataframe( + biophysical_table_path, **MODEL_SPEC['args']['biophysical_table_path']) lucode_to_per_cell_carbon = {} cell_size = pygeoprocessing.get_raster_info( @@ -688,8 +688,8 @@ def _map_distance_from_tropical_forest_edge( """ # Build a list of forest lucodes - biophysical_df = utils.read_csv_to_dataframe( - biophysical_table_path, MODEL_SPEC['args']['biophysical_table_path']) + biophysical_df = validation.get_validated_dataframe( + biophysical_table_path, **MODEL_SPEC['args']['biophysical_table_path']) forest_codes = biophysical_df[biophysical_df['is_tropical_forest']].index.values # Make a raster where 1 is non-forest landcover types and 0 is forest diff --git a/src/natcap/invest/habitat_quality.py b/src/natcap/invest/habitat_quality.py index 3f8de3f7e..993edeb9a 100644 --- a/src/natcap/invest/habitat_quality.py +++ b/src/natcap/invest/habitat_quality.py @@ -372,12 +372,12 @@ def execute(args): LOGGER.info("Checking Threat and Sensitivity tables for compliance") # Get CSVs as dictionaries and ensure the key is a string for threats. 
- threat_df = utils.read_csv_to_dataframe( - args['threats_table_path'], MODEL_SPEC['args']['threats_table_path'] + threat_df = validation.get_validated_dataframe( + args['threats_table_path'], **MODEL_SPEC['args']['threats_table_path'] ).fillna('') - sensitivity_df = utils.read_csv_to_dataframe( + sensitivity_df = validation.get_validated_dataframe( args['sensitivity_table_path'], - MODEL_SPEC['args']['sensitivity_table_path']) + **MODEL_SPEC['args']['sensitivity_table_path']) half_saturation_constant = float(args['half_saturation_constant']) @@ -1086,12 +1086,12 @@ def validate(args, limit_to=None): "sensitivity_table_path" not in invalid_keys and "threat_raster_folder" not in invalid_keys): # Get CSVs as dictionaries and ensure the key is a string for threats. - threat_df = utils.read_csv_to_dataframe( + threat_df = validation.get_validated_dataframe( args['threats_table_path'], - MODEL_SPEC['args']['threats_table_path']).fillna('') - sensitivity_df = utils.read_csv_to_dataframe( + **MODEL_SPEC['args']['threats_table_path']).fillna('') + sensitivity_df = validation.get_validated_dataframe( args['sensitivity_table_path'], - MODEL_SPEC['args']['sensitivity_table_path']) + **MODEL_SPEC['args']['sensitivity_table_path']) # check that the threat names in the threats table match with the # threats columns in the sensitivity table. diff --git a/src/natcap/invest/hra.py b/src/natcap/invest/hra.py index 99a8f035c..1b964f499 100644 --- a/src/natcap/invest/hra.py +++ b/src/natcap/invest/hra.py @@ -551,8 +551,7 @@ def execute(args): f" Missing from criteria table: {missing_from_criteria_table}" ) - criteria_df = pandas.read_csv(composite_criteria_table_path, - index_col=False) + criteria_df = utils.read_csv_to_dataframe(composite_criteria_table_path) # Because criteria may be spatial, we need to prepare those spatial inputs # as well. spatial_criteria_attrs = {} @@ -1786,8 +1785,8 @@ def _parse_info_table(info_table_path): info_table_path = os.path.abspath(info_table_path) try: - table = utils.read_csv_to_dataframe( - info_table_path, MODEL_SPEC['args']['info_table_path']) + table = validation.get_validated_dataframe( + info_table_path, **MODEL_SPEC['args']['info_table_path']) except ValueError as err: if 'Index has duplicate keys' in str(err): raise ValueError("Habitat and stressor names may not overlap.") @@ -1829,8 +1828,8 @@ def _parse_criteria_table(criteria_table_path, target_composite_csv_path): """ # This function requires that the table is read as a numpy array, so it's # easiest to read the table directly. - table = pandas.read_csv(criteria_table_path, header=None, sep=None, - engine='python').to_numpy() + table = utils.read_csv_to_dataframe( + criteria_table_path, header=None).to_numpy() # clean up any leading or trailing whitespace. for row_num in range(table.shape[0]): @@ -2379,8 +2378,8 @@ def _override_datastack_archive_criteria_table_path( the data dir. 
""" args_key = 'criteria_table_path' - criteria_table_array = pandas.read_csv( - criteria_table_path, header=None, sep=None, engine='python').to_numpy() + criteria_table_array = utils.read_csv_to_dataframe( + criteria_table_path, header=None).to_numpy() contained_data_dir = os.path.join(data_dir, f'{args_key}_data') known_rating_cols = set() diff --git a/src/natcap/invest/ndr/ndr.py b/src/natcap/invest/ndr/ndr.py index 9d7370631..4958a6252 100644 --- a/src/natcap/invest/ndr/ndr.py +++ b/src/natcap/invest/ndr/ndr.py @@ -1,4 +1,5 @@ """InVEST Nutrient Delivery Ratio (NDR) module.""" +import copy import itertools import logging import os @@ -574,9 +575,9 @@ def execute(args): if args['calc_' + nutrient_id]: nutrients_to_process.append(nutrient_id) - biophysical_df = utils.read_csv_to_dataframe( + biophysical_df = validation.get_validated_dataframe( args['biophysical_table_path'], - MODEL_SPEC['args']['biophysical_table_path']) + **MODEL_SPEC['args']['biophysical_table_path']) # these are used for aggregation in the last step field_pickle_map = {} @@ -1161,39 +1162,30 @@ def validate(args, limit_to=None): be an empty list if validation succeeds. """ + spec_copy = copy.deepcopy(MODEL_SPEC['args']) + # Check required fields given the state of ``calc_n`` and ``calc_p`` + nutrients_selected = [] + for nutrient_letter in ('n', 'p'): + if f'calc_{nutrient_letter}' in args and args[f'calc_{nutrient_letter}']: + nutrients_selected.append(nutrient_letter) + + for param in ['load', 'eff', 'crit_len']: + for nutrient in nutrients_selected: + spec_copy['biophysical_table_path']['columns'][f'{param}_{nutrient}'] = ( + spec_copy['biophysical_table_path']['columns'][f'{param}_[NUTRIENT]']) + spec_copy['biophysical_table_path']['columns'][f'{param}_{nutrient}']['required'] = True + spec_copy['biophysical_table_path']['columns'].pop(f'{param}_[NUTRIENT]') + + if 'n' in nutrients_selected: + spec_copy['biophysical_table_path']['columns']['proportion_subsurface_n'][ + 'required'] = True + validation_warnings = validation.validate( - args, MODEL_SPEC['args'], MODEL_SPEC['args_with_spatial_overlap']) - - invalid_keys = validation.get_invalid_keys(validation_warnings) - - LOGGER.debug('Starting logging for biophysical table') - if 'biophysical_table_path' not in invalid_keys: - # Check required fields given the state of ``calc_n`` and ``calc_p`` - nutrient_required_fields = ['lucode'] - nutrients_selected = set() - for nutrient_letter in ('n', 'p'): - if nutrient_letter == 'n': - nutrient_required_fields += ['proportion_subsurface_n'] - do_nutrient_key = f'calc_{nutrient_letter}' - if do_nutrient_key in args and args[do_nutrient_key]: - nutrients_selected.add(do_nutrient_key) - nutrient_required_fields += [ - f'load_{nutrient_letter}', - f'eff_{nutrient_letter}', - f'crit_len_{nutrient_letter}' - ] - if not nutrients_selected: - validation_warnings.append( - (['calc_n', 'calc_p'], MISSING_NUTRIENT_MSG)) - - # Check that these nutrient-specific keys are in the table - # validate has already checked all the other keys - error_msg = validation.check_csv( - args['biophysical_table_path'], - columns={key: '' for key in nutrient_required_fields}) - if error_msg: - validation_warnings.append( - (['biophysical_table_path'], error_msg)) + args, spec_copy, MODEL_SPEC['args_with_spatial_overlap']) + + if not nutrients_selected: + validation_warnings.append( + (['calc_n', 'calc_p'], MISSING_NUTRIENT_MSG)) return validation_warnings diff --git a/src/natcap/invest/pollination.py b/src/natcap/invest/pollination.py index 
c5e5fc369..f6fe6d221 100644 --- a/src/natcap/invest/pollination.py +++ b/src/natcap/invest/pollination.py @@ -1205,8 +1205,8 @@ def _parse_scenario_variables(args): else: farm_vector_path = None - guild_df = utils.read_csv_to_dataframe( - guild_table_path, MODEL_SPEC['args']['guild_table_path']) + guild_df = validation.get_validated_dataframe( + guild_table_path, **MODEL_SPEC['args']['guild_table_path']) LOGGER.info('Checking to make sure guild table has all expected headers') for header in _EXPECTED_GUILD_HEADERS: @@ -1217,9 +1217,9 @@ def _parse_scenario_variables(args): f"'{header}' but was unable to find one. Here are all the " f"headers from {guild_table_path}: {', '.join(guild_df.columns)}") - landcover_biophysical_df = utils.read_csv_to_dataframe( + landcover_biophysical_df = validation.get_validated_dataframe( landcover_biophysical_table_path, - MODEL_SPEC['args']['landcover_biophysical_table_path']) + **MODEL_SPEC['args']['landcover_biophysical_table_path']) biophysical_table_headers = landcover_biophysical_df.columns for header in _EXPECTED_BIOPHYSICAL_HEADERS: matches = re.findall(header, " ".join(biophysical_table_headers)) diff --git a/src/natcap/invest/recreation/recmodel_client.py b/src/natcap/invest/recreation/recmodel_client.py index f212ecb82..c695eef54 100644 --- a/src/natcap/invest/recreation/recmodel_client.py +++ b/src/natcap/invest/recreation/recmodel_client.py @@ -842,8 +842,8 @@ def _schedule_predictor_data_processing( 'line_intersect_length': _line_intersect_length, } - predictor_df = utils.read_csv_to_dataframe( - predictor_table_path, MODEL_SPEC['args']['predictor_table_path']) + predictor_df = validation.get_validated_dataframe( + predictor_table_path, **MODEL_SPEC['args']['predictor_table_path']) predictor_task_list = [] predictor_json_list = [] # tracks predictor files to add to shp @@ -1530,8 +1530,8 @@ def _validate_same_id_lengths(table_path): string message if IDs are too long """ - predictor_df = utils.read_csv_to_dataframe( - table_path, MODEL_SPEC['args']['predictor_table_path']) + predictor_df = validation.get_validated_dataframe( + table_path, **MODEL_SPEC['args']['predictor_table_path']) too_long = set() for p_id in predictor_df.index: if len(p_id) > 10: @@ -1559,12 +1559,12 @@ def _validate_same_ids_and_types( string message if any of the fields in 'id' and 'type' don't match between tables. 
""" - predictor_df = utils.read_csv_to_dataframe( - predictor_table_path, MODEL_SPEC['args']['predictor_table_path']) + predictor_df = validation.get_validated_dataframe( + predictor_table_path, **MODEL_SPEC['args']['predictor_table_path']) - scenario_predictor_df = utils.read_csv_to_dataframe( + scenario_predictor_df = validation.get_validated_dataframe( scenario_predictor_table_path, - MODEL_SPEC['args']['scenario_predictor_table_path']) + **MODEL_SPEC['args']['scenario_predictor_table_path']) predictor_pairs = set([ (p_id, row['type']) for p_id, row in predictor_df.iterrows()]) @@ -1589,8 +1589,8 @@ def _validate_same_projection(base_vector_path, table_path): """ # This will load the table as a list of paths which we can iterate through # without bothering the rest of the table structure - data_paths = utils.read_csv_to_dataframe( - table_path, MODEL_SPEC['args']['predictor_table_path'] + data_paths = validation.get_validated_dataframe( + table_path, **MODEL_SPEC['args']['predictor_table_path'] )['path'].tolist() base_vector = gdal.OpenEx(base_vector_path, gdal.OF_VECTOR) @@ -1640,8 +1640,8 @@ def _validate_predictor_types(table_path): string message if any value in the ``type`` column does not match a valid type, ignoring leading/trailing whitespace. """ - df = utils.read_csv_to_dataframe( - table_path, MODEL_SPEC['args']['predictor_table_path']) + df = validation.get_validated_dataframe( + table_path, **MODEL_SPEC['args']['predictor_table_path']) # ignore leading/trailing whitespace because it will be removed # when the type values are used valid_types = set({'raster_mean', 'raster_sum', 'point_count', diff --git a/src/natcap/invest/sdr/sdr.py b/src/natcap/invest/sdr/sdr.py index 5833ac174..9a8ad2d66 100644 --- a/src/natcap/invest/sdr/sdr.py +++ b/src/natcap/invest/sdr/sdr.py @@ -494,8 +494,9 @@ def execute(args): """ file_suffix = utils.make_suffix_string(args, 'results_suffix') - biophysical_df = utils.read_csv_to_dataframe( - args['biophysical_table_path'], MODEL_SPEC['args']['biophysical_table_path']) + biophysical_df = validation.get_validated_dataframe( + args['biophysical_table_path'], + **MODEL_SPEC['args']['biophysical_table_path']) # Test to see if c or p values are outside of 0..1 for key in ['usle_c', 'usle_p']: diff --git a/src/natcap/invest/seasonal_water_yield/seasonal_water_yield.py b/src/natcap/invest/seasonal_water_yield/seasonal_water_yield.py index e969be9e0..4f8d7396a 100644 --- a/src/natcap/invest/seasonal_water_yield/seasonal_water_yield.py +++ b/src/natcap/invest/seasonal_water_yield/seasonal_water_yield.py @@ -587,19 +587,19 @@ def execute(args): # fail early on a missing required rain events table if (not args['user_defined_local_recharge'] and not args['user_defined_climate_zones']): - rain_events_df = utils.read_csv_to_dataframe( + rain_events_df = validation.get_validated_dataframe( args['rain_events_table_path'], - MODEL_SPEC['args']['rain_events_table_path']) + **MODEL_SPEC['args']['rain_events_table_path']) - biophysical_df = utils.read_csv_to_dataframe( + biophysical_df = validation.get_validated_dataframe( args['biophysical_table_path'], - MODEL_SPEC['args']['biophysical_table_path']) + **MODEL_SPEC['args']['biophysical_table_path']) if args['monthly_alpha']: # parse out the alpha lookup table of the form (month_id: alpha_val) - alpha_month_map = utils.read_csv_to_dataframe( + alpha_month_map = validation.get_validated_dataframe( args['monthly_alpha_path'], - MODEL_SPEC['args']['monthly_alpha_path'] + **MODEL_SPEC['args']['monthly_alpha_path'] 
)['alpha'].to_dict() else: # make all 12 entries equal to args['alpha_m'] @@ -766,9 +766,9 @@ def execute(args): 'table_name': 'Climate Zone'} for month_id in range(N_MONTHS): if args['user_defined_climate_zones']: - cz_rain_events_df = utils.read_csv_to_dataframe( + cz_rain_events_df = validation.get_validated_dataframe( args['climate_zone_table_path'], - MODEL_SPEC['args']['climate_zone_table_path']) + **MODEL_SPEC['args']['climate_zone_table_path']) climate_zone_rain_events_month = ( cz_rain_events_df[MONTH_ID_TO_LABEL[month_id]].to_dict()) n_events_task = task_graph.add_task( diff --git a/src/natcap/invest/stormwater.py b/src/natcap/invest/stormwater.py index 5bd7f5366..ba10e31d2 100644 --- a/src/natcap/invest/stormwater.py +++ b/src/natcap/invest/stormwater.py @@ -486,8 +486,8 @@ def execute(args): # Build a lookup dictionary mapping each LULC code to its row # sort by the LULC codes upfront because we use the sorted list in multiple # places. it's more efficient to do this once. - biophysical_df = utils.read_csv_to_dataframe( - args['biophysical_table'], MODEL_SPEC['args']['biophysical_table'] + biophysical_df = validation.get_validated_dataframe( + args['biophysical_table'], **MODEL_SPEC['args']['biophysical_table'] ).sort_index() sorted_lucodes = biophysical_df.index.to_list() diff --git a/src/natcap/invest/urban_cooling_model.py b/src/natcap/invest/urban_cooling_model.py index 1f1f86b53..1581d7521 100644 --- a/src/natcap/invest/urban_cooling_model.py +++ b/src/natcap/invest/urban_cooling_model.py @@ -413,8 +413,9 @@ def execute(args): intermediate_dir = os.path.join( args['workspace_dir'], 'intermediate') utils.make_directories([args['workspace_dir'], intermediate_dir]) - biophysical_df = utils.read_csv_to_dataframe( - args['biophysical_table_path'], MODEL_SPEC['args']['biophysical_table_path']) + biophysical_df = validation.get_validated_dataframe( + args['biophysical_table_path'], + **MODEL_SPEC['args']['biophysical_table_path']) # cast to float and calculate relative weights # Use default weights for shade, albedo, eti if the user didn't provide @@ -1084,9 +1085,9 @@ def calculate_energy_savings( for field in target_building_layer.schema] type_field_index = fieldnames.index('type') - energy_consumption_df = utils.read_csv_to_dataframe( + energy_consumption_df = validation.get_validated_dataframe( energy_consumption_table_path, - MODEL_SPEC['args']['energy_consumption_table_path']) + **MODEL_SPEC['args']['energy_consumption_table_path']) target_building_layer.StartTransaction() last_time = time.time() diff --git a/src/natcap/invest/urban_flood_risk_mitigation.py b/src/natcap/invest/urban_flood_risk_mitigation.py index 7703dfcd1..7f865e5e3 100644 --- a/src/natcap/invest/urban_flood_risk_mitigation.py +++ b/src/natcap/invest/urban_flood_risk_mitigation.py @@ -298,9 +298,9 @@ def execute(args): task_name='align raster stack') # Load CN table - cn_df = utils.read_csv_to_dataframe( + cn_df = validation.get_validated_dataframe( args['curve_number_table_path'], - MODEL_SPEC['args']['curve_number_table_path']) + **MODEL_SPEC['args']['curve_number_table_path']) # make cn_table into a 2d array where first dim is lucode, second is # 0..3 to correspond to CN_A..CN_D @@ -640,9 +640,9 @@ def _calculate_damage_to_infrastructure_in_aoi( infrastructure_vector = gdal.OpenEx(structures_vector_path, gdal.OF_VECTOR) infrastructure_layer = infrastructure_vector.GetLayer() - damage_type_map = utils.read_csv_to_dataframe( + damage_type_map = validation.get_validated_dataframe( structures_damage_table, 
- MODEL_SPEC['args']['infrastructure_damage_loss_table_path'] + **MODEL_SPEC['args']['infrastructure_damage_loss_table_path'] )['damage'].to_dict() infrastructure_layer_defn = infrastructure_layer.GetLayerDefn() @@ -942,9 +942,9 @@ def validate(args, limit_to=None): if ("curve_number_table_path" not in invalid_keys and "curve_number_table_path" in sufficient_keys): # Load CN table. Resulting DF has index and CN_X columns only. - cn_df = utils.read_csv_to_dataframe( + cn_df = validation.get_validated_dataframe( args['curve_number_table_path'], - MODEL_SPEC['args']['curve_number_table_path']) + **MODEL_SPEC['args']['curve_number_table_path']) # Check for NaN values. nan_mask = cn_df.isna() if nan_mask.any(axis=None): diff --git a/src/natcap/invest/urban_nature_access.py b/src/natcap/invest/urban_nature_access.py index f02a44361..c0fc76296 100644 --- a/src/natcap/invest/urban_nature_access.py +++ b/src/natcap/invest/urban_nature_access.py @@ -924,9 +924,9 @@ def execute(args): aoi_reprojection_task, lulc_mask_task] ) - attr_table = utils.read_csv_to_dataframe( + attr_table = validation.get_validated_dataframe( args['lulc_attribute_table'], - MODEL_SPEC['args']['lulc_attribute_table']) + **MODEL_SPEC['args']['lulc_attribute_table']) kernel_paths = {} # search_radius, kernel path kernel_tasks = {} # search_radius, kernel task @@ -944,9 +944,9 @@ def execute(args): lucode_to_search_radii = list( urban_nature_attrs[['search_radius_m']].itertuples(name=None)) elif args['search_radius_mode'] == RADIUS_OPT_POP_GROUP: - pop_group_table = utils.read_csv_to_dataframe( + pop_group_table = validation.get_validated_dataframe( args['population_group_radii_table'], - MODEL_SPEC['args']['population_group_radii_table']) + **MODEL_SPEC['args']['population_group_radii_table']) search_radii = set(pop_group_table['search_radius_m'].unique()) # Build a dict of {pop_group: search_radius_m} search_radii_by_pop_group = pop_group_table['search_radius_m'].to_dict() @@ -1835,8 +1835,8 @@ def _reclassify_urban_nature_area( Returns: ``None`` """ - lulc_attribute_df = utils.read_csv_to_dataframe( - lulc_attribute_table, MODEL_SPEC['args']['lulc_attribute_table']) + lulc_attribute_df = validation.get_validated_dataframe( + lulc_attribute_table, **MODEL_SPEC['args']['lulc_attribute_table']) squared_pixel_area = abs( numpy.multiply(*_square_off_pixels(lulc_raster_path))) diff --git a/src/natcap/invest/utils.py b/src/natcap/invest/utils.py index d845444e0..9ee61a060 100644 --- a/src/natcap/invest/utils.py +++ b/src/natcap/invest/utils.py @@ -410,125 +410,48 @@ def expand_path(path, base_path): return os.path.abspath(os.path.join(os.path.dirname(base_path), path)) -def read_csv_to_dataframe(path, spec, **kwargs): +def read_csv_to_dataframe(path, **kwargs): """Return a dataframe representation of the CSV. - Wrapper around ``pandas.read_csv`` that performs some common data cleaning - based on information in the arg spec. - - Columns are filtered to just those that match a pattern in the spec. - Column names are lowercased and whitespace is stripped off. Empty rows are - dropped. 
Values in each column are processed and cast to an appropriate - dtype according to the type in the spec: - - - Values in raster, vector, csv, file, and directory columns are cast to - str, whitespace stripped, and expanded as paths relative to the input path - - Values in freestyle_string and option_string columns are cast to str, - whitespace stripped, and converted to lowercase - - Values in number, ratio, and percent columns are cast to float - - Values in integer columns are cast to int - - Values in boolean columns are cast to bool - - Empty or NA cells are returned as ``numpy.nan`` (for floats) or - ``pandas.NA`` (for all other types). - - Also sets custom defaults for some kwargs passed to ``pandas.read_csv``, - which you can override with kwargs: + Wrapper around ``pandas.read_csv`` that performs some common data cleaning. + Column names are lowercased and whitespace is stripped off. Empty rows and + columns are dropped. Sets custom defaults for some kwargs passed to + ``pandas.read_csv``, which you can override with kwargs: - sep=None: lets the Python engine infer the separator - engine='python': The 'python' engine supports the sep=None option. - encoding='utf-8-sig': 'utf-8-sig' handles UTF-8 with or without BOM. + - index_col=False: force pandas not to index by any column, useful in + case of trailing separators Args: path (str): path to a CSV file - spec (dict): dictionary specifying the structure of the CSV table **kwargs: additional kwargs will be passed to ``pandas.read_csv`` Returns: pandas.DataFrame with the contents of the given CSV """ - # build up a list of regex patterns to match columns against columns from - # the table that match a pattern in this list (after stripping whitespace - # and lowercasing) will be included in the dataframe - patterns = [] - for column in spec['columns']: - column = column.lower() - match = re.match(r'(.*)\[(.+)\](.*)', column) - if match: - # for column name patterns, convert it to a regex pattern - groups = match.groups() - patterns.append(f'{groups[0]}(.+){groups[2]}') - else: - # for regular column names, use the exact name as the pattern - patterns.append(column.replace('(', '\(').replace(')', '\)')) - try: - # set index_col=False to force pandas not to index by any column - # this is useful in case of trailing separators - # we'll explicitly set the index column later on df = pandas.read_csv( path, - index_col=False, - usecols=lambda col: any( - re.fullmatch(pattern, col.strip().lower()) for pattern in patterns - ), **{ + 'index_col': False, 'sep': None, 'engine': 'python', 'encoding': 'utf-8-sig', **kwargs }) except UnicodeDecodeError as error: - LOGGER.error( + raise ValueError( f'The file {path} must be encoded as UTF-8 or ASCII') - raise error + + # drop columns whose header is NA + df = df[[col for col in df.columns if not pandas.isna(col)]] # strip whitespace from column names and convert to lowercase # this won't work on integer types, which happens if you set header=None # however, there's little reason to use this function if there's no header - df.columns = df.columns.str.strip().str.lower() - - # drop any empty rows - df = df.dropna(how="all") - - available_cols = set(df.columns) - - for col_spec, pattern in zip(spec['columns'].values(), patterns): - matching_cols = [c for c in available_cols if re.match(pattern, c)] - available_cols -= set(matching_cols) - for col in matching_cols: - try: - if col_spec['type'] in ['csv', 'directory', 'file', 'raster', 'vector', {'vector', 'raster'}]: - df[col] = df[col].apply( - lambda 
p: p if pandas.isna(p) else expand_path(str(p).strip(), path)) - df[col] = df[col].astype(pandas.StringDtype()) - elif col_spec['type'] in {'freestyle_string', 'option_string'}: - df[col] = df[col].apply( - lambda s: s if pandas.isna(s) else str(s).strip().lower()) - df[col] = df[col].astype(pandas.StringDtype()) - elif col_spec['type'] in {'number', 'percent', 'ratio'}: - df[col] = df[col].astype(float) - elif col_spec['type'] == 'integer': - df[col] = df[col].astype(pandas.Int64Dtype()) - elif col_spec['type'] == 'boolean': - df[col] = df[col].astype('boolean') - except ValueError as err: - raise ValueError( - f'Value(s) in the "{col}" column of the table {path} ' - f'could not be interpreted as {col_spec["type"]}s. ' - f'Original error: {err}') - - # set the index column, if specified - if 'index_col' in spec and spec['index_col'] is not None: - index_col = spec['index_col'].lower() - try: - df = df.set_index(index_col, verify_integrity=True) - except KeyError: - # If 'index_col' is not a column then KeyError is raised for using - # it as the index column - LOGGER.error(f"The column '{index_col}' could not be found " - f"in the table {path}") - raise + df.columns = df.columns.astype(str).str.strip().str.lower() return df diff --git a/src/natcap/invest/validation.py b/src/natcap/invest/validation.py index b98e193de..12278b91e 100644 --- a/src/natcap/invest/validation.py +++ b/src/natcap/invest/validation.py @@ -33,8 +33,10 @@ 'MISSING_VALUE': gettext('Input is required but has no value'), 'MATCHED_NO_HEADERS': gettext('Expected the {header} "{header_name}" but did ' 'not find it'), + 'PATTERN_MATCHED_NONE': gettext('Expected to find at least one {header} matching ' + 'the pattern "{header_name}" but found none'), 'DUPLICATE_HEADER': gettext('Expected the {header} "{header_name}" only once ' - 'but found it {number} times'), + 'but found it {number} times'), 'NOT_A_NUMBER': gettext('Value "{value}" could not be interpreted as a number'), 'WRONG_PROJECTION_UNIT': gettext('Layer must be projected in this unit: ' '"{unit_a}" but found this unit: "{unit_b}"'), @@ -47,8 +49,6 @@ 'NOT_GDAL_RASTER': gettext('File could not be opened as a GDAL raster'), 'OVR_FILE': gettext('File found to be an overview ".ovr" file.'), 'NOT_GDAL_VECTOR': gettext('File could not be opened as a GDAL vector'), - 'NOT_CSV': gettext('File could not be opened as a CSV. 
File must be encoded as ' - 'a UTF-8 CSV.'), 'REGEXP_MISMATCH': gettext("Value did not match expected pattern {regexp}"), 'INVALID_OPTION': gettext("Value must be one of: {option_list}"), 'INVALID_VALUE': gettext('Value does not meet condition {condition}'), @@ -572,19 +572,111 @@ def check_boolean(value, **kwargs): return MESSAGES['NOT_BOOLEAN'].format(value=value) -def check_csv(filepath, rows=None, columns=None, **kwargs): +def get_validated_dataframe(csv_path, columns=None, rows=None, index_col=None, + read_csv_kwargs={}, **kwargs): + """Read a CSV into a dataframe that is guaranteed to match the spec.""" + + if not (columns or rows): + raise ValueError('One of columns or rows must be provided') + + # build up a list of regex patterns to match columns against columns from + # the table that match a pattern in this list (after stripping whitespace + # and lowercasing) will be included in the dataframe + axis = 'column' if columns else 'row' + + if rows: + read_csv_kwargs = read_csv_kwargs.copy() + read_csv_kwargs['header'] = None + + df = utils.read_csv_to_dataframe(csv_path, **read_csv_kwargs) + + if rows: + # swap rows and column + df = df.set_index(df.columns[0]).rename_axis(None, axis=0).T.reset_index(drop=True) + + spec = columns if columns else rows + + patterns = [] + for column in spec: + column = column.lower() + match = re.match(r'(.*)\[(.+)\](.*)', column) + if match: + # for column name patterns, convert it to a regex pattern + groups = match.groups() + patterns.append(f'{groups[0]}(.+){groups[2]}') + else: + # for regular column names, use the exact name as the pattern + patterns.append(column.replace('(', '\(').replace(')', '\)')) + + # select only the columns that match a pattern + df = df[[col for col in df.columns if any( + re.fullmatch(pattern, col) for pattern in patterns)]] + + # drop any empty rows + df = df.dropna(how="all").reset_index(drop=True) + + available_cols = set(df.columns) + + for (col_name, col_spec), pattern in zip(spec.items(), patterns): + matching_cols = [c for c in available_cols if re.fullmatch(pattern, c)] + if col_spec.get('required', True) is True and '[' not in col_name and not matching_cols: + raise ValueError(MESSAGES['MATCHED_NO_HEADERS'].format( + header=axis, + header_name=col_name)) + available_cols -= set(matching_cols) + for col in matching_cols: + try: + # frozenset needed to make the set hashable. A frozenset and set with the same members are equal. + if col_spec['type'] in {'csv', 'directory', 'file', 'raster', 'vector', frozenset({'vector', 'raster'})}: + df[col] = df[col].apply( + lambda p: p if pandas.isna(p) else utils.expand_path(str(p).strip(), csv_path)) + df[col] = df[col].astype(pandas.StringDtype()) + elif col_spec['type'] in {'freestyle_string', 'option_string'}: + df[col] = df[col].apply( + lambda s: s if pandas.isna(s) else str(s).strip().lower()) + df[col] = df[col].astype(pandas.StringDtype()) + elif col_spec['type'] in {'number', 'percent', 'ratio'}: + df[col] = df[col].astype(float) + elif col_spec['type'] == 'integer': + df[col] = df[col].astype(pandas.Int64Dtype()) + elif col_spec['type'] == 'boolean': + df[col] = df[col].astype('boolean') + else: + raise ValueError(f"Unknown type: {col_spec['type']}") + except Exception as err: + raise ValueError( + f'Value(s) in the "{col}" column could not be interpreted ' + f'as {col_spec["type"]}s. 
Original error: {err}') + + if any(df.columns.duplicated()): + duplicated_columns = df.columns[df.columns.duplicated()] + raise ValueError(MESSAGES['DUPLICATE_HEADER'].format( + header=axis, + header_name=duplicated_columns[0], + number=sum(df.columns == duplicated_columns[0]))) + + # set the index column, if specified + if index_col is not None: + index_col = index_col.lower() + try: + df = df.set_index(index_col, verify_integrity=True) + except KeyError: + # If 'index_col' is not a column then KeyError is raised for using + # it as the index column + LOGGER.error(f"The column '{index_col}' could not be found " + f"in the table {csv_path}") + raise + + return df + + + + +def check_csv(filepath, **kwargs): """Validate a table. Args: filepath (string): The string filepath to the table. - rows (dict): A dictionary spec of row names that are expected to exist - in the first column of the table. See the docstring of - ``check_headers`` for details on validation rules. No more than one - of `rows` and `columns` should be defined. - columns (dict): A dictionary spec of column names that are expected to - exist in the first row of the table. See the docstring of - ``check_headers`` for details on validation rules. No more than one - of `rows` and `columns` should be defined. Returns: A string error message if an error was found. ``None`` otherwise. @@ -593,28 +685,11 @@ def check_csv(filepath, rows=None, columns=None, **kwargs): file_warning = check_file(filepath, permissions='r') if file_warning: return file_warning - - try: - # Check if the file encoding is UTF-8 BOM first - encoding = None - if utils.has_utf8_bom(filepath): - encoding = 'utf-8-sig' - # engine=python handles unknown characters by replacing them with a - # replacement character, instead of raising an error - # use sep=None, engine='python' to infer what the separator is - dataframe = pandas.read_csv( - filepath, sep=None, engine='python', encoding=encoding, - header=None) - except Exception: - return MESSAGES['NOT_CSV'] - - # assume that at most one of `rows` and `columns` is defined - if columns: - headers = [str(name).strip() for name in dataframe.iloc[0]] - return check_headers(get_headers_to_validate(columns), headers, 'column') - elif rows: - headers = [str(name).strip() for name in dataframe.iloc[:, 0]] - return check_headers(get_headers_to_validate(rows), headers, 'row') + if 'columns' in kwargs or 'rows' in kwargs: + try: + get_validated_dataframe(filepath, **kwargs) + except Exception as e: + return str(e) def check_headers(expected_headers, actual_headers, header_type='header'): diff --git a/src/natcap/invest/wave_energy.py b/src/natcap/invest/wave_energy.py index dc0a9a7ad..be91ea839 100644 --- a/src/natcap/invest/wave_energy.py +++ b/src/natcap/invest/wave_energy.py @@ -287,26 +287,22 @@ }, "machine_param_path": { "type": "csv", - "rows": { - "capmax": { - "about": gettext("Maximum capacity for device."), - "type": "number", - "units": u.kilowatt - }, - "hsmax": { + # use columns because of the non standard format of this table, + # we cannot validate it with the rows as headers. + "columns": { + "name": { + "type": "freestyle_string", + "about": gettext( - "Upper limit of wave height for device operation. The " - "device shuts down when waves are higher than this."), - "type": "number", - "units": u.meter + "Name of the machine parameter. 
Expected parameters are: " + "'capmax' (maximum capacity for device, in kilowatts), " + "'hsmax' (upper limit of wave height for device operation, " + "in meters), and 'tpmax' (upper limit of wave period for " + "device operation, in seconds).") }, - "tpmax": { - "about": gettext( - "Upper limit of wave period for device operation. The " - "device shuts down when the wave period is longer " - "than this."), + "value": { "type": "number", - "units": u.second + "units": u.none, + "about": gettext("Value of the machine parameter.") } }, "about": gettext("Table of parameters for the wave energy machine in use."), @@ -335,52 +331,28 @@ }, "machine_econ_path": { "type": "csv", - "rows": { - "capmax": { - "type": "number", - "units": u.kilowatt, - "about": gettext("Maximum capacity of the device.") - }, - "cc": { - "type": "number", - "units": u.currency/u.kilowatt, - "about": gettext("Capital cost per device installed.") - }, - "cml": { - "type": "number", - "units": u.currency/u.meter, - "about": gettext("Cost of mooring lines.") - }, - "cul": { - "type": "number", - "units": u.currency/u.kilometer, - "about": gettext("Cost of underwater cable.") - }, - "col": { - "type": "number", - "units": u.currency/u.kilometer, - "about": gettext("Cost of overland transmission lines.") - }, - "omc": { - "type": "number", - "units": u.currency/u.kilowatt_hour, - "about": gettext("Operating and maintenance cost.") - }, - "p": { - "type": "number", - "units": u.currency/u.kilowatt_hour, - "about": gettext("Price of electricity.") - }, - "r": { - "type": "ratio", - "about": gettext("Discount rate.") + # use columns because of the non standard format of this table, + # we cannot validate it with the rows as headers. + "columns": { + "name": { + "type": "freestyle_string", + "about": gettext( + "Name of the machine parameter. Expected parameters are: " + "'capmax' (maximum capacity for device, in kilowatts), " + "'cc' (capital cost per device installed, $/kilowatt), " + "'cml' (cost of mooring lines, $/kilometer), " + "'cul' (cost of underwater cable, $/kilometer), " + "'col' (cost of overland transmission lines, $/kilometer), " + "'omc' (operating and maintenance cost, $/kilowatt hour), " + "'p' (price of electricity, $/kilowatt hour), " + "'r' (discount rate, between 0 and 1), " + "'smlpm' (number of slack lines required per machine)") }, - "smlpm": { + "value": { "type": "number", "units": u.none, - "about": gettext("Number of slack lines required per machine.") + "about": gettext("Value of the machine parameter.") } - }, "required": "valuation_container", "about": gettext( @@ -741,7 +713,7 @@ def execute(args): # arrays. 
Also store the amount of energy the machine produces # in a certain wave period/height state as a 2D array machine_perf_dict = {} - machine_perf_data = pandas.read_csv(args['machine_perf_path']) + machine_perf_data = utils.read_csv_to_dataframe(args['machine_perf_path']) # Get the wave period fields, starting from the second column of the table machine_perf_dict['periods'] = machine_perf_data.columns.values[1:] # Build up the height field by taking the first column of the table @@ -769,14 +741,21 @@ def execute(args): LOGGER.debug('Machine Performance Rows : %s', machine_perf_dict['periods']) LOGGER.debug('Machine Performance Cols : %s', machine_perf_dict['heights']) - machine_param_dict = _machine_csv_to_dict(args['machine_param_path']) + machine_param_dict = validation.get_validated_dataframe( + args['machine_param_path'], + index_col='name', + columns={ + 'name': {'type': 'option_string'}, + 'value': {'type': 'number'} + }, + )['value'].to_dict() # Check if required column fields are entered in the land grid csv file if 'land_gridPts_path' in args: # Create a grid_land_df dataframe for later use in valuation - grid_land_df = utils.read_csv_to_dataframe( + grid_land_df = validation.get_validated_dataframe( args['land_gridPts_path'], - MODEL_SPEC['args']['land_gridPts_path']) + **MODEL_SPEC['args']['land_gridPts_path']) missing_grid_land_fields = [] for field in ['id', 'type', 'lat', 'long', 'location']: if field not in grid_land_df.columns: @@ -788,7 +767,14 @@ def execute(args): 'Connection Points File: %s' % missing_grid_land_fields) if 'valuation_container' in args and args['valuation_container']: - machine_econ_dict = _machine_csv_to_dict(args['machine_econ_path']) + machine_econ_dict = validation.get_validated_dataframe( + args['machine_econ_path'], + index_col='name', + columns={ + 'name': {'type': 'option_string'}, + 'value': {'type': 'number'} + } + )['value'].to_dict() # Build up a dictionary of possible analysis areas where the key # is the analysis area selected and the value is a dictionary @@ -1626,42 +1612,6 @@ def _binary_wave_data_to_dict(wave_file_path): return wave_dict -def _machine_csv_to_dict(machine_csv_path): - """Create a dictionary from the table in machine csv file. - - The dictionary's keys are the 'NAME' from the machine table and its values - are from the corresponding 'VALUE' field. No need to check for missing - columns since the file is validated by validate() function. - - Args: - machine_csv_path (str): path to the input machine CSV file. - - Returns: - machine_dict (dict): a dictionary of keys from the first column of the - CSV file and corresponding values from the `VALUE` column. - - """ - machine_dict = {} - # make columns and indexes lowercased and strip whitespace - machine_data = utils.read_csv_to_dataframe( - machine_csv_path, - { - 'index_col': 'name', - 'columns': { - 'name': {'type': 'freestyle_string'}, - 'value': {'type': 'number'} - }}) - - # drop NaN indexed rows in dataframe - machine_data = machine_data[machine_data.index.notnull()] - LOGGER.debug('machine_data dataframe from %s: %s' % - (machine_csv_path, machine_data)) - machine_dict = machine_data.to_dict('index') - for key in machine_dict.keys(): - machine_dict[key] = machine_dict[key]['value'] - return machine_dict - - def _get_vector_spatial_ref(base_vector_path): """Get the spatial reference of an OGR vector (datasource). 
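Note on the wave_energy.py change above: the two-column NAME/VALUE machine parameter tables are now read through the same validated-dataframe call used elsewhere in the patch. A minimal sketch of that call follows; the temporary file, table contents, and values are illustrative stand-ins, not part of this patch.

    import os
    import tempfile

    from natcap.invest import validation

    # Made-up machine parameter table in the NAME/VALUE layout described above.
    csv_path = os.path.join(tempfile.mkdtemp(), 'machine_params.csv')
    with open(csv_path, 'w') as table:
        table.write('NAME,VALUE\n'
                    'CapMax,1000\n'
                    'HsMax,5.5\n'
                    'TpMax,16\n')

    # Headers and string values are lowercased and stripped, 'value' is cast
    # to float, and the 'name' column becomes the index, so the result
    # collapses cleanly into a parameter dictionary.
    machine_param_dict = validation.get_validated_dataframe(
        csv_path,
        index_col='name',
        columns={
            'name': {'type': 'option_string'},
            'value': {'type': 'number'},
        })['value'].to_dict()
    # machine_param_dict == {'capmax': 1000.0, 'hsmax': 5.5, 'tpmax': 16.0}
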
diff --git a/src/natcap/invest/wind_energy.py b/src/natcap/invest/wind_energy.py index bdb33ede6..f7fa4cc73 100644 --- a/src/natcap/invest/wind_energy.py +++ b/src/natcap/invest/wind_energy.py @@ -7,7 +7,6 @@ import tempfile import numpy -import pandas from scipy import integrate import shapely.wkb @@ -707,18 +706,21 @@ def execute(args): ] # Read the biophysical turbine parameters into a dictionary - bio_turbine_dict = _read_csv_wind_parameters( - args['turbine_parameters_path'], biophysical_params) - + turbine_dict = validation.get_validated_dataframe( + args['turbine_parameters_path'], + **MODEL_SPEC['args']['turbine_parameters_path'] + ).iloc[0].to_dict() # Read the biophysical global parameters into a dictionary - bio_global_params_dict = _read_csv_wind_parameters( - args['global_wind_parameters_path'], biophysical_params) + global_params_dict = validation.get_validated_dataframe( + args['global_wind_parameters_path'], + **MODEL_SPEC['args']['global_wind_parameters_path'] + ).iloc[0].to_dict() # Combine the turbine and global parameters into one dictionary - bio_parameters_dict = bio_global_params_dict.copy() - bio_parameters_dict.update(bio_turbine_dict) + parameters_dict = global_params_dict.copy() + parameters_dict.update(turbine_dict) - LOGGER.debug('Biophysical Turbine Parameters: %s', bio_parameters_dict) + LOGGER.debug('Biophysical Turbine Parameters: %s', parameters_dict) if ('valuation_container' not in args or args['valuation_container'] is False): @@ -727,33 +729,11 @@ def execute(args): LOGGER.info( 'Valuation Selected. Checking required parameters from CSV files.') - # Create a list of the valuation parameters we are looking for from the - # input files - valuation_turbine_params = ['turbine_cost', 'turbine_rated_pwr'] - # Read the biophysical turbine parameters into a dictionary - val_turbine_dict = _read_csv_wind_parameters( - args['turbine_parameters_path'], valuation_turbine_params) - - valuation_global_params = [ - 'carbon_coefficient', 'time_period', 'infield_cable_cost', - 'infield_cable_length', 'installation_cost', - 'miscellaneous_capex_cost', 'operation_maintenance_cost', - 'decommission_cost', 'ac_dc_distance_break', 'mw_coef_ac', - 'mw_coef_dc', 'cable_coef_ac', 'cable_coef_dc' - ] - # Read the biophysical global parameters into a dictionary - val_global_param_dict = _read_csv_wind_parameters( - args['global_wind_parameters_path'], valuation_global_params) - - # Combine the turbine and global parameters into one dictionary - val_parameters_dict = val_global_param_dict.copy() - val_parameters_dict.update(val_turbine_dict) - # If Price Table provided use that for price of energy, validate inputs - time = int(val_parameters_dict['time_period']) + time = parameters_dict['time_period'] if args['price_table']: - wind_price_df = utils.read_csv_to_dataframe( - args['wind_schedule'], MODEL_SPEC['args']['wind_schedule'] + wind_price_df = validation.get_validated_dataframe( + args['wind_schedule'], **MODEL_SPEC['args']['wind_schedule'] ).sort_index() # sort by year year_count = len(wind_price_df) @@ -773,17 +753,26 @@ def execute(args): # are the time steps for the lifespan of the farm and values # are adjusted based on the rate of change price_list = [] - for time_step in range(time + 1): + for time_step in range(int(time) + 1): price_list.append(wind_price * (1 + change_rate)**(time_step)) # Hub Height to use for setting Weibull parameters - hub_height = int(bio_parameters_dict['hub_height']) + hub_height = parameters_dict['hub_height'] LOGGER.debug('hub_height : %s', 
hub_height) # Read the wind energy data into a dictionary LOGGER.info('Reading in Wind Data into a dictionary') - wind_data = _read_csv_wind_data(args['wind_data_path'], hub_height) + wind_point_df = validation.get_validated_dataframe( + args['wind_data_path'], **MODEL_SPEC['args']['wind_data_path']) + wind_point_df.columns = wind_point_df.columns.str.upper() + # Calculate scale value at new hub height given reference values. + # See equation 3 in users guide + wind_point_df.rename(columns={'LAM': 'REF_LAM'}, inplace=True) + wind_point_df['LAM'] = wind_point_df.apply( + lambda row: row.REF_LAM * (hub_height / row.REF)**_ALPHA, axis=1) + wind_point_df.drop(['REF'], axis=1) # REF is not needed after calculation + wind_data = wind_point_df.to_dict('index') # so keys will be 0, 1, 2, ... # Compute Wind Density and Harvested Wind Energy, adding the values to the # points to the dictionary, and pickle the dictionary @@ -791,7 +780,7 @@ def execute(args): inter_dir, 'wind_data%s.pickle' % suffix) compute_density_harvested_task = task_graph.add_task( func=_compute_density_harvested_fields, - args=(wind_data, bio_parameters_dict, number_of_turbines, + args=(wind_data, parameters_dict, number_of_turbines, wind_data_pickle_path), target_path_list=[wind_data_pickle_path], task_name='compute_density_harvested_fields') @@ -1132,8 +1121,8 @@ def execute(args): LOGGER.info('Grid Points Provided. Reading in the grid points') # Read the grid points csv, and convert it to land and grid dictionary - grid_land_df = utils.read_csv_to_dataframe( - args['grid_points_path'], MODEL_SPEC['args']['grid_points_path']) + grid_land_df = validation.get_validated_dataframe( + args['grid_points_path'], **MODEL_SPEC['args']['grid_points_path']) # Convert the dataframes to dictionaries, using 'ID' (the index) as key grid_dict = grid_land_df[grid_land_df['type'] == 'grid'].to_dict('index') @@ -1306,7 +1295,7 @@ def execute(args): task_graph.add_task( func=_calculate_npv_levelized_rasters, args=(harvested_masked_path, final_dist_raster_path, npv_raster_path, - levelized_raster_path, val_parameters_dict, args, price_list), + levelized_raster_path, parameters_dict, args, price_list), target_path_list=[npv_raster_path, levelized_raster_path], task_name='calculate_npv_levelized_rasters', dependent_task_list=[final_dist_task]) @@ -1316,7 +1305,7 @@ def execute(args): # The amount of CO2 not released into the atmosphere, with the constant # conversion factor provided in the users guide by Rob Griffin - carbon_coef = float(val_parameters_dict['carbon_coefficient']) + carbon_coef = parameters_dict['carbon_coefficient'] task_graph.add_task( func=pygeoprocessing.raster_calculator, @@ -1335,7 +1324,7 @@ def execute(args): def _calculate_npv_levelized_rasters( base_harvested_raster_path, base_dist_raster_path, target_npv_raster_path, target_levelized_raster_path, - val_parameters_dict, args, price_list): + parameters_dict, args, price_list): """Calculate NPV and levelized rasters from harvested and dist rasters. Args: @@ -1352,7 +1341,7 @@ def _calculate_npv_levelized_rasters( store the unit price of energy that would be required to set the present value of the farm centered at each pixel equal to zero. - val_parameters_dict (dict): a dictionary of the turbine and biophysical + parameters_dict (dict): a dictionary of the turbine and biophysical global parameters. 
args (dict): a dictionary that contains information on @@ -1383,35 +1372,35 @@ def _calculate_npv_levelized_rasters( target_levelized_raster_path, gdal.OF_RASTER | gdal.GA_Update) levelized_band = levelized_raster.GetRasterBand(1) - # Get constants from val_parameters_dict to make it more readable + # Get constants from parameters_dict to make it more readable # The length of infield cable in km - infield_length = float(val_parameters_dict['infield_cable_length']) + infield_length = parameters_dict['infield_cable_length'] # The cost of infield cable in currency units per km - infield_cost = float(val_parameters_dict['infield_cable_cost']) + infield_cost = parameters_dict['infield_cable_cost'] # The cost of the foundation in currency units - foundation_cost = float(args['foundation_cost']) + foundation_cost = args['foundation_cost'] # The cost of each turbine unit in currency units - unit_cost = float(val_parameters_dict['turbine_cost']) + unit_cost = parameters_dict['turbine_cost'] # The installation cost as a decimal - install_cost = float(val_parameters_dict['installation_cost']) + install_cost = parameters_dict['installation_cost'] # The miscellaneous costs as a decimal factor of capex_arr - misc_capex_cost = float(val_parameters_dict['miscellaneous_capex_cost']) + misc_capex_cost = parameters_dict['miscellaneous_capex_cost'] # The operations and maintenance costs as a decimal factor of capex_arr - op_maint_cost = float(val_parameters_dict['operation_maintenance_cost']) + op_maint_cost = parameters_dict['operation_maintenance_cost'] # The discount rate as a decimal - discount_rate = float(args['discount_rate']) + discount_rate = args['discount_rate'] # The cost to decommission the farm as a decimal factor of capex_arr - decom = float(val_parameters_dict['decommission_cost']) + decom = parameters_dict['decommission_cost'] # The mega watt value for the turbines in MW - mega_watt = float(val_parameters_dict['turbine_rated_pwr']) + mega_watt = parameters_dict['turbine_rated_pwr'] # The distance at which AC switches over to DC power - circuit_break = float(val_parameters_dict['ac_dc_distance_break']) + circuit_break = parameters_dict['ac_dc_distance_break'] # The coefficients for the AC/DC megawatt and cable cost from the CAP # function - mw_coef_ac = float(val_parameters_dict['mw_coef_ac']) - mw_coef_dc = float(val_parameters_dict['mw_coef_dc']) - cable_coef_ac = float(val_parameters_dict['cable_coef_ac']) - cable_coef_dc = float(val_parameters_dict['cable_coef_dc']) + mw_coef_ac = parameters_dict['mw_coef_ac'] + mw_coef_dc = parameters_dict['mw_coef_dc'] + cable_coef_ac = parameters_dict['cable_coef_ac'] + cable_coef_dc = parameters_dict['cable_coef_dc'] # The total mega watt capacity of the wind farm where mega watt is the # turbines rated power @@ -1437,7 +1426,7 @@ def _calculate_npv_levelized_rasters( # Discount constant raised to the total time, a constant found in the NPV # calculation (1+i)^T - disc_time = disc_const**int(val_parameters_dict['time_period']) + disc_time = disc_const**parameters_dict['time_period'] LOGGER.debug('disc_time : %s', disc_time) for (harvest_block_info, harvest_block_data), (_, dist_block_data) in zip( @@ -1502,7 +1491,7 @@ def _calculate_npv_levelized_rasters( # the wind farm. Starting at year 1, because year 0 yields no revenue for year in range(1, len(price_list)): # currency units per kilowatt-hour of that year - currency_per_kwh = float(price_list[year]) + currency_per_kwh = price_list[year] # The revenue for the wind farm. 
The energy_val_arr is in kWh/yr rev_arr = energy_val_arr * currency_per_kwh @@ -1824,36 +1813,6 @@ def _calculate_land_to_grid_distance( LOGGER.info('Finished _calculate_land_to_grid_distance.') -def _read_csv_wind_parameters(csv_path, parameter_list): - """Construct a dictionary from a csv file given a list of keys. - - The list of keys corresponds to the parameters names in 'csv_path' which - are represented in the first column of the file. - - Args: - csv_path (str): a path to a CSV file where every row is a parameter - with the parameter name in the first column followed by the value - in the second column - parameter_list (list) : a List of strs that represent the parameter - names to be found in 'csv_path'. These strs will be the keys in - the returned dictionary - - Returns: a Dictionary where the 'parameter_list' strs are the - keys that have values pulled from 'csv_path' - - """ - # use the parameters in the first column as indices for the dataframe - # this doesn't benefit from `utils.read_csv_to_dataframe` because there - # is no header to strip whitespace - # use sep=None, engine='python' to infer what the separator is - wind_param_df = pandas.read_csv(csv_path, header=None, index_col=0) - # only get the required parameters and leave out the rest - wind_param_df = wind_param_df[wind_param_df.index.isin(parameter_list)] - wind_dict = wind_param_df.to_dict()[1] - - return wind_dict - - def _mask_by_distance(base_raster_path, min_dist, max_dist, out_nodata, target_raster_path): """Create a raster whose pixel values are bound by min and max distances. @@ -1950,37 +1909,8 @@ def _create_distance_raster(base_raster_path, base_vector_path, LOGGER.info("Finished _create_distance_raster") -def _read_csv_wind_data(wind_data_path, hub_height): - """Unpack the csv wind data into a dictionary. - - Args: - wind_data_path (str): a path for the csv wind data file with header - of: "LONG","LATI","LAM","K","REF" - hub_height (int): the hub height to use for calculating Weibull - parameters and wind energy values - - Returns: - A dictionary where the keys are lat/long tuples which point - to dictionaries that hold wind data at that location. - - """ - wind_point_df = utils.read_csv_to_dataframe( - wind_data_path, MODEL_SPEC['args']['wind_data_path']) - wind_point_df.columns = wind_point_df.columns.str.upper() - - # Calculate scale value at new hub height given reference values. - # See equation 3 in users guide - wind_point_df.rename(columns={'LAM': 'REF_LAM'}, inplace=True) - wind_point_df['LAM'] = wind_point_df.apply( - lambda row: row.REF_LAM * (hub_height / row.REF)**_ALPHA, axis=1) - wind_point_df.drop(['REF'], axis=1) # REF is not needed after calculation - wind_dict = wind_point_df.to_dict('index') # so keys will be 0, 1, 2, ... - - return wind_dict - - def _compute_density_harvested_fields( - wind_dict, bio_parameters_dict, number_of_turbines, + wind_dict, parameters_dict, number_of_turbines, target_pickle_path): """Compute the density and harvested energy based on scale and shape keys. @@ -1989,7 +1919,7 @@ def _compute_density_harvested_fields( keys ``LAM``, ``LATI``, ``K``, ``LONG``, ``REF_LAM``, and ``REF``, and numbers indicating their corresponding values. - bio_parameters_dict (dict): a dictionary where the 'parameter_list' + parameters_dict (dict): a dictionary where the 'parameter_list' strings are the keys that have values pulled from bio-parameters CSV. 
@@ -2009,20 +1939,20 @@ def _compute_density_harvested_fields( # The rated power is expressed in units of MW but the harvested energy # equation calls for it in terms of Wh. Thus we multiply by a million to # get to Wh. - rated_power = float(bio_parameters_dict['turbine_rated_pwr']) * 1000000 + rated_power = parameters_dict['turbine_rated_pwr'] * 1000000 # Get the rest of the inputs needed to compute harvested wind energy # from the dictionary so that it is in a more readable format - exp_pwr_curve = int(bio_parameters_dict['exponent_power_curve']) - air_density_standard = float(bio_parameters_dict['air_density']) - v_rate = float(bio_parameters_dict['rated_wspd']) - v_out = float(bio_parameters_dict['cut_out_wspd']) - v_in = float(bio_parameters_dict['cut_in_wspd']) - air_density_coef = float(bio_parameters_dict['air_density_coefficient']) - losses = float(bio_parameters_dict['loss_parameter']) + exp_pwr_curve = parameters_dict['exponent_power_curve'] + air_density_standard = parameters_dict['air_density'] + v_rate = parameters_dict['rated_wspd'] + v_out = parameters_dict['cut_out_wspd'] + v_in = parameters_dict['cut_in_wspd'] + air_density_coef = parameters_dict['air_density_coefficient'] + losses = parameters_dict['loss_parameter'] # Hub Height to use for setting Weibull parameters - hub_height = int(bio_parameters_dict['hub_height']) + hub_height = parameters_dict['hub_height'] # Compute the mean air density, given by CKs formulas mean_air_density = air_density_standard - air_density_coef * hub_height @@ -2202,8 +2132,8 @@ def _dictionary_to_point_vector( # For each inner dictionary (for each point) create a point and set its # fields for point_dict in base_dict_data.values(): - latitude = float(point_dict['lati']) - longitude = float(point_dict['long']) + latitude = point_dict['lati'] + longitude = point_dict['long'] geom = ogr.Geometry(ogr.wkbPoint) geom.AddPoint_2D(longitude, latitude) @@ -2469,8 +2399,8 @@ def _wind_data_to_point_vector(wind_data_pickle_path, # For each inner dictionary (for each point) create a point for point_dict in wind_data.values(): geom = ogr.Geometry(ogr.wkbPoint) - latitude = float(point_dict['LATI']) - longitude = float(point_dict['LONG']) + latitude = point_dict['LATI'] + longitude = point_dict['LONG'] # When projecting to WGS84, extents -180 to 180 are used for # longitude. 
In case input longitude is from -360 to 0 convert if longitude < -180: @@ -2770,17 +2700,23 @@ def validate(args, limit_to=None): """ validation_warnings = validation.validate(args, MODEL_SPEC['args'], MODEL_SPEC['args_with_spatial_overlap']) - invalid_keys = validation.get_invalid_keys(validation_warnings) sufficient_keys = validation.get_sufficient_keys(args) valid_sufficient_keys = sufficient_keys - invalid_keys if ('wind_schedule' in valid_sufficient_keys and 'global_wind_parameters_path' in valid_sufficient_keys): - year_count = pandas.read_csv(args['wind_schedule']).shape[0] - time = int(_read_csv_wind_parameters( - args['global_wind_parameters_path'], ['time_period'] - )['time_period']) + year_count = utils.read_csv_to_dataframe( + args['wind_schedule']).shape[0] + time = validation.get_validated_dataframe( + args['global_wind_parameters_path'], + index_col='0', + columns={ + '0': {'type': 'freestyle_string'}, + '1': {'type': 'number'} + }, + read_csv_kwargs={'header': None} + )['1']['time_period'] if year_count != time + 1: validation_warnings.append(( ['wind_schedule'], diff --git a/tests/test_coastal_blue_carbon.py b/tests/test_coastal_blue_carbon.py index 3bec14303..5af17b57e 100644 --- a/tests/test_coastal_blue_carbon.py +++ b/tests/test_coastal_blue_carbon.py @@ -13,6 +13,7 @@ import pandas import pygeoprocessing from natcap.invest import utils +from natcap.invest import validation from osgeo import gdal from osgeo import osr @@ -188,9 +189,9 @@ def test_transition_table(self): lulc_csv.write('0,mangrove,True\n') lulc_csv.write('1,parking lot,False\n') - landcover_df = utils.read_csv_to_dataframe( + landcover_df = validation.get_validated_dataframe( landcover_table_path, - preprocessor.MODEL_SPEC['args']['lulc_lookup_table_path']) + **preprocessor.MODEL_SPEC['args']['lulc_lookup_table_path']) target_table_path = os.path.join(self.workspace_dir, 'transition_table.csv') @@ -204,9 +205,9 @@ def test_transition_table(self): str(context.exception)) # Re-load the landcover table - landcover_df = utils.read_csv_to_dataframe( + landcover_df = validation.get_validated_dataframe( landcover_table_path, - preprocessor.MODEL_SPEC['args']['lulc_lookup_table_path']) + **preprocessor.MODEL_SPEC['args']['lulc_lookup_table_path']) preprocessor._create_transition_table( landcover_df, [filename_a, filename_b], target_table_path) @@ -618,9 +619,9 @@ def test_model_one_transition_no_analysis_year(self): args = TestCBC2._create_model_args(self.workspace_dir) args['workspace_dir'] = os.path.join(self.workspace_dir, 'workspace') - prior_snapshots = utils.read_csv_to_dataframe( + prior_snapshots = validation.get_validated_dataframe( args['landcover_snapshot_csv'], - coastal_blue_carbon.MODEL_SPEC['args']['landcover_snapshot_csv'] + **coastal_blue_carbon.MODEL_SPEC['args']['landcover_snapshot_csv'] )['raster_path'].to_dict() baseline_year = min(prior_snapshots.keys()) baseline_raster = prior_snapshots[baseline_year] @@ -796,9 +797,9 @@ def test_model_no_transitions(self): args = TestCBC2._create_model_args(self.workspace_dir) args['workspace_dir'] = os.path.join(self.workspace_dir, 'workspace') - prior_snapshots = utils.read_csv_to_dataframe( + prior_snapshots = validation.get_validated_dataframe( args['landcover_snapshot_csv'], - coastal_blue_carbon.MODEL_SPEC['args']['landcover_snapshot_csv'] + **coastal_blue_carbon.MODEL_SPEC['args']['landcover_snapshot_csv'] )['raster_path'].to_dict() baseline_year = min(prior_snapshots.keys()) baseline_raster = prior_snapshots[baseline_year] @@ -862,9 +863,9 @@ def 
test_validation(self): raster.write('not a raster') # Write over the landcover snapshot CSV - prior_snapshots = utils.read_csv_to_dataframe( + prior_snapshots = validation.get_validated_dataframe( args['landcover_snapshot_csv'], - coastal_blue_carbon.MODEL_SPEC['args']['landcover_snapshot_csv'] + **coastal_blue_carbon.MODEL_SPEC['args']['landcover_snapshot_csv'] )['raster_path'].to_dict() baseline_year = min(prior_snapshots) with open(args['landcover_snapshot_csv'], 'w') as snapshot_table: diff --git a/tests/test_datastack.py b/tests/test_datastack.py index 33a01de2f..45d2e590b 100644 --- a/tests/test_datastack.py +++ b/tests/test_datastack.py @@ -303,6 +303,7 @@ def test_archive_extraction(self): """Datastack: test archive extraction.""" from natcap.invest import datastack from natcap.invest import utils + from natcap.invest import validation params = { 'blank': '', @@ -379,14 +380,12 @@ def test_archive_extraction(self): self.assertTrue( filecmp.cmp(archive_params[key], params[key], shallow=False)) - spatial_csv_dict = utils.read_csv_to_dataframe( + spatial_csv_dict = validation.get_validated_dataframe( archive_params['spatial_table'], - { - 'index_col': 'id', - 'columns': { - 'id': {'type': 'integer'}, - 'path': {'type': 'file'} - } + index_col='id', + columns={ + 'id': {'type': 'integer'}, + 'path': {'type': 'file'} }).to_dict(orient='index') spatial_csv_dir = os.path.dirname(archive_params['spatial_table']) numpy.testing.assert_allclose( diff --git a/tests/test_habitat_quality.py b/tests/test_habitat_quality.py index 560793d2c..0e2bbebe3 100644 --- a/tests/test_habitat_quality.py +++ b/tests/test_habitat_quality.py @@ -1317,7 +1317,7 @@ def test_habitat_quality_validation_missing_sens_header(self): args['sensitivity_table_path'] = os.path.join( args['workspace_dir'], 'sensitivity_samp.csv') make_sensitivity_samp_csv( - args['sensitivity_table_path'], include_threat=False) + args['sensitivity_table_path'], include_threat=True) make_threats_raster( args['workspace_dir'], threat_values=[1, 1], @@ -1333,7 +1333,7 @@ def test_habitat_quality_validation_missing_sens_header(self): open_table.write( '0.04,0.7,threat_1,linear,,threat_1_c.tif,threat_1_f.tif\n') open_table.write( - '0.07,1.0,threat_2,exponential,,threat_2_c.tif,' + '0.07,1.0,threat_3,exponential,,threat_2_c.tif,' 'threat_2_f.tif\n') # At least one threat header is expected, so there should be a message diff --git a/tests/test_recreation.py b/tests/test_recreation.py index 19ea26595..21771479d 100644 --- a/tests/test_recreation.py +++ b/tests/test_recreation.py @@ -954,6 +954,7 @@ def test_existing_output_shapefiles(self): def test_existing_regression_coef(self): """Recreation test regression coefficients handle existing output.""" from natcap.invest.recreation import recmodel_client + from natcap.invest import validation # Initialize a TaskGraph taskgraph_db_dir = os.path.join( @@ -971,9 +972,9 @@ def test_existing_regression_coef(self): predictor_table_path = os.path.join(SAMPLE_DATA, 'predictors.csv') # make outputs to be overwritten - predictor_dict = utils.read_csv_to_dataframe( + predictor_dict = validation.get_validated_dataframe( predictor_table_path, - recmodel_client.MODEL_SPEC['args']['predictor_table_path'] + **recmodel_client.MODEL_SPEC['args']['predictor_table_path'] ).to_dict(orient='index') predictor_list = predictor_dict.keys() tmp_working_dir = tempfile.mkdtemp(dir=self.workspace_dir) diff --git a/tests/test_utils.py b/tests/test_utils.py index 74f69821b..3006b44fa 100644 --- a/tests/test_utils.py +++ 
b/tests/test_utils.py @@ -527,199 +527,7 @@ def tearDown(self): shutil.rmtree(self.workspace_dir) def test_read_csv_to_dataframe(self): - """utils: test the default behavior""" - from natcap.invest import utils - - csv_file = os.path.join(self.workspace_dir, 'csv.csv') - - with open(csv_file, 'w') as file_obj: - file_obj.write(textwrap.dedent( - """\ - header, - a, - b - """ - )) - df = utils.read_csv_to_dataframe( - csv_file, - {'columns': {'header': {'type': 'freestyle_string'}}}) - # header and table values should be lowercased - self.assertEqual(df.columns[0], 'header') - self.assertEqual(df['header'][0], 'a') - self.assertEqual(df['header'][1], 'b') - - def test_unique_key_not_first_column(self): - """utils: test success when key field is not first column.""" - from natcap.invest import utils - csv_text = ("desc,lucode,val1,val2\n" - "corn,1,0.5,2\n" - "bread,2,1,4\n" - "beans,3,0.5,4\n" - "butter,4,9,1") - table_path = os.path.join(self.workspace_dir, 'table.csv') - with open(table_path, 'w') as table_file: - table_file.write(csv_text) - - df = utils.read_csv_to_dataframe( - table_path, - { - 'index_col': 'lucode', - 'columns': { - 'desc': {'type': 'freestyle_string'}, - 'lucode': {'type': 'integer'}, - 'val1': {'type': 'number'}, - 'val2': {'type': 'number'} - }}) - self.assertEqual(df.index.name, 'lucode') - self.assertEqual(list(df.index.values), [1, 2, 3, 4]) - self.assertEqual(df['desc'][2], 'bread') - - def test_non_unique_keys(self): - """utils: test error is raised if keys are not unique.""" - from natcap.invest import utils - csv_text = ("lucode,desc,val1,val2\n" - "1,corn,0.5,2\n" - "2,bread,1,4\n" - "2,beans,0.5,4\n" - "4,butter,9,1") - table_path = os.path.join(self.workspace_dir, 'table.csv') - with open(table_path, 'w') as table_file: - table_file.write(csv_text) - - with self.assertRaises(ValueError): - utils.read_csv_to_dataframe( - table_path, - { - 'index_col': 'lucode', - 'columns': { - 'desc': {'type': 'freestyle_string'}, - 'lucode': {'type': 'integer'}, - 'val1': {'type': 'number'}, - 'val2': {'type': 'number'} - }}) - - def test_missing_key_field(self): - """utils: test error is raised when missing key field.""" - from natcap.invest import utils - csv_text = ("luode,desc,val1,val2\n" - "1,corn,0.5,2\n" - "2,bread,1,4\n" - "3,beans,0.5,4\n" - "4,butter,9,1") - table_path = os.path.join(self.workspace_dir, 'table.csv') - with open(table_path, 'w') as table_file: - table_file.write(csv_text) - - with self.assertRaises(KeyError): - utils.read_csv_to_dataframe( - table_path, - { - 'index_col': 'lucode', - 'columns': { - 'desc': {'type': 'freestyle_string'}, - 'lucode': {'type': 'integer'}, - 'val1': {'type': 'number'}, - 'val2': {'type': 'number'} - }}) - - def test_nan_row(self): - """utils: test NaN row is dropped.""" - from natcap.invest import utils - csv_text = ("lucode,desc,val1,val2\n" - "1,corn,0.5,2\n" - ",,,\n" - "3,beans,0.5,4\n" - "4,butter,9,1") - table_path = os.path.join(self.workspace_dir, 'table.csv') - with open(table_path, 'w') as table_file: - table_file.write(csv_text) - - result = utils.read_csv_to_dataframe( - table_path, - { - 'index_col': 'lucode', - 'columns': { - 'desc': {'type': 'freestyle_string'}, - 'lucode': {'type': 'integer'}, - 'val1': {'type': 'number'}, - 'val2': {'type': 'number'} - }}).to_dict(orient='index') - expected_result = { - 1: {'desc': 'corn', 'val1': 0.5, 'val2': 2}, - 3: {'desc': 'beans', 'val1': 0.5, 'val2': 4}, - 4: {'desc': 'butter', 'val1': 9, 'val2': 1}} - - self.assertDictEqual(result, expected_result) - - def 
test_column_subset(self): - """utils: test column subset is properly returned.""" - from natcap.invest import utils - table_path = os.path.join(self.workspace_dir, 'table.csv') - with open(table_path, 'w') as table_file: - table_file.write( - "lucode,desc,val1,val2\n" - "1,corn,0.5,2\n" - "2,bread,1,4\n" - "3,beans,0.5,4\n" - "4,butter,9,1") - df = utils.read_csv_to_dataframe( - table_path, - { - 'columns': { - 'lucode': {'type': 'integer'}, - 'val1': {'type': 'number'}, - 'val2': {'type': 'number'} - } - }) - self.assertEqual(list(df.columns), ['lucode', 'val1', 'val2']) - - def test_column_pattern_matching(self): - """utils: test column subset is properly returned.""" - from natcap.invest import utils - table_path = os.path.join(self.workspace_dir, 'table.csv') - with open(table_path, 'w') as table_file: - table_file.write( - "lucode,grassland_value,forest_value,wetland_valueee\n" - "1,0.5,2\n" - "2,1,4\n" - "3,0.5,4\n" - "4,9,1") - df = utils.read_csv_to_dataframe( - table_path, { - 'columns': { - 'lucode': {'type': 'integer'}, - '[HABITAT]_value': {'type': 'number'} - } - }) - self.assertEqual( - list(df.columns), ['lucode', 'grassland_value', 'forest_value']) - - def test_trailing_comma(self): - """utils: test a trailing comma on first line is handled properly.""" - from natcap.invest import utils - table_path = os.path.join(self.workspace_dir, 'table.csv') - with open(table_path, 'w') as table_file: - table_file.write( - "lucode,desc,val1,val2\n" - "1,corn,0.5,2,\n" - "2,bread,1,4\n" - "3,beans,0.5,4\n" - "4,butter,9,1") - result = utils.read_csv_to_dataframe( - table_path, - { - 'columns': { - 'desc': {'type': 'freestyle_string'}, - 'lucode': {'type': 'integer'}, - 'val1': {'type': 'number'}, - 'val2': {'type': 'number'} - }}) - self.assertEqual(result['val2'][0], 2) - self.assertEqual(result['lucode'][1], 2) - - - def test_trailing_comma_second_line(self): - """utils: test a trailing comma on second line is handled properly.""" + """utils: read csv with no row or column specs provided""" from natcap.invest import utils csv_text = ("lucode,desc,val1,val2\n" "1,corn,0.5,2\n" @@ -730,216 +538,12 @@ def test_trailing_comma_second_line(self): with open(table_path, 'w') as table_file: table_file.write(csv_text) - result = utils.read_csv_to_dataframe( - table_path, - { - 'index_col': 'lucode', - 'columns': { - 'desc': {'type': 'freestyle_string'}, - 'lucode': {'type': 'integer'}, - 'val1': {'type': 'number'}, - 'val2': {'type': 'number'} - }}).to_dict(orient='index') - - expected_result = { - 1: {'desc': 'corn', 'val1': 0.5, 'val2': 2}, - 2: {'desc': 'bread', 'val1': 1, 'val2': 4}, - 3: {'desc': 'beans', 'val1': 0.5, 'val2': 4}, - 4: {'desc': 'butter', 'val1': 9, 'val2': 1}} - - self.assertDictEqual(result, expected_result) - - def test_csv_dialect_detection_semicolon_delimited(self): - """utils: test that we can parse semicolon-delimited CSVs.""" - from natcap.invest import utils - - csv_file = os.path.join(self.workspace_dir, 'csv.csv') - with open(csv_file, 'w') as file_obj: - file_obj.write(textwrap.dedent( - """\ - header1;HEADER2;header3; - 1;2;3; - 4;FOO;bar; - """ - )) - - df = utils.read_csv_to_dataframe( - csv_file, - {'columns': { - 'header1': {'type': 'integer'}, - 'header2': {'type': 'freestyle_string'}, - 'header3': {'type': 'freestyle_string'} - } - }) - self.assertEqual(df['header2'][1], 'foo') - self.assertEqual(df['header3'][1], 'bar') - self.assertEqual(df['header1'][0], 1) - - def test_convert_cols_to_lower(self): - """utils: test that column names are converted to 
lowercase""" - from natcap.invest import utils - - csv_file = os.path.join(self.workspace_dir, 'csv.csv') - - with open(csv_file, 'w') as file_obj: - file_obj.write(textwrap.dedent( - """\ - header, - A, - b - """ - )) - df = utils.read_csv_to_dataframe( - csv_file, {'columns': { - 'header': {'type': 'freestyle_string'} - }}) - self.assertEqual(df['header'][0], 'a') - - def test_convert_vals_to_lower(self): - """utils: test that values are converted to lowercase""" - from natcap.invest import utils - - csv_file = os.path.join(self.workspace_dir, 'csv.csv') - - with open(csv_file, 'w') as file_obj: - file_obj.write(textwrap.dedent( - """\ - HEADER, - a, - b - """ - )) - df = utils.read_csv_to_dataframe( - csv_file, {'columns': { - 'header': {'type': 'freestyle_string'} - }}) - self.assertEqual(df.columns[0], 'header') - - def test_integer_type_columns(self): - """utils: integer column values are returned as integers.""" - from natcap.invest import utils - csv_file = os.path.join(self.workspace_dir, 'csv.csv') - with open(csv_file, 'w') as file_obj: - file_obj.write(textwrap.dedent( - """\ - id,header, - 1,5.0, - 2,-1, - 3, - """ - )) - df = utils.read_csv_to_dataframe( - csv_file, {'columns': { - 'id': {'type': 'integer'}, - 'header': {'type': 'integer', 'na_allowed': True}}}) - self.assertIsInstance(df['header'][0], numpy.int64) - self.assertIsInstance(df['header'][1], numpy.int64) - # empty values are returned as pandas.NA - self.assertTrue(pd.isna(df['header'][2])) - - def test_float_type_columns(self): - """utils: float column values are returned as floats.""" - from natcap.invest import utils - csv_file = os.path.join(self.workspace_dir, 'csv.csv') - with open(csv_file, 'w') as file_obj: - file_obj.write(textwrap.dedent( - """\ - h1,h2,h3 - 5,0.5,.4 - -1,-.3, - """ - )) - df = utils.read_csv_to_dataframe( - csv_file, {'columns': { - 'h1': {'type': 'number'}, - 'h2': {'type': 'ratio'}, - 'h3': {'type': 'percent', 'na_allowed': True}, - }}) - self.assertEqual(df['h1'].dtype, float) - self.assertEqual(df['h2'].dtype, float) - self.assertEqual(df['h3'].dtype, float) - # empty values are returned as numpy.nan - self.assertTrue(numpy.isnan(df['h3'][1])) - - def test_string_type_columns(self): - """utils: string column values are returned as strings.""" - from natcap.invest import utils - csv_file = os.path.join(self.workspace_dir, 'csv.csv') - with open(csv_file, 'w') as file_obj: - file_obj.write(textwrap.dedent( - """\ - h1,h2,h3 - 1,a,foo - 2,b, - """ - )) - df = utils.read_csv_to_dataframe( - csv_file, {'columns': { - 'h1': {'type': 'freestyle_string'}, - 'h2': {'type': 'option_string'}, - 'h3': {'type': 'freestyle_string'}, - }}) - self.assertEqual(df['h1'][0], '1') - self.assertEqual(df['h2'][1], 'b') - # empty values are returned as NA - self.assertTrue(pd.isna(df['h3'][1])) - - def test_boolean_type_columns(self): - """utils: boolean column values are returned as booleans.""" - from natcap.invest import utils - csv_file = os.path.join(self.workspace_dir, 'csv.csv') - with open(csv_file, 'w') as file_obj: - file_obj.write(textwrap.dedent( - """\ - index,h1 - a,1 - b,0 - c, - """ - )) - df = utils.read_csv_to_dataframe( - csv_file, {'columns': { - 'index': {'type': 'freestyle_string'}, - 'h1': {'type': 'bool', 'na_allowed': True}}}) - self.assertEqual(df['h1'][0], True) - self.assertEqual(df['h1'][1], False) - # empty values are returned as pandas.NA - self.assertTrue(pd.isna(df['h1'][2])) - - def test_expand_path_columns(self): - """utils: test values in path columns are expanded.""" 
- from natcap.invest import utils - csv_file = os.path.join(self.workspace_dir, 'csv.csv') - with open(csv_file, 'w') as file_obj: - file_obj.write(textwrap.dedent( - f"""\ - bar,path - 1,foo.txt - 2,foo/bar.txt - 3,foo\\bar.txt - 4,{self.workspace_dir}/foo.txt - 5, - """ - )) - df = utils.read_csv_to_dataframe( - csv_file, {'columns': { - 'bar': {'type': 'integer'}, - 'path': {'type': 'file'} - }}) - self.assertEqual( - f'{self.workspace_dir}{os.sep}foo.txt', - df['path'][0]) - self.assertEqual( - f'{self.workspace_dir}{os.sep}foo{os.sep}bar.txt', - df['path'][1]) - self.assertEqual( - f'{self.workspace_dir}{os.sep}foo\\bar.txt', - df['path'][2]) - self.assertEqual( - f'{self.workspace_dir}{os.sep}foo.txt', - df['path'][3]) - # empty values are returned as empty strings - self.assertTrue(pd.isna(df['path'][4])) + df = utils.read_csv_to_dataframe(table_path) + self.assertEqual(list(df.columns), ['lucode', 'desc', 'val1', 'val2']) + self.assertEqual(df['lucode'][0], 1) + self.assertEqual(df['desc'][1], 'bread') + self.assertEqual(df['val1'][2], 0.5) + self.assertEqual(df['val2'][3], 1) def test_csv_utf8_encoding(self): """utils: test that CSV read correctly with UTF-8 encoding.""" @@ -955,16 +559,9 @@ def test_csv_utf8_encoding(self): """ )) lookup_dict = utils.read_csv_to_dataframe( - csv_file, - { - 'index_col': 'header1', - 'columns': { - 'header1': {'type': 'integer'}, - 'header2': {'type': 'integer'}, - 'header3': {'type': 'freestyle_string'} - }}).to_dict(orient='index') - self.assertEqual(lookup_dict[4]['header2'], 5) - self.assertEqual(lookup_dict[4]['header3'], 'foo') + csv_file).to_dict(orient='index') + self.assertEqual(lookup_dict[1]['header2'], 5) + self.assertEqual(lookup_dict[1]['header3'], 'FOO') def test_utf8_bom_encoding(self): """utils: test that CSV read correctly with UTF-8 BOM encoding.""" @@ -982,13 +579,7 @@ def test_utf8_bom_encoding(self): # confirm that the file has the BOM prefix with open(csv_file, 'rb') as file_obj: self.assertTrue(file_obj.read().startswith(codecs.BOM_UTF8)) - df = utils.read_csv_to_dataframe(csv_file, - { - 'columns': { - 'header1': {'type': 'integer'}, - 'header2': {'type': 'integer'}, - 'header3': {'type': 'freestyle_string'} - }}) + df = utils.read_csv_to_dataframe(csv_file) # assert the BOM prefix was correctly parsed and skipped self.assertEqual(df.columns[0], 'header1') self.assertEqual(df['header2'][1], 5) @@ -1005,15 +596,9 @@ def test_csv_latin_1_encoding(self): 4,5,FOO """ )) - df = utils.read_csv_to_dataframe( - csv_file, - {'columns': { - 'header 1': {'type': 'integer'}, - 'header 2': {'type': 'integer'}, - 'header 3': {'type': 'freestyle_string'} - }}) + df = utils.read_csv_to_dataframe(csv_file) self.assertEqual(df['header 2'][1], 5) - self.assertEqual(df['header 3'][1], 'foo') + self.assertEqual(df['header 3'][1], 'FOO') self.assertEqual(df['header 1'][0], 1) def test_csv_error_non_utf8_character(self): @@ -1029,16 +614,8 @@ def test_csv_error_non_utf8_character(self): 4,5,FÖÖ """ )) - with self.assertRaises(UnicodeDecodeError): - utils.read_csv_to_dataframe( - csv_file, - { - 'index_col': 'header1', - 'columns': { - 'header1': {'type': 'integer'}, - 'header2': {'type': 'integer'}, - 'header3': {'type': 'freestyle_string'} - }}) + with self.assertRaises(ValueError): + utils.read_csv_to_dataframe(csv_file) def test_override_default_encoding(self): """utils: test that you can override the default encoding kwarg""" @@ -1055,99 +632,31 @@ def test_override_default_encoding(self): bar """ )) - df = utils.read_csv_to_dataframe( - 
csv_file, { - 'columns': {'header': {'type': 'freestyle_string'} - }}, encoding='iso8859_5') + df = utils.read_csv_to_dataframe(csv_file, encoding='iso8859_5') # with the encoding specified, special characters should work - # and be lowercased - self.assertEqual(df['header'][0], 'fюю') + self.assertEqual(df['header'][0], 'fЮЮ') self.assertEqual(df['header'][1], 'bar') - def test_other_kwarg(self): - """utils: any other kwarg should be passed to pandas.read_csv""" + def test_csv_dialect_detection_semicolon_delimited(self): + """utils: test that we can parse semicolon-delimited CSVs.""" from natcap.invest import utils csv_file = os.path.join(self.workspace_dir, 'csv.csv') - with open(csv_file, 'w') as file_obj: file_obj.write(textwrap.dedent( """\ - h1;h2;h3 - a;b;c - d;e;f + header1;HEADER2;header3; + 1;2;3; + 4;FOO;bar; """ )) - # using sep=None with the default engine='python', - # it should infer what the separator is - df = utils.read_csv_to_dataframe( - csv_file, { - 'columns': { - 'h1': {'type': 'freestyle_string'}, - 'h2': {'type': 'freestyle_string'}, - 'h3': {'type': 'freestyle_string'} - }}, converters={'h2': lambda val: f'foo_{val}'}) - - self.assertEqual(df.columns[0], 'h1') - self.assertEqual(df['h2'][1], 'foo_e') - - def test_csv_with_integer_headers(self): - """ - utils: CSV with integer headers should be read into strings. - - This shouldn't matter for any of the models, but if a user inputs a CSV - with extra columns that are labeled with numbers, it should still work. - """ - from natcap.invest import utils - - csv_file = os.path.join(self.workspace_dir, 'csv.csv') - with open(csv_file, 'w') as file_obj: - file_obj.write(textwrap.dedent( - """\ - 1,2,3 - a,b,c - d,e,f - """ - )) - df = utils.read_csv_to_dataframe( - csv_file, - {'columns': { - '1': {'type': 'freestyle_string'}, - '2': {'type': 'freestyle_string'}, - '3': {'type': 'freestyle_string'} - }}) - # expect headers to be strings - self.assertEqual(df.columns[0], '1') - self.assertEqual(df['1'][0], 'a') - - def test_removal_whitespace(self): - """utils: test that leading/trailing whitespace is removed.""" - from natcap.invest import utils + df = utils.read_csv_to_dataframe(csv_file) + self.assertEqual(df['header2'][1], 'FOO') + self.assertEqual(df['header3'][1], 'bar') + self.assertEqual(df['header1'][0], 1) - csv_file = os.path.join(self.workspace_dir, 'csv.csv') - with open(csv_file, 'w') as file_obj: - file_obj.write(" Col1, Col2 ,Col3 \n") - file_obj.write(" val1, val2 ,val3 \n") - file_obj.write(" , 2 1 , ") - df = utils.read_csv_to_dataframe( - csv_file, { - 'columns': { - 'col1': {'type': 'freestyle_string'}, - 'col2': {'type': 'freestyle_string'}, - 'col3': {'type': 'freestyle_string'} - }}) - # header should have no leading / trailing whitespace - self.assertEqual(list(df.columns), ['col1', 'col2', 'col3']) - - # values should have no leading / trailing whitespace - self.assertEqual(df['col1'][0], 'val1') - self.assertEqual(df['col2'][0], 'val2') - self.assertEqual(df['col3'][0], 'val3') - self.assertEqual(df['col1'][1], '') - self.assertEqual(df['col2'][1], '2 1') - self.assertEqual(df['col3'][1], '') class CreateCoordinateTransformationTests(unittest.TestCase): diff --git a/tests/test_validation.py b/tests/test_validation.py index 8f9e10acf..2a5e4fa81 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -1,15 +1,17 @@ """Testing module for validation.""" -# encoding=UTF-8 -import tempfile -import unittest -from unittest.mock import Mock +import codecs import functools import os import 
shutil import string +import tempfile +import textwrap import time +import unittest +from unittest.mock import Mock import warnings +import numpy from osgeo import gdal, osr, ogr import pandas @@ -717,7 +719,8 @@ def test_csv_fieldnames(self): df.to_csv(target_file) self.assertEqual(None, validation.check_csv( - target_file, columns={'foo': {}, 'bar': {}})) + target_file, columns={ + 'foo': {'type': 'integer'}, 'bar': {'type': 'integer'}})) def test_csv_bom_fieldnames(self): """Validation: test that we can check fieldnames in a CSV with BOM.""" @@ -732,7 +735,8 @@ def test_csv_bom_fieldnames(self): df.to_csv(target_file, encoding='utf-8-sig') self.assertEqual(None, validation.check_csv( - target_file, columns={'foo': {}, 'bar': {}})) + target_file, columns={ + 'foo': {'type': 'integer'}, 'bar': {'type': 'integer'}})) def test_csv_missing_fieldnames(self): """Validation: test that we can check missing fieldnames in a CSV.""" @@ -765,7 +769,7 @@ def test_wrong_filetype(self): df.to_pickle(target_file) error_msg = validation.check_csv(target_file, columns={'field_a': {}}) - self.assertEqual(error_msg, validation.MESSAGES['NOT_CSV']) + self.assertIn('must be encoded as UTF-8', error_msg) def test_slow_to_open(self): """Test timeout by mocking a CSV that is slow to open""" @@ -837,6 +841,508 @@ def test_check_headers(self): self.assertEqual(result, None) +class TestGetValidatedDataframe(unittest.TestCase): + """Tests for validation.get_validated_dataframe.""" + def setUp(self): + """Create a new workspace to use for each test.""" + self.workspace_dir = tempfile.mkdtemp() + + def tearDown(self): + """Remove the workspace created for this test.""" + shutil.rmtree(self.workspace_dir) + + def test_get_validated_dataframe(self): + """validation: test the default behavior""" + from natcap.invest import validation + + csv_file = os.path.join(self.workspace_dir, 'csv.csv') + + with open(csv_file, 'w') as file_obj: + file_obj.write(textwrap.dedent( + """\ + header, , + a, , + b,c + """ + )) + df = validation.get_validated_dataframe( + csv_file, + columns={'header': {'type': 'freestyle_string'}}) + # header and table values should be lowercased + self.assertEqual(df.columns[0], 'header') + self.assertEqual(df['header'][0], 'a') + self.assertEqual(df['header'][1], 'b') + + def test_unique_key_not_first_column(self): + """validation: test success when key field is not first column.""" + from natcap.invest import validation + csv_text = ("desc,lucode,val1,val2\n" + "corn,1,0.5,2\n" + "bread,2,1,4\n" + "beans,3,0.5,4\n" + "butter,4,9,1") + table_path = os.path.join(self.workspace_dir, 'table.csv') + with open(table_path, 'w') as table_file: + table_file.write(csv_text) + + df = validation.get_validated_dataframe( + table_path, + index_col='lucode', + columns={ + 'desc': {'type': 'freestyle_string'}, + 'lucode': {'type': 'integer'}, + 'val1': {'type': 'number'}, + 'val2': {'type': 'number'} + }) + self.assertEqual(df.index.name, 'lucode') + self.assertEqual(list(df.index.values), [1, 2, 3, 4]) + self.assertEqual(df['desc'][2], 'bread') + + def test_non_unique_keys(self): + """validation: test error is raised if keys are not unique.""" + from natcap.invest import validation + csv_text = ("lucode,desc,val1,val2\n" + "1,corn,0.5,2\n" + "2,bread,1,4\n" + "2,beans,0.5,4\n" + "4,butter,9,1") + table_path = os.path.join(self.workspace_dir, 'table.csv') + with open(table_path, 'w') as table_file: + table_file.write(csv_text) + + with self.assertRaises(ValueError): + validation.get_validated_dataframe( + table_path, + 
index_col='lucode', + columns={ + 'desc': {'type': 'freestyle_string'}, + 'lucode': {'type': 'integer'}, + 'val1': {'type': 'number'}, + 'val2': {'type': 'number'} + }) + + def test_missing_key_field(self): + """validation: test error is raised when missing key field.""" + from natcap.invest import validation + csv_text = ("luode,desc,val1,val2\n" + "1,corn,0.5,2\n" + "2,bread,1,4\n" + "3,beans,0.5,4\n" + "4,butter,9,1") + table_path = os.path.join(self.workspace_dir, 'table.csv') + with open(table_path, 'w') as table_file: + table_file.write(csv_text) + + with self.assertRaises(ValueError): + validation.get_validated_dataframe( + table_path, + index_col='lucode', + columns={ + 'desc': {'type': 'freestyle_string'}, + 'lucode': {'type': 'integer'}, + 'val1': {'type': 'number'}, + 'val2': {'type': 'number'} + }) + + def test_column_subset(self): + """validation: test column subset is properly returned.""" + from natcap.invest import validation + table_path = os.path.join(self.workspace_dir, 'table.csv') + with open(table_path, 'w') as table_file: + table_file.write( + "lucode,desc,val1,val2\n" + "1,corn,0.5,2\n" + "2,bread,1,4\n" + "3,beans,0.5,4\n" + "4,butter,9,1") + df = validation.get_validated_dataframe( + table_path, + columns={ + 'lucode': {'type': 'integer'}, + 'val1': {'type': 'number'}, + 'val2': {'type': 'number'} + }) + self.assertEqual(list(df.columns), ['lucode', 'val1', 'val2']) + + def test_column_pattern_matching(self): + """validation: test column subset is properly returned.""" + from natcap.invest import validation + table_path = os.path.join(self.workspace_dir, 'table.csv') + with open(table_path, 'w') as table_file: + table_file.write( + "lucode,grassland_value,forest_value,wetland_valueee\n" + "1,0.5,2\n" + "2,1,4\n" + "3,0.5,4\n" + "4,9,1") + df = validation.get_validated_dataframe( + table_path, + columns={ + 'lucode': {'type': 'integer'}, + '[HABITAT]_value': {'type': 'number'} + }) + self.assertEqual( + list(df.columns), ['lucode', 'grassland_value', 'forest_value']) + + def test_trailing_comma(self): + """validation: test a trailing comma on first line is handled properly.""" + from natcap.invest import validation + table_path = os.path.join(self.workspace_dir, 'table.csv') + with open(table_path, 'w') as table_file: + table_file.write( + "lucode,desc,val1,val2\n" + "1,corn,0.5,2,\n" + "2,bread,1,4\n" + "3,beans,0.5,4\n" + "4,butter,9,1") + result = validation.get_validated_dataframe( + table_path, + columns={ + 'desc': {'type': 'freestyle_string'}, + 'lucode': {'type': 'integer'}, + 'val1': {'type': 'number'}, + 'val2': {'type': 'number'} + }) + self.assertEqual(result['val2'][0], 2) + self.assertEqual(result['lucode'][1], 2) + + def test_trailing_comma_second_line(self): + """validation: test a trailing comma on second line is handled properly.""" + from natcap.invest import validation + csv_text = ("lucode,desc,val1,val2\n" + "1,corn,0.5,2\n" + "2,bread,1,4,\n" + "3,beans,0.5,4\n" + "4,butter,9,1") + table_path = os.path.join(self.workspace_dir, 'table.csv') + with open(table_path, 'w') as table_file: + table_file.write(csv_text) + + result = validation.get_validated_dataframe( + table_path, + index_col='lucode', + columns={ + 'desc': {'type': 'freestyle_string'}, + 'lucode': {'type': 'integer'}, + 'val1': {'type': 'number'}, + 'val2': {'type': 'number'} + }).to_dict(orient='index') + + expected_result = { + 1: {'desc': 'corn', 'val1': 0.5, 'val2': 2}, + 2: {'desc': 'bread', 'val1': 1, 'val2': 4}, + 3: {'desc': 'beans', 'val1': 0.5, 'val2': 4}, + 4: {'desc': 
'butter', 'val1': 9, 'val2': 1}} + + self.assertDictEqual(result, expected_result) + + def test_convert_cols_to_lower(self): + """validation: test that column names are converted to lowercase""" + from natcap.invest import validation + + csv_file = os.path.join(self.workspace_dir, 'csv.csv') + + with open(csv_file, 'w') as file_obj: + file_obj.write(textwrap.dedent( + """\ + header, + A, + b + """ + )) + df = validation.get_validated_dataframe( + csv_file, + columns={'header': {'type': 'freestyle_string'}}) + self.assertEqual(df['header'][0], 'a') + + def test_convert_vals_to_lower(self): + """validation: test that values are converted to lowercase""" + from natcap.invest import validation + + csv_file = os.path.join(self.workspace_dir, 'csv.csv') + + with open(csv_file, 'w') as file_obj: + file_obj.write(textwrap.dedent( + """\ + HEADER, + a, + b + """ + )) + df = validation.get_validated_dataframe( + csv_file, columns={'header': {'type': 'freestyle_string'}}) + self.assertEqual(df.columns[0], 'header') + + def test_integer_type_columns(self): + """validation: integer column values are returned as integers.""" + from natcap.invest import validation + csv_file = os.path.join(self.workspace_dir, 'csv.csv') + with open(csv_file, 'w') as file_obj: + file_obj.write(textwrap.dedent( + """\ + id,header, + 1,5.0, + 2,-1, + 3, + """ + )) + df = validation.get_validated_dataframe( + csv_file, + columns={ + 'id': {'type': 'integer'}, + 'header': {'type': 'integer', 'na_allowed': True}}) + self.assertIsInstance(df['header'][0], numpy.int64) + self.assertIsInstance(df['header'][1], numpy.int64) + # empty values are returned as pandas.NA + self.assertTrue(pandas.isna(df['header'][2])) + + def test_float_type_columns(self): + """validation: float column values are returned as floats.""" + from natcap.invest import validation + csv_file = os.path.join(self.workspace_dir, 'csv.csv') + with open(csv_file, 'w') as file_obj: + file_obj.write(textwrap.dedent( + """\ + h1,h2,h3 + 5,0.5,.4 + -1,-.3, + """ + )) + df = validation.get_validated_dataframe( + csv_file, + columns={ + 'h1': {'type': 'number'}, + 'h2': {'type': 'ratio'}, + 'h3': {'type': 'percent', 'na_allowed': True}, + }) + self.assertEqual(df['h1'].dtype, float) + self.assertEqual(df['h2'].dtype, float) + self.assertEqual(df['h3'].dtype, float) + # empty values are returned as numpy.nan + self.assertTrue(numpy.isnan(df['h3'][1])) + + def test_string_type_columns(self): + """validation: string column values are returned as strings.""" + from natcap.invest import validation + csv_file = os.path.join(self.workspace_dir, 'csv.csv') + with open(csv_file, 'w') as file_obj: + file_obj.write(textwrap.dedent( + """\ + h1,h2,h3 + 1,a,foo + 2,b, + """ + )) + df = validation.get_validated_dataframe( + csv_file, + columns={ + 'h1': {'type': 'freestyle_string'}, + 'h2': {'type': 'option_string'}, + 'h3': {'type': 'freestyle_string'}, + }) + self.assertEqual(df['h1'][0], '1') + self.assertEqual(df['h2'][1], 'b') + # empty values are returned as NA + self.assertTrue(pandas.isna(df['h3'][1])) + + def test_boolean_type_columns(self): + """validation: boolean column values are returned as booleans.""" + from natcap.invest import validation + csv_file = os.path.join(self.workspace_dir, 'csv.csv') + with open(csv_file, 'w') as file_obj: + file_obj.write(textwrap.dedent( + """\ + index,h1 + a,1 + b,0 + c, + """ + )) + df = validation.get_validated_dataframe( + csv_file, + columns={ + 'index': {'type': 'freestyle_string'}, + 'h1': {'type': 'boolean', 'na_allowed': True}}) 
+ self.assertEqual(df['h1'][0], True) + self.assertEqual(df['h1'][1], False) + # empty values are returned as pandas.NA + self.assertTrue(pandas.isna(df['h1'][2])) + + def test_expand_path_columns(self): + """validation: test values in path columns are expanded.""" + from natcap.invest import validation + csv_file = os.path.join(self.workspace_dir, 'csv.csv') + with open(csv_file, 'w') as file_obj: + file_obj.write(textwrap.dedent( + f"""\ + bar,path + 1,foo.txt + 2,foo/bar.txt + 3,foo\\bar.txt + 4,{self.workspace_dir}/foo.txt + 5, + """ + )) + df = validation.get_validated_dataframe( + csv_file, + columns={ + 'bar': {'type': 'integer'}, + 'path': {'type': 'file'} + }) + self.assertEqual( + f'{self.workspace_dir}{os.sep}foo.txt', + df['path'][0]) + self.assertEqual( + f'{self.workspace_dir}{os.sep}foo{os.sep}bar.txt', + df['path'][1]) + self.assertEqual( + f'{self.workspace_dir}{os.sep}foo\\bar.txt', + df['path'][2]) + self.assertEqual( + f'{self.workspace_dir}{os.sep}foo.txt', + df['path'][3]) + # empty values are returned as pandas.NA + self.assertTrue(pandas.isna(df['path'][4])) + + def test_other_kwarg(self): + """validation: any other kwarg should be passed to pandas.read_csv""" + from natcap.invest import validation + + csv_file = os.path.join(self.workspace_dir, 'csv.csv') + + with open(csv_file, 'w') as file_obj: + file_obj.write(textwrap.dedent( + """\ + h1;h2;h3 + a;b;c + d;e;f + """ + )) + # using sep=None with the default engine='python', + # it should infer what the separator is + df = validation.get_validated_dataframe( + csv_file, + columns={ + 'h1': {'type': 'freestyle_string'}, + 'h2': {'type': 'freestyle_string'}, + 'h3': {'type': 'freestyle_string'}}, + read_csv_kwargs={'converters': {'h2': lambda val: f'foo_{val}'}}) + + self.assertEqual(df.columns[0], 'h1') + self.assertEqual(df['h2'][1], 'foo_e') + + def test_csv_with_integer_headers(self): + """ + validation: CSV with integer headers should be read into strings. + + This shouldn't matter for any of the models, but if a user inputs a CSV + with extra columns that are labeled with numbers, it should still work.
+ """ + from natcap.invest import validation + + csv_file = os.path.join(self.workspace_dir, 'csv.csv') + + with open(csv_file, 'w') as file_obj: + file_obj.write(textwrap.dedent( + """\ + 1,2,3 + a,b,c + d,e,f + """ + )) + df = validation.get_validated_dataframe( + csv_file, + columns={ + '1': {'type': 'freestyle_string'}, + '2': {'type': 'freestyle_string'}, + '3': {'type': 'freestyle_string'} + }) + # expect headers to be strings + self.assertEqual(df.columns[0], '1') + self.assertEqual(df['1'][0], 'a') + + def test_removal_whitespace(self): + """validation: test that leading/trailing whitespace is removed.""" + from natcap.invest import validation + + csv_file = os.path.join(self.workspace_dir, 'csv.csv') + + with open(csv_file, 'w') as file_obj: + file_obj.write(" Col1, Col2 ,Col3 \n") + file_obj.write(" val1, val2 ,val3 \n") + file_obj.write(" , 2 1 , ") + df = validation.get_validated_dataframe( + csv_file, + columns={ + 'col1': {'type': 'freestyle_string'}, + 'col2': {'type': 'freestyle_string'}, + 'col3': {'type': 'freestyle_string'} + }) + # header should have no leading / trailing whitespace + self.assertEqual(list(df.columns), ['col1', 'col2', 'col3']) + + # values should have no leading / trailing whitespace + self.assertEqual(df['col1'][0], 'val1') + self.assertEqual(df['col2'][0], 'val2') + self.assertEqual(df['col3'][0], 'val3') + self.assertEqual(df['col1'][1], '') + self.assertEqual(df['col2'][1], '2 1') + self.assertEqual(df['col3'][1], '') + + def test_nan_row(self): + """validation: test NaN row is dropped.""" + from natcap.invest import validation + csv_text = ("lucode,desc,val1,val2\n" + "1,corn,0.5,2\n" + ",,,\n" + "3,beans,0.5,4\n" + "4,butter,9,1") + table_path = os.path.join(self.workspace_dir, 'table.csv') + with open(table_path, 'w') as table_file: + table_file.write(csv_text) + + result = validation.get_validated_dataframe( + table_path, + index_col='lucode', + columns={ + 'desc': {'type': 'freestyle_string'}, + 'lucode': {'type': 'integer'}, + 'val1': {'type': 'number'}, + 'val2': {'type': 'number'} + }).to_dict(orient='index') + expected_result = { + 1: {'desc': 'corn', 'val1': 0.5, 'val2': 2}, + 3: {'desc': 'beans', 'val1': 0.5, 'val2': 4}, + 4: {'desc': 'butter', 'val1': 9, 'val2': 1}} + + self.assertDictEqual(result, expected_result) + + def test_rows(self): + """validation: read csv with row headers instead of columns""" + from natcap.invest import validation + + csv_file = os.path.join(self.workspace_dir, 'csv.csv') + + with open(csv_file, 'w') as file_obj: + file_obj.write("row1, a ,b\n") + file_obj.write("row2,1,3\n") + df = validation.get_validated_dataframe( + csv_file, + rows={ + 'row1': {'type': 'freestyle_string'}, + 'row2': {'type': 'number'}, + }) + print(df) + # header should have no leading / trailing whitespace + self.assertEqual(list(df.columns), ['row1', 'row2']) + + self.assertEqual(df['row1'][0], 'a') + self.assertEqual(df['row1'][1], 'b') + self.assertEqual(df['row2'][0], 1) + self.assertEqual(df['row2'][1], 3) + self.assertEqual(df['row2'].dtype, float) + + class TestValidationFromSpec(unittest.TestCase): """Test Validation From Spec.""" diff --git a/tests/test_wind_energy.py b/tests/test_wind_energy.py index c34687330..c2b732fb0 100644 --- a/tests/test_wind_energy.py +++ b/tests/test_wind_energy.py @@ -167,30 +167,6 @@ def test_calculate_land_to_grid_distance(self): result_val = point_feat.GetField(field_index) numpy.testing.assert_allclose(result_val, exp_results[i]) - def test_read_csv_wind_parameters(self): - """WindEnergy: testing 
'read_csv_wind_parameter' function.""" - from natcap.invest import wind_energy - - csv_path = os.path.join( - SAMPLE_DATA, - 'global_wind_energy_parameters.csv') - - parameter_list = [ - 'air_density', 'exponent_power_curve', 'decommission_cost', - 'operation_maintenance_cost', 'miscellaneous_capex_cost'] - - result = wind_energy._read_csv_wind_parameters( - csv_path, parameter_list) - - expected_result = { - 'air_density': 1.225, - 'exponent_power_curve': 2, - 'decommission_cost': 0.037, - 'operation_maintenance_cost': .035, - 'miscellaneous_capex_cost': .05 - } - self.assertDictEqual(expected_result, result) - def test_wind_data_to_point_vector(self): """WindEnergy: testing 'wind_data_to_point_vector' function.""" from natcap.invest import wind_energy
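For orientation, below is a minimal usage sketch of the validation.get_validated_dataframe helper that the hunks above migrate to. It is an illustrative, hypothetical snippet, not part of the patch: the table path, column names, and values are invented, and only the call pattern (spec keys such as index_col, columns, rows, and read_csv_kwargs passed as keyword arguments, typically unpacked from a MODEL_SPEC['args'][...] entry) is taken from the diff.

import os
import tempfile

from natcap.invest import validation

# Write a small CSV to a temporary workspace (illustrative data only).
workspace = tempfile.mkdtemp()
table_path = os.path.join(workspace, 'table.csv')
with open(table_path, 'w') as table_file:
    table_file.write(
        "lucode,desc,val1\n"
        "1,corn,0.5\n"
        "2,bread,1\n")

# The column specs mirror the MODEL_SPEC-style dicts used throughout the diff;
# values are cast according to each column's 'type' and the dataframe is
# indexed by the column named in 'index_col'.
df = validation.get_validated_dataframe(
    table_path,
    index_col='lucode',
    columns={
        'lucode': {'type': 'integer'},
        'desc': {'type': 'freestyle_string'},
        'val1': {'type': 'number'},
    })
print(df.to_dict(orient='index'))
# e.g. {1: {'desc': 'corn', 'val1': 0.5}, 2: {'desc': 'bread', 'val1': 1.0}}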