diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f0f1a089..805ce058 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ repos: - test_requirements.txt - repo: https://github.com/psf/black - rev: 24.3.0 + rev: 24.4.0 hooks: - id: black language_version: python3 @@ -31,7 +31,7 @@ repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.3.5 + rev: v0.3.7 hooks: - id: ruff diff --git a/compliance_checker/base.py b/compliance_checker/base.py index c2674e85..dbad8518 100644 --- a/compliance_checker/base.py +++ b/compliance_checker/base.py @@ -192,8 +192,9 @@ def __del__(self): inadvertently mutated by other functions. """ - cfutil.get_geophysical_variables.cache_clear() - cfutil.get_time_variables.cache_clear() + if cfutil is not None: + cfutil.get_geophysical_variables.cache_clear() + cfutil.get_time_variables.cache_clear() class BaseNCCheck: diff --git a/compliance_checker/cf/cf_1_6.py b/compliance_checker/cf/cf_1_6.py index 2e31f029..f059e0f7 100644 --- a/compliance_checker/cf/cf_1_6.py +++ b/compliance_checker/cf/cf_1_6.py @@ -425,12 +425,12 @@ def check_fill_value_equal_missing_value(self, ds): return Result( BaseCheck.MEDIUM, - (len(fails), total), + (total - len(fails), total), self.section_titles["2.5"], msgs=fails, ) - def check_valid_range_or_valid_min_max_present(self, ds): + def check_valid_range_and_valid_min_max_present(self, ds): """ The valid_range attribute must not be present if the valid_min and/or valid_max attributes are present. This according to 2.5.1 Requirements. 
@@ -443,19 +443,22 @@ def check_valid_range_or_valid_min_max_present(self, ds): total = 0 for variable in ds.variables.values(): - if hasattr(variable, "valid_max") and ( - hasattr(variable, "valid_min") or hasattr(variable, "valid_range") - ): - total = total + 1 - - fails.append( - f"For the variable {variable.name} the valid_range attribute must not be present " - "if the valid_min and/or valid_max attributes are present", - ) + if hasattr(variable, "valid_max") or hasattr(variable, "valid_min"): + total += 1 + # if there's also valid_range in addition to + # valid_min/valid_max, this is not compliant + if hasattr(variable, "valid_range"): + fails.append( + f"For the variable {variable.name} the valid_range attribute must not be present " + "if the valid_min and/or valid_max attributes are present", + ) + # *Just* valid_range should be added to total as well + elif hasattr(variable, "valid_range"): + total += 1 return Result( BaseCheck.MEDIUM, - (len(fails), total), + (total - len(fails), total), self.section_titles["2.5"], msgs=fails, ) diff --git a/compliance_checker/cf/cf_1_7.py b/compliance_checker/cf/cf_1_7.py index 50eb05ee..7e6cb03b 100644 --- a/compliance_checker/cf/cf_1_7.py +++ b/compliance_checker/cf/cf_1_7.py @@ -391,7 +391,6 @@ def check_cell_boundaries_interval(self, ds): reasoning, ) ret_val.append(result) - print(ret_val) return ret_val def check_cell_measures(self, ds): diff --git a/compliance_checker/protocols/netcdf.py b/compliance_checker/protocols/netcdf.py index 4943ac8a..be99d1e4 100644 --- a/compliance_checker/protocols/netcdf.py +++ b/compliance_checker/protocols/netcdf.py @@ -91,6 +91,10 @@ def is_remote_netcdf(ds_str): else: content_type = head_req.headers.get("content-type") + if content_type is None: + return False + # if the Content-Type header returned was "application/x-netcdf", # or a netCDF file (not OPeNDAP) we can open this into a Dataset - return content_type == "application/x-netcdf" + # Add support for 
application/x-netcdf;ver=4 + return content_type.split(";")[0] == "application/x-netcdf" diff --git a/compliance_checker/suite.py b/compliance_checker/suite.py index 7d922110..17002da4 100644 --- a/compliance_checker/suite.py +++ b/compliance_checker/suite.py @@ -17,13 +17,13 @@ from pathlib import Path from urllib.parse import urlparse +import importlib_metadata import requests from lxml import etree as ET from netCDF4 import Dataset from owslib.sos import SensorObservationService from owslib.swe.sensor.sml import SensorML from packaging.version import parse -from pkg_resources import working_set from compliance_checker import __version__, tempnc from compliance_checker.base import BaseCheck, GenericFile, Result, fix_return_value @@ -73,8 +73,10 @@ def _get_generator_plugins(cls): """ if not hasattr(cls, "suite_generators"): - gens = working_set.iter_entry_points("compliance_checker.generators") - cls.suite_generators = [x.resolve() for x in gens] + gens = importlib_metadata.entry_points( + group="compliance_checker.generators", + ) + cls.suite_generators = [x.load() for x in gens] return cls.suite_generators @@ -136,7 +138,9 @@ def load_all_available_checkers(cls): Helper method to retrieve all sub checker classes derived from various base classes. 
""" - cls._load_checkers(working_set.iter_entry_points("compliance_checker.suites")) + cls._load_checkers( + importlib_metadata.entry_points(group="compliance_checker.suites"), + ) @classmethod def _load_checkers(cls, checkers): @@ -147,7 +151,7 @@ def _load_checkers(cls, checkers): for c in checkers: try: - check_obj = c.resolve() + check_obj = c.load() if hasattr(check_obj, "_cc_spec") and hasattr( check_obj, "_cc_spec_version", @@ -867,6 +871,11 @@ def load_remote_dataset(self, ds_str): content_type = response.headers.get("content-type") if content_type.split(";")[0] == "text/xml": return self.process_doc(response.content) + elif content_type.split(";")[0] == "application/x-netcdf": + return Dataset( + urlparse(response.url).path, + memory=response.content, + ) else: raise ValueError( f"Unknown service with content-type: {content_type}", diff --git a/compliance_checker/tests/helpers.py b/compliance_checker/tests/helpers.py index a07c1aa3..3642e123 100644 --- a/compliance_checker/tests/helpers.py +++ b/compliance_checker/tests/helpers.py @@ -1,6 +1,6 @@ import tempfile -from netCDF4 import Dataset +from netCDF4._netCDF4 import Dataset class MockNetCDF(Dataset): @@ -23,6 +23,14 @@ def __init__(self, filename=None): persist=False, ) + # suppress usual dealloc routine to prevent caught exception messages + # from printing + def __dealloc__(self): + try: + super().__dealloc__() + except AttributeError: + pass + class MockTimeSeries(MockNetCDF): """ diff --git a/compliance_checker/tests/test_cf.py b/compliance_checker/tests/test_cf.py index 360683be..292bfd82 100644 --- a/compliance_checker/tests/test_cf.py +++ b/compliance_checker/tests/test_cf.py @@ -396,7 +396,7 @@ def test_check_fill_value_equal_missing_value(self): assert result.msgs == expected_msgs - def test_check_valid_range_or_valid_min_max_present(self): + def test_check_valid_range_and_valid_min_max_present(self): """ 2.5.1 Missing data, valid and actual range of data Requirements: @@ -426,7 +426,7 @@ def 
test_check_valid_range_or_valid_min_max_present(self): dataset.variables["c"][1] = 2 dataset.variables["c"].setncattr("valid_range", [-10, 10]) - result = self.cf.check_valid_range_or_valid_min_max_present(dataset) + result = self.cf.check_valid_range_and_valid_min_max_present(dataset) # check if the test fails when when variable "a" is checked. expected_msgs = [ @@ -436,7 +436,7 @@ def test_check_valid_range_or_valid_min_max_present(self): ] assert result.msgs == expected_msgs - assert result.value[0] == result.value[1] + assert result.value[0] < result.value[1] def test_check_fill_value_outside_valid_range(self): """ @@ -1794,7 +1794,8 @@ def test_64bit(self): dataset = self.load_dataset(STATIC_FILES["ints64"]) suite = CheckSuite() suite.checkers = {"cf": CF1_6Check} - suite.run(dataset, "cf") + # suite.run(dataset, "cf") + suite.run_all(dataset, ["cf"], skip_checks=["cf"]) def test_variable_feature_check(self): # non-compliant dataset -- 1/1 fail diff --git a/compliance_checker/tests/test_cf_integration.py b/compliance_checker/tests/test_cf_integration.py index 33f8fa29..a55da152 100644 --- a/compliance_checker/tests/test_cf_integration.py +++ b/compliance_checker/tests/test_cf_integration.py @@ -245,7 +245,8 @@ def get_results(self, check_results, checksuite): ], # must be specified to load this param at runtime, instead of at collection ) def test_cf_integration(self, loaded_dataset, expected_messages, cs): - check_results = cs.run(loaded_dataset, [], "cf") + # check_results = cs.run(loaded_dataset, [], "cf") + check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[]) scored, out_of, messages = self.get_results(check_results, cs) assert scored < out_of @@ -270,14 +271,16 @@ def test_cf_integration(self, loaded_dataset, expected_messages, cs): indirect=["loaded_dataset"], ) def test_no_incorrect_errors(self, cs, loaded_dataset, wrong_message): - check_results = cs.run(loaded_dataset, [], True, "cf") + # check_results = cs.run(loaded_dataset, [], True, 
"cf") + check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[]) messages = self.get_results(check_results, cs)[-1] assert wrong_message not in "".join(messages) @pytest.mark.parametrize("loaded_dataset", ["fvcom"], indirect=True) def test_fvcom(self, cs, loaded_dataset): - check_results = cs.run(loaded_dataset, [], True, "cf") + # check_results = cs.run(loaded_dataset, [], True, "cf") + check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[]) scored, out_of, messages = self.get_results(check_results, cs) assert scored < out_of @@ -305,6 +308,7 @@ def test_ncei_templates(self, cs, loaded_dataset): Tests some of the NCEI NetCDF templates, which usually should get a perfect score. """ - check_results = cs.run(loaded_dataset, [], "cf") + # check_results = cs.run(loaded_dataset, [], "cf") + check_results = cs.run_all(loaded_dataset, ["cf"], skip_checks=[]) scored, out_of, messages = self.get_results(check_results, cs) assert scored < out_of diff --git a/compliance_checker/tests/test_cli.py b/compliance_checker/tests/test_cli.py index fc6b4f94..c86cd897 100644 --- a/compliance_checker/tests/test_cli.py +++ b/compliance_checker/tests/test_cli.py @@ -91,7 +91,7 @@ def checker_1(): def checker_2(): return Namespace(_cc_spec="checker_2", _cc_spec_version="2.2") - mock_checkers = [Namespace(resolve=checker_1), Namespace(resolve=checker_2)] + mock_checkers = [Namespace(load=checker_1), Namespace(load=checker_2)] with pytest.warns(DeprecationWarning): CheckSuite._load_checkers(mock_checkers) diff --git a/compliance_checker/tests/test_protocols.py b/compliance_checker/tests/test_protocols.py index f43bce68..509b055d 100644 --- a/compliance_checker/tests/test_protocols.py +++ b/compliance_checker/tests/test_protocols.py @@ -38,7 +38,10 @@ def test_hyrax(): """ Tests that a connection can be made to Hyrax """ - url = "http://test.opendap.org:8080/opendap/ioos/mday_joinExist.ncml" + # Returns: error 405 + # url = 
"http://test.opendap.org:8080/opendap/ioos/mday_joinExist.ncml" + # More direct file + url = "http://test.opendap.org:8080/opendap/ioos/mday_joinExist.ncml.dap.nc4" cs = CheckSuite() ds = cs.load_dataset(url) assert ds is not None @@ -48,13 +51,17 @@ def test_thredds(): """ Tests that a connection can be made to a remote THREDDS endpoint """ - url = "http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg_ana/TP" + # Returns: error 400 + # url = "http://thredds.ucar.edu/thredds/dodsC/grib/NCEP/GFS/Global_0p25deg_ana/TP" + # Use a smaller dataset + url = "https://thredds.ucar.edu/thredds/ncss/grid/grib/NCEP/GFS/Global_0p25deg_ana/TP?var=Temperature_altitude_above_msl&accept=netcdf3" cs = CheckSuite() ds = cs.load_dataset(url) assert ds is not None +@pytest.mark.skip(reason="The thredds endpoint is no longer serving SOS.") def test_sos(): """ Tests that a connection can be made to an SOS endpoint diff --git a/compliance_checker/tests/test_suite.py b/compliance_checker/tests/test_suite.py index 8c8987df..61915ee9 100644 --- a/compliance_checker/tests/test_suite.py +++ b/compliance_checker/tests/test_suite.py @@ -51,16 +51,19 @@ def test_suite(self): # BWA: what's the purpose of this test? Just to see if the suite # runs without errors? 
ds = self.cs.load_dataset(static_files["2dim"]) - self.cs.run(ds, [], "acdd") + # self.cs.run(ds, [], "acdd") + self.cs.run_all(ds, ["acdd"], skip_checks=[]) def test_suite_pathlib(self): path_obj = Path(static_files["2dim"]) ds = self.cs.load_dataset(path_obj) - self.cs.run(ds, [], "acdd") + # self.cs.run(ds, [], "acdd") + self.cs.run_all(ds, ["acdd"], skip_checks=[]) def test_unicode_formatting(self): ds = self.cs.load_dataset(static_files["bad_region"]) - score_groups = self.cs.run(ds, [], "cf") + # score_groups = self.cs.run(ds, [], "cf") + score_groups = self.cs.run_all(ds, ["cf"], skip_checks=[]) limit = 2 for checker, rpair in score_groups.items(): @@ -151,7 +154,8 @@ def test_group_func(self): # This is checking for issue #183, where group_func results in # IndexError: list index out of range ds = self.cs.load_dataset(static_files["bad_data_type"]) - score_groups = self.cs.run(ds, [], "cf") + # score_groups = self.cs.run(ds, [], "cf") + score_groups = self.cs.run_all(ds, ["cf"], skip_checks=[]) limit = 2 for checker, rpair in score_groups.items(): @@ -186,7 +190,8 @@ def test_cdl_file(self): # Testing whether you can run compliance checker on a .cdl file # Load the cdl file ds = self.cs.load_dataset(static_files["test_cdl"]) - vals = self.cs.run(ds, [], "cf") + # vals = self.cs.run(ds, [], "cf") + vals = self.cs.run_all(ds, ["cf"], skip_checks=[]) limit = 2 for checker, rpair in vals.items(): @@ -209,7 +214,8 @@ def test_cdl_file(self): # Ok now load the nc file that it came from ds = self.cs.load_dataset(static_files["test_cdl_nc"]) - vals = self.cs.run(ds, [], "cf") + # vals = self.cs.run(ds, [], "cf") + vals = self.cs.run_all(ds, ["cf"], skip_checks=[]) limit = 2 for checker, rpair in vals.items(): @@ -247,7 +253,8 @@ def test_standard_output_score_header(self): of potential issues, rather than the weighted score """ ds = self.cs.load_dataset(static_files["bad_region"]) - score_groups = self.cs.run(ds, [], "cf") + # score_groups = self.cs.run(ds, [], 
"cf") + score_groups = self.cs.run_all(ds, ["cf"], skip_checks=[]) limit = 2 groups, errors = score_groups["cf"] score_list, all_passed, out_of = self.cs.standard_output( diff --git a/docs/source/development.md b/docs/source/development.md new file mode 100644 index 00000000..67b35b67 --- /dev/null +++ b/docs/source/development.md @@ -0,0 +1,11 @@ +# Developer Notes + +## pytest + +When running the Python test suite, there may be test errors. Certain +tests `record` responses to remote queries for information. If tests +fail, they will appear to continue to fail as the queries are cached. + +To perform tests using fresh queries from remote services, use +`pytest --disable-vcr`. In certain cases, clearing the cache is +also advised, use `pytest --cache-clear`. diff --git a/docs/source/faq.md b/docs/source/faq.md index f461ce88..d95859bb 100644 --- a/docs/source/faq.md +++ b/docs/source/faq.md @@ -45,6 +45,8 @@ The Compliance Checker is completely open-source and available on [GitHub](https ## Disclaimer -The objective of the IOOS Compliance Checker is to check your file against our interpretation of select dataset metadata standards to use as a guideline in generating compliant files. -The compliance checker should not be considered the authoritative source on whether your file is 100% "compliant". +The objective of the IOOS Compliance Checker is to check your file against +our interpretation of select dataset metadata standards to use as a +guideline in generating compliant files. The compliance checker should +not be considered the authoritative source on whether your file is 100% "compliant". Instead, we recommend that users use the results as a guide to work towards compliance. diff --git a/docs/source/index.rst b/docs/source/index.rst index 0f67a152..bcf62b59 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -10,6 +10,7 @@ Python tool to check your datasets against compliance standards.
quickintro compliance_checker_api faq + development Indices and tables ================== diff --git a/requirements.txt b/requirements.txt index 6277c633..fd3bbe9a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ cf-units>=2 cftime>=1.1.0 +importlib-metadata # drop this when dropping Python 3.8 importlib-resources # drop this when dropping Python 3.8 isodate>=0.6.1 jinja2>=2.7.3