From be0da171b6f15a7bf052745b949fba3776d4b2eb Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Mon, 16 Sep 2024 14:45:22 -0400 Subject: [PATCH 01/13] Use temp .netrc file for integration tests Fixes #806 Fixes #743 Fixes #480 --- .gitignore | 1 + CHANGELOG.md | 9 +++ docs/howto/authenticate.md | 62 ++++++++++------ earthaccess/__init__.py | 4 +- earthaccess/auth.py | 53 +++++++++++--- earthaccess/kerchunk.py | 34 +++++---- scripts/integration-test.sh | 2 +- tests/integration/conftest.py | 38 ++++++++++ tests/integration/test_api.py | 46 +++++------- tests/integration/test_auth.py | 86 +++++++---------------- tests/integration/test_cloud_download.py | 45 ++++-------- tests/integration/test_cloud_open.py | 38 +++------- tests/integration/test_kerchunk.py | 20 +----- tests/integration/test_onprem_download.py | 68 +++++++----------- tests/integration/test_onprem_open.py | 58 ++++++--------- 15 files changed, 270 insertions(+), 294 deletions(-) diff --git a/.gitignore b/.gitignore index d0ee3ea9..f3567622 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,7 @@ htmlcov dist site .coverage +.coverage.* coverage.xml .netlify test.db diff --git a/CHANGELOG.md b/CHANGELOG.md index 1cab9c40..638073e0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,9 @@ instead ([#766](https://github.com/nsidc/earthaccess/issues/766)) ([**@Sherwin-14**](https://github.com/Sherwin-14), [**@chuckwondo**](https://github.com/chuckwondo)) +- Use built-in `assert` statement in integration tests + ([#743](https://github.com/nsidc/earthaccess/issues/743)) + ([**@chuckwondo**](https://github.com/chuckwondo)) ### Added @@ -25,6 +28,9 @@ [**@chuckwondo**](https://github.com/chuckwondo), [**@mfisher87**](https://github.com/mfisher87), [**@betolink**](https://github.com/betolink)) +- Support use of `NETRC` environment variable to override default `.netrc` file + location ([#480](https://github.com/nsidc/earthaccess/issues/480)) + ([**@chuckwondo**](https://github.com/chuckwondo)) - Added 
example PR links to pull request template ([#756](https://github.com/nsidc/earthaccess/issues/756)) @@ -38,6 +44,9 @@ - Removed Broken Link "Introduction to NASA earthaccess" ([#779](https://github.com/nsidc/earthaccess/issues/779)) ([**@Sherwin-14**](https://github.com/Sherwin-14)) +- Integration tests no longer clobber existing `.netrc` file + ([#806](https://github.com/nsidc/earthaccess/issues/806)) + ([**@chuckwondo**](https://github.com/chuckwondo)) ## [0.10.0] 2024-07-19 diff --git a/docs/howto/authenticate.md b/docs/howto/authenticate.md index 407e5140..ec94f5ea 100644 --- a/docs/howto/authenticate.md +++ b/docs/howto/authenticate.md @@ -1,10 +1,14 @@ -## Authenticate with Earthdata Login +# Authenticate with Earthdata Login -The first step to use NASA Earthdata is to create an account with Earthdata Login, please follow the instructions at [NASA EDL](https://urs.earthdata.nasa.gov/) +The first step to use NASA Earthdata is to create an account with Earthdata +Login, please follow the instructions at +[NASA EDL](https://urs.earthdata.nasa.gov/) -Once registered, earthaccess can use environment variables, a `.netrc` file or interactive input from a user to login with NASA EDL. +Once registered, earthaccess can use environment variables, a `.netrc` file or +interactive input from a user to login with NASA EDL. -If a strategy is not especified, env vars will be used first, then netrc and finally user's input. +If a strategy is not specified, environment variables will be used first, then +a `.netrc` (if found, see below), and finally a user's input. 
```py import earthaccess @@ -12,35 +16,48 @@ import earthaccess auth = earthaccess.login() ``` -If you have a .netrc file with your Earthdata Login credentials +If you have a `.netrc` file (see below) with your Earthdata Login credentials, +you can explicitly specify its use: ```py auth = earthaccess.login(strategy="netrc") ``` -If your Earthdata Login credentials are set as environment variables: EARTHDATA_USERNAME, EARTHDATA_PASSWORD +If your Earthdata Login credentials are set as the environment variables +`EARTHDATA_USERNAME` and `EARTHDATA_PASSWORD`, you can explicitly specify their +use: ```py auth = earthaccess.login(strategy="environment") ``` -If you wish to enter your Earthdata Login credentials when prompted with optional persistence to .netrc +If you wish to enter your Earthdata Login credentials when prompted, with +optional persistence to your `.netrc` file (see below), specify the interactive +strategy: ```py auth = earthaccess.login(strategy="interactive", persist=True) ``` +## Authentication +By default, `earthaccess` will automatically look for your EDL account +credentials in two locations: -### **Authentication** +1. A `.netrc` file: By default, this is either `~/_netrc` (on a Windows system) + or `~/.netrc` (on a non-Windows system). On *any* system, you may override + the default location by setting the `NETRC` environment variable to the path + of your desired `.netrc` file. -By default, `earthaccess` with automatically look for your EDL account credentials in two locations: - -1. A `~/.netrc` file + **NOTE**: When setting the `NETRC` environment variable, there is no + requirement to use a specific filename. The name `.netrc` is common, but + used throughout documentation primarily for convenience. The only + requirement is that the *contents* of the file adhere to the + [`.netrc` file format](https://www.gnu.org/software/inetutils/manual/html_node/The-_002enetrc-file.html). 2. 
`EARTHDATA_USERNAME` and `EARTHDATA_PASSWORD` environment variables -If neither of these options are configured, you can authenticate by calling the `earthaccess.login()` method -and manually entering your EDL account credentials. +If neither of these options are configured, you can authenticate by calling the +`earthaccess.login()` method and manually entering your EDL account credentials. ```python import earthaccess @@ -48,27 +65,26 @@ import earthaccess earthaccess.login() ``` -Note you can pass `persist=True` to `earthaccess.login()` to have the EDL account credentials you enter -automatically saved to a `~/.netrc` file for future use. - +Note you can pass `persist=True` to `earthaccess.login()` to have the EDL +account credentials you enter automatically saved to your `.netrc` file (see +above) for future use. Once you are authenticated with NASA EDL you can: * Get a file from a DAAC using a `fsspec` session. -* Request temporary S3 credentials from a particular DAAC (needed to download or stream data from an S3 bucket in the cloud). +* Request temporary S3 credentials from a particular DAAC (needed to download or + stream data from an S3 bucket in the cloud). * Use the library to download or stream data directly from S3. * Regenerate CMR tokens (used for restricted datasets). 
+## Earthdata User Acceptance Testing (UAT) environment -### Earthdata User Acceptance Testing (UAT) environment - -If your EDL account is authorized to access the User Acceptance Testing (UAT) system, -you can set earthaccess to work with its EDL and CMR endpoints -by setting the `system` argument at login, as follows: +If your EDL account is authorized to access the User Acceptance Testing (UAT) +system, you can set earthaccess to work with its EDL and CMR endpoints by +setting the `system` argument at login, as follows: ```python import earthaccess earthaccess.login(system=earthaccess.UAT) - ``` diff --git a/earthaccess/__init__.py b/earthaccess/__init__.py index 6d7d0def..c82c23d3 100644 --- a/earthaccess/__init__.py +++ b/earthaccess/__init__.py @@ -21,7 +21,7 @@ ) from .auth import Auth from .kerchunk import consolidate_metadata -from .search import DataCollections, DataGranules +from .search import DataCollection, DataCollections, DataGranule, DataGranules from .services import DataServices from .store import Store from .system import PROD, UAT @@ -46,7 +46,9 @@ "download", "auth_environ", # search.py + "DataGranule", "DataGranules", + "DataCollection", "DataCollections", "DataServices", # auth.py diff --git a/earthaccess/auth.py b/earthaccess/auth.py index 4332379a..3a3b209c 100644 --- a/earthaccess/auth.py +++ b/earthaccess/auth.py @@ -25,6 +25,24 @@ logger = logging.getLogger(__name__) +def netrc_path() -> Path: + """Return the path of the `.netrc` file. + + The path may or may not exist. + + See [the `.netrc` file](https://www.gnu.org/software/inetutils/manual/html_node/The-_002enetrc-file.html). + + Returns: + `Path` of the `NETRC` environment variable, if the value is non-empty; + otherwise, the path of the platform-specific default location: + `~/_netrc` on Windows systems, `~/.netrc` on non-Windows systems. 
+ """ + sys_netrc_name = "_netrc" if platform.system() == "Windows" else ".netrc" + env_netrc = os.environ.get("NETRC") + + return Path(env_netrc) if env_netrc else Path.home() / sys_netrc_name + + class SessionWithHeaderRedirection(requests.Session): """Requests removes auth headers if the redirect happens outside the original req domain. @@ -104,11 +122,12 @@ def login( if self.authenticated and (system == self.system): logger.debug("We are already authenticated with NASA EDL") return self + if strategy == "interactive": self._interactive(persist) - if strategy == "netrc": + elif strategy == "netrc": self._netrc() - if strategy == "environment": + elif strategy == "environment": self._environment() return self @@ -222,25 +241,29 @@ def _interactive(self, persist_credentials: bool = False) -> bool: if authenticated: logger.debug("Using user provided credentials for EDL") if persist_credentials: - logger.info("Persisting credentials to .netrc") self._persist_user_credentials(username, password) return authenticated def _netrc(self) -> bool: + netrc_loc = netrc_path() + try: - my_netrc = Netrc() + my_netrc = Netrc(str(netrc_loc)) except FileNotFoundError as err: - raise FileNotFoundError(f"No .netrc found in {Path.home()}") from err + raise FileNotFoundError(f"No .netrc found at {netrc_loc}") from err except NetrcParseError as err: - raise NetrcParseError("Unable to parse .netrc") from err + raise NetrcParseError(f"Unable to parse .netrc file {netrc_loc}") from err + if (creds := my_netrc[self.system.edl_hostname]) is None: return False username = creds["login"] password = creds["password"] authenticated = self._get_credentials(username, password) + if authenticated: logger.debug("Using .netrc file for EDL") + return authenticated def _environment(self) -> bool: @@ -293,33 +316,41 @@ def _find_or_create_token(self, username: str, password: str) -> Any: def _persist_user_credentials(self, username: str, password: str) -> bool: # See: 
https://github.com/sloria/tinynetrc/issues/34 + + netrc_loc = netrc_path() + logger.info(f"Persisting credentials to {netrc_loc}") + try: - netrc_path = Path().home().joinpath(".netrc") - netrc_path.touch(exist_ok=True) - netrc_path.chmod(0o600) + netrc_loc.touch(exist_ok=True) + netrc_loc.chmod(0o600) except Exception as e: logger.error(e) return False - my_netrc = Netrc(str(netrc_path)) + + my_netrc = Netrc(str(netrc_loc)) my_netrc[self.system.edl_hostname] = { "login": username, "password": password, } my_netrc.save() + urs_cookies_path = Path.home() / ".urs_cookies" + if not urs_cookies_path.exists(): urs_cookies_path.write_text("") # Create and write to .dodsrc file dodsrc_path = Path.home() / ".dodsrc" + if not dodsrc_path.exists(): dodsrc_contents = ( - f"HTTP.COOKIEJAR={urs_cookies_path}\nHTTP.NETRC={netrc_path}" + f"HTTP.COOKIEJAR={urs_cookies_path}\nHTTP.NETRC={netrc_loc}" ) dodsrc_path.write_text(dodsrc_contents) if platform.system() == "Windows": local_dodsrc_path = Path.cwd() / dodsrc_path.name + if not local_dodsrc_path.exists(): shutil.copy2(dodsrc_path, local_dodsrc_path) diff --git a/earthaccess/kerchunk.py b/earthaccess/kerchunk.py index 26758184..9ee40dec 100644 --- a/earthaccess/kerchunk.py +++ b/earthaccess/kerchunk.py @@ -1,34 +1,39 @@ from __future__ import annotations +from typing import Optional, Union + import fsspec +import fsspec.utils import s3fs import earthaccess def _get_chunk_metadata( - granule: earthaccess.results.DataGranule, - fs: fsspec.AbstractFileSystem | s3fs.S3FileSystem, + granule: earthaccess.DataGranule, + fs: fsspec.AbstractFileSystem, ) -> list[dict]: from kerchunk.hdf import SingleHdf5ToZarr metadata = [] access = "direct" if isinstance(fs, s3fs.S3FileSystem) else "indirect" + for url in granule.data_links(access=access): with fs.open(url) as inf: h5chunks = SingleHdf5ToZarr(inf, url) m = h5chunks.translate() metadata.append(m) + return metadata def consolidate_metadata( - granules: 
list[earthaccess.results.DataGranule], - kerchunk_options: dict | None = None, + granules: list[earthaccess.DataGranule], + kerchunk_options: Optional[dict] = None, access: str = "direct", - outfile: str | None = None, - storage_options: dict | None = None, -) -> str | dict: + outfile: Optional[str] = None, + storage_options: Optional[dict] = None, +) -> Union[str, dict]: try: import dask @@ -44,15 +49,16 @@ def consolidate_metadata( fs = earthaccess.get_fsspec_https_session() # Get metadata for each granule - get_chunk_metadata = dask.delayed(_get_chunk_metadata) - chunks = dask.compute(*[get_chunk_metadata(g, fs) for g in granules]) + get_chunk_metadata = dask.delayed(_get_chunk_metadata) # type: ignore + chunks = dask.compute(*[get_chunk_metadata(g, fs) for g in granules]) # type: ignore chunks = sum(chunks, start=[]) # Get combined metadata object mzz = MultiZarrToZarr(chunks, **(kerchunk_options or {})) - if outfile is not None: - output = fsspec.utils.stringify_path(outfile) - mzz.translate(outfile, storage_options=storage_options or {}) - return output - else: + + if outfile is None: return mzz.translate() + + output = fsspec.utils.stringify_path(outfile) + mzz.translate(outfile, storage_options=storage_options or {}) + return output diff --git a/scripts/integration-test.sh b/scripts/integration-test.sh index 15b173f8..506976ad 100755 --- a/scripts/integration-test.sh +++ b/scripts/integration-test.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash set -x -pytest tests/integration --cov=earthaccess --cov=tests/integration --cov-report=term-missing ${@} --capture=no --tb=native --log-cli-level=INFO +pytest tests/integration --cov=earthaccess --cov=tests/integration --cov-report=term-missing "${@}" --capture=no --tb=native --log-cli-level=INFO RET=$? 
set +x diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index c2d4a3c2..db71b54f 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -1,3 +1,7 @@ +import os +import pathlib + +import earthaccess import pytest ACCEPTABLE_FAILURE_RATE = 10 @@ -29,3 +33,37 @@ def pytest_sessionfinish(session, exitstatus): failure_rate = (100.0 * session.testsfailed) / session.testscollected if failure_rate <= ACCEPTABLE_FAILURE_RATE: session.exitstatus = 99 + + +@pytest.fixture +def mock_env(monkeypatch): + earthaccess.__auth__ = earthaccess.Auth() + # the original comes from github secrets + monkeypatch.setenv("EARTHDATA_USERNAME", os.getenv("EARTHACCESS_TEST_USERNAME", "")) + monkeypatch.setenv("EARTHDATA_PASSWORD", os.getenv("EARTHACCESS_TEST_PASSWORD", "")) + + +@pytest.fixture +def mock_missing_netrc(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch): + netrc_path = tmp_path / ".netrc" + monkeypatch.setenv("NETRC", str(netrc_path)) + monkeypatch.delenv("EARTHDATA_USERNAME") + monkeypatch.delenv("EARTHDATA_PASSWORD") + # Currently, due to there being only a single, global, module-level auth + # value, tests using different auth strategies interfere with each other, + # so here we are deleting the auth attribute so that it doesn't interfere. 
+ monkeypatch.delattr(earthaccess, "__auth__", raising=False) + + +@pytest.fixture +def mock_netrc(mock_env, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch): + netrc = tmp_path / ".netrc" + monkeypatch.setenv("NETRC", str(netrc)) + + username = os.environ["EARTHDATA_USERNAME"] + password = os.environ["EARTHDATA_PASSWORD"] + + netrc.write_text( + f"machine urs.earthdata.nasa.gov login {username} password {password}\n" + ) + netrc.chmod(0o600) diff --git a/tests/integration/test_api.py b/tests/integration/test_api.py index 8fd45489..5e90cf46 100644 --- a/tests/integration/test_api.py +++ b/tests/integration/test_api.py @@ -1,21 +1,11 @@ -# package imports import logging import os -import unittest from pathlib import Path import earthaccess import pytest logger = logging.getLogger(__name__) -assertions = unittest.TestCase("__init__") - - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") dataset_valid_params = [ @@ -42,36 +32,36 @@ ] -def test_auth_returns_valid_auth_class(): +def test_auth_returns_valid_auth_class(mock_env): auth = earthaccess.login(strategy="environment") - assertions.assertIsInstance(auth, earthaccess.Auth) - assertions.assertIsInstance(earthaccess.__auth__, earthaccess.Auth) - assertions.assertTrue(earthaccess.__auth__.authenticated) + assert isinstance(auth, earthaccess.Auth) + assert isinstance(earthaccess.__auth__, earthaccess.Auth) + assert earthaccess.__auth__.authenticated -def test_dataset_search_returns_none_with_no_parameters(): +def test_dataset_search_returns_none_with_no_parameters(mock_env): results = earthaccess.search_datasets() - assertions.assertIsInstance(results, list) - assertions.assertTrue(len(results) == 0) + assert isinstance(results, list) + assert len(results) == 0 @pytest.mark.parametrize("kwargs", 
dataset_valid_params) -def test_dataset_search_returns_valid_results(kwargs): +def test_dataset_search_returns_valid_results(mock_env, kwargs): results = earthaccess.search_datasets(**kwargs) - assertions.assertIsInstance(results, list) - assertions.assertIsInstance(results[0], dict) + assert isinstance(results, list) + assert isinstance(results[0], dict) @pytest.mark.parametrize("kwargs", granules_valid_params) -def test_granules_search_returns_valid_results(kwargs): +def test_granules_search_returns_valid_results(mock_env, kwargs): results = earthaccess.search_data(count=10, **kwargs) - assertions.assertIsInstance(results, list) - assertions.assertTrue(len(results) <= 10) + assert isinstance(results, list) + assert len(results) <= 10 @pytest.mark.parametrize("selection", [0, slice(None)]) @pytest.mark.parametrize("use_url", [True, False]) -def test_download(tmp_path, selection, use_url): +def test_download(mock_env, tmp_path, selection, use_url): results = earthaccess.search_data( count=2, short_name="ATL08", @@ -80,15 +70,15 @@ def test_download(tmp_path, selection, use_url): ) if use_url: # Download via file URL string instead of DataGranule object - results = [r.data_links(access="indirect") for r in results] - results = sum(results, start=[]) # flatten to a list of strings + results = [link for r in results for link in r.data_links(access="indirect")] result = results[selection] files = earthaccess.download(result, str(tmp_path)) - assertions.assertIsInstance(files, list) + assert isinstance(files, list) assert all(Path(f).exists() for f in files) -def test_auth_environ(): +def test_auth_environ(mock_env): + earthaccess.login(strategy="environment") environ = earthaccess.auth_environ() assert environ == { "EARTHDATA_USERNAME": os.environ["EARTHDATA_USERNAME"], diff --git a/tests/integration/test_auth.py b/tests/integration/test_auth.py index 7c0c1b37..e333e48c 100644 --- a/tests/integration/test_auth.py +++ b/tests/integration/test_auth.py @@ -1,90 +1,54 @@ 
-# package imports import logging -import os -import pathlib -import unittest import earthaccess +import earthaccess.daac import pytest import requests import s3fs logger = logging.getLogger(__name__) -assertions = unittest.TestCase("__init__") -NETRC_PATH = pathlib.Path.home() / pathlib.Path(".netrc") - -def activate_environment(): - earthaccess.__auth__ = earthaccess.Auth() - # the original comes from github secrets - os.environ["EARTHDATA_USERNAME"] = os.getenv("EARTHACCESS_TEST_USERNAME", "") - os.environ["EARTHDATA_PASSWORD"] = os.getenv("EARTHACCESS_TEST_PASSWORD", "") - - -def activate_netrc(): - activate_environment() - username = os.environ["EARTHDATA_USERNAME"] - password = os.environ["EARTHDATA_PASSWORD"] - - with open(NETRC_PATH, "w") as f: - f.write( - f"machine urs.earthdata.nasa.gov login {username} password {password}\n" - ) - NETRC_PATH.chmod(0o600) - - -def delete_netrc(): - if NETRC_PATH.exists(): - NETRC_PATH.unlink() - - -def test_auth_can_read_earthdata_env_variables(): - activate_environment() +def test_auth_can_read_earthdata_env_variables(mock_env): auth = earthaccess.login(strategy="environment") logger.info(f"Current username: {auth.username}") logger.info(f"earthaccess version: {earthaccess.__version__}") - assertions.assertIsInstance(auth, earthaccess.Auth) - assertions.assertIsInstance(earthaccess.__auth__, earthaccess.Auth) - assertions.assertTrue(earthaccess.__auth__.authenticated) + assert isinstance(auth, earthaccess.Auth) + assert isinstance(earthaccess.__auth__, earthaccess.Auth) + assert earthaccess.__auth__.authenticated -def test_auth_can_read_from_netrc_file(): - activate_netrc() +def test_auth_can_read_from_netrc_file(mock_netrc): auth = earthaccess.login(strategy="netrc") - assertions.assertTrue(auth.authenticated) - delete_netrc() + assert auth.authenticated -def test_auth_throws_exception_if_netrc_is_not_present(): - activate_environment() - delete_netrc() - with pytest.raises(Exception): +def 
test_auth_throws_exception_if_netrc_is_not_present(mock_missing_netrc): + with pytest.raises(FileNotFoundError): earthaccess.login(strategy="netrc") - assertions.assertRaises(FileNotFoundError) -def test_auth_populates_attrs(): - activate_environment() +def test_auth_populates_attrs(mock_env): auth = earthaccess.login(strategy="environment") - assertions.assertIsInstance(auth, earthaccess.Auth) - assertions.assertIsInstance(earthaccess.__auth__, earthaccess.Auth) - assertions.assertTrue(earthaccess.__auth__.authenticated) + assert isinstance(auth, earthaccess.Auth) + assert isinstance(earthaccess.__auth__, earthaccess.Auth) + assert earthaccess.__auth__.authenticated -def test_auth_can_create_authenticated_requests_sessions(): - activate_environment() +def test_auth_can_create_authenticated_requests_sessions(mock_env): session = earthaccess.get_requests_https_session() - assertions.assertTrue("Authorization" in session.headers) - assertions.assertTrue("Bearer" in session.headers["Authorization"]) + assert "Authorization" in session.headers + assert "Bearer" in session.headers["Authorization"] # type: ignore -@pytest.mark.parametrize("daac", earthaccess.daac.DAACS) -def test_auth_can_fetch_s3_credentials(daac): - activate_environment() +@pytest.mark.parametrize( + "daac", [daac for daac in earthaccess.daac.DAACS if daac["s3-credentials"]] +) +def test_auth_can_fetch_s3_credentials(mock_env, daac): auth = earthaccess.login(strategy="environment") assert auth.authenticated + try: credentials = earthaccess.get_s3_credentials(daac["short-name"]) except requests.RequestException as e: @@ -95,10 +59,10 @@ def test_auth_can_fetch_s3_credentials(daac): @pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"})) -def test_get_s3_credentials_lowercase_location(location): - activate_environment() +def test_get_s3_credentials_lowercase_location(mock_env, location): earthaccess.login(strategy="environment") creds = 
earthaccess.get_s3_credentials(**location) + assert creds assert all( creds[key] @@ -107,9 +71,9 @@ def test_get_s3_credentials_lowercase_location(location): @pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"})) -def test_get_s3_filesystem_lowercase_location(location): - activate_environment() +def test_get_s3_filesystem_lowercase_location(mock_env, location): earthaccess.login(strategy="environment") fs = earthaccess.get_s3_filesystem(**location) + assert isinstance(fs, s3fs.S3FileSystem) assert all(fs.storage_options[key] for key in ["key", "secret", "token"]) diff --git a/tests/integration/test_cloud_download.py b/tests/integration/test_cloud_download.py index 4e8f9519..8ce144e7 100644 --- a/tests/integration/test_cloud_download.py +++ b/tests/integration/test_cloud_download.py @@ -1,9 +1,6 @@ -# package imports import logging -import os import random import shutil -import unittest from pathlib import Path import earthaccess @@ -56,20 +53,6 @@ }, ] -assertions = unittest.TestCase("__init__") - -# we need to use a valid EDL credential - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -auth = Auth().login(strategy="environment") -assertions.assertTrue(auth.authenticated) -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - -store = Store(auth) - def get_sample_granules(granules, sample_size, max_granule_size): """Returns a list with sample granules and their size in MB if @@ -83,11 +66,9 @@ def get_sample_granules(granules, sample_size, max_granule_size): while tries <= max_tries: g = random.sample(granules, 1)[0] if g.size() > max_granule_size: - # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}") tries += 1 continue else: - # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}") files_to_download.append(g) total_size += g.size() if len(files_to_download) 
>= sample_size: @@ -96,7 +77,7 @@ def get_sample_granules(granules, sample_size, max_granule_size): @pytest.mark.parametrize("daac", daac_list) -def test_earthaccess_can_download_cloud_collection_granules(daac): +def test_earthaccess_can_download_cloud_collection_granules(mock_env, tmp_path, daac): """Tests that we can download cloud collections using HTTPS links.""" daac_shortname = daac["short_name"] collections_count = daac["collections_count"] @@ -109,17 +90,17 @@ def test_earthaccess_can_download_cloud_collection_granules(daac): hits = collection_query.hits() logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}") collections = collection_query.get(collections_count) - assertions.assertGreater(len(collections), collections_sample_size) + assert len(collections) > collections_sample_size # We sample n cloud hosted collections from the results random_collections = random.sample(collections, collections_sample_size) + for collection in random_collections: concept_id = collection.concept_id() granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) assert isinstance(granules, list) and len(granules) > 0 - assert isinstance(granules[0], earthaccess.results.DataGranule) - local_path = f"./tests/integration/data/{concept_id}" + assert isinstance(granules[0], earthaccess.DataGranule) granules_to_download, total_size_cmr = get_sample_granules( granules, granules_sample_size, granules_max_size ) @@ -132,14 +113,16 @@ def test_earthaccess_can_download_cloud_collection_granules(daac): f"Testing {concept_id}, granules in collection: {total_granules}, " f"download size(MB): {total_size_cmr}" ) - # We are testing this method + path = tmp_path / "tests" / "integration" / "data" / concept_id + path.mkdir(parents=True) + store = Store(Auth().login(strategy="environment")) + try: - store.get(granules_to_download, local_path=local_path) - except Exception: - logger.warning(Exception) + # 
We are testing this method + store.get(granules_to_download, local_path=path) + except Exception as e: + logger.warning(e) - path = Path(local_path) - assert path.is_dir() # test that we downloaded the mb reported by CMR total_mb_downloaded = round( (sum(file.stat().st_size for file in path.rglob("*")) / 1024**2) @@ -156,11 +139,11 @@ def test_earthaccess_can_download_cloud_collection_granules(daac): ) -def test_multi_file_granule(tmp_path): +def test_multi_file_granule(mock_env, tmp_path): # Ensure granules that contain multiple files are handled correctly granules = earthaccess.search_data(short_name="HLSL30", count=1) assert len(granules) == 1 urls = granules[0].data_links() assert len(urls) > 1 files = earthaccess.download(granules, str(tmp_path)) - assert set([Path(f).name for f in urls]) == set([Path(f).name for f in files]) + assert {Path(f).name for f in urls} == {Path(f).name for f in files} diff --git a/tests/integration/test_cloud_open.py b/tests/integration/test_cloud_open.py index b69eba15..f71e36bb 100644 --- a/tests/integration/test_cloud_open.py +++ b/tests/integration/test_cloud_open.py @@ -1,8 +1,5 @@ -# package imports import logging -import os import random -import unittest import earthaccess import magic @@ -55,20 +52,6 @@ }, ] -assertions = unittest.TestCase("__init__") - -# we need to use a valid EDL credential - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -auth = Auth().login(strategy="environment") -assertions.assertTrue(auth.authenticated) -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - -store = Store(auth) - def get_sample_granules(granules, sample_size, max_granule_size): """Returns a list with sample granules and their size in MB if @@ -82,11 +65,9 @@ def get_sample_granules(granules, sample_size, max_granule_size): while tries <= max_tries: g = random.sample(granules, 1)[0] 
if g.size() > max_granule_size: - # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}") tries += 1 continue else: - # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}") files_to_download.append(g) total_size += g.size() if len(files_to_download) >= sample_size: @@ -95,14 +76,11 @@ def get_sample_granules(granules, sample_size, max_granule_size): def supported_collection(data_links): - for url in data_links: - if "podaac-tools.jpl.nasa.gov/drive" in url: - return False - return True + return all("podaac-tools.jpl.nasa.gov/drive" not in url for url in data_links) @pytest.mark.parametrize("daac", daacs_list) -def test_earthaccess_can_open_onprem_collection_granules(daac): +def test_earthaccess_can_open_onprem_collection_granules(mock_env, daac): """Tests that we can download cloud collections using HTTPS links.""" daac_shortname = daac["short_name"] collections_count = daac["collections_count"] @@ -115,17 +93,18 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): hits = collection_query.hits() logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}") collections = collection_query.get(collections_count) - assertions.assertGreater(len(collections), collections_sample_size) + assert len(collections) > collections_sample_size # We sample n cloud hosted collections from the results random_collections = random.sample(collections, collections_sample_size) logger.info(f"Sampled {len(random_collections)} collections") + for collection in random_collections: concept_id = collection.concept_id() granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) - assertions.assertTrue(len(granules) > 0, "Could not fetch granules") - assertions.assertTrue(isinstance(granules[0], earthaccess.results.DataGranule)) + assert len(granules) > 0, "Could not fetch granules" + assert isinstance(granules[0], earthaccess.DataGranule) data_links = 
granules[0].data_links() if not supported_collection(data_links): logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}") @@ -143,10 +122,11 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): f"download size(MB): {total_size_cmr}" ) + store = Store(Auth().login(strategy="environment")) # We are testing this method fileset = store.open(granules_to_open) - assertions.assertTrue(isinstance(fileset, list)) + assert isinstance(fileset, list) # we test that we can read some bytes and get the file type for file in fileset: @@ -163,4 +143,4 @@ def test_multi_file_granule(): urls = granules[0].data_links() assert len(urls) > 1 files = earthaccess.open(granules) - assert set(urls) == set(f.path for f in files) + assert set(urls) == {f.path for f in files} diff --git a/tests/integration/test_kerchunk.py b/tests/integration/test_kerchunk.py index 2e981cce..e92fffe2 100644 --- a/tests/integration/test_kerchunk.py +++ b/tests/integration/test_kerchunk.py @@ -1,6 +1,4 @@ import logging -import os -import unittest from pathlib import Path import earthaccess @@ -11,29 +9,20 @@ pytest.importorskip("dask") logger = logging.getLogger(__name__) -assertions = unittest.TestCase("__init__") - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") @pytest.fixture(scope="module") def granules(): - granules = earthaccess.search_data( + return earthaccess.search_data( count=2, short_name="SEA_SURFACE_HEIGHT_ALT_GRIDS_L4_2SATS_5DAY_6THDEG_V_JPL2205", cloud_hosted=True, ) - return granules @pytest.mark.parametrize("protocol", ["", "file://"]) def test_consolidate_metadata_outfile(tmp_path, granules, protocol): outfile = f"{protocol}{tmp_path / 'metadata.json'}" - assert not Path(outfile).exists() result = earthaccess.consolidate_metadata( granules, 
outfile=outfile, @@ -44,7 +33,7 @@ def test_consolidate_metadata_outfile(tmp_path, granules, protocol): assert result == outfile -def test_consolidate_metadata_memory(tmp_path, granules): +def test_consolidate_metadata_memory(granules): result = earthaccess.consolidate_metadata( granules, access="indirect", @@ -61,10 +50,7 @@ def test_consolidate_metadata(tmp_path, granules, output): expected = xr.open_mfdataset(earthaccess.open(granules)) # Open with kerchunk consolidated metadata file - if output == "file": - kwargs = {"outfile": tmp_path / "metadata.json"} - else: - kwargs = {} + kwargs = {"outfile": tmp_path / "metadata.json"} if output == "file" else {} metadata = earthaccess.consolidate_metadata( granules, access="indirect", kerchunk_options={"concat_dims": "Time"}, **kwargs ) diff --git a/tests/integration/test_onprem_download.py b/tests/integration/test_onprem_download.py index 242a3c26..5c636e30 100644 --- a/tests/integration/test_onprem_download.py +++ b/tests/integration/test_onprem_download.py @@ -1,10 +1,6 @@ -# package imports import logging -import os import random import shutil -import unittest -from pathlib import Path import earthaccess import pytest @@ -38,30 +34,22 @@ "granules_sample_size": 2, "granules_max_size_mb": 100, }, - { - "short_name": "ORNLDAAC", - "collections_count": 100, - "collections_sample_size": 3, - "granules_count": 100, - "granules_sample_size": 2, - "granules_max_size_mb": 50, - }, + # + # ORNLDAAC no longer has any on-prem collections. This returns 0 collections: + # https://cmr.earthdata.nasa.gov/search/collections?data_center=ORNL_DAAC&cloud_hosted=false + # The following is commented out because the test in this file will now always fail + # because there are no longer any on-prem collections. 
+ # + # { + # "short_name": "ORNLDAAC", + # "collections_count": 100, + # "collections_sample_size": 3, + # "granules_count": 100, + # "granules_sample_size": 2, + # "granules_max_size_mb": 50, + # }, ] -assertions = unittest.TestCase("__init__") - -# we need to use a valid EDL credential - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -auth = Auth().login(strategy="environment") -assertions.assertTrue(auth.authenticated) -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - -store = Store(auth) - def get_sample_granules(granules, sample_size, max_granule_size): """Returns a list with sample granules and their size in MB if @@ -75,11 +63,9 @@ def get_sample_granules(granules, sample_size, max_granule_size): while tries <= max_tries: g = random.sample(granules, 1)[0] if g.size() > max_granule_size: - # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}") tries += 1 continue else: - # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}") files_to_download.append(g) total_size += g.size() if len(files_to_download) >= sample_size: @@ -88,14 +74,11 @@ def get_sample_granules(granules, sample_size, max_granule_size): def supported_collection(data_links): - for url in data_links: - if "podaac-tools.jpl.nasa.gov/drive" in url: - return False - return True + return all("podaac-tools.jpl.nasa.gov/drive" not in url for url in data_links) @pytest.mark.parametrize("daac", daacs_list) -def test_earthaccess_can_download_onprem_collection_granules(daac): +def test_earthaccess_can_download_onprem_collection_granules(mock_env, tmp_path, daac): """Tests that we can download cloud collections using HTTPS links.""" daac_shortname = daac["short_name"] collections_count = daac["collections_count"] @@ -108,22 +91,22 @@ def test_earthaccess_can_download_onprem_collection_granules(daac): hits = 
collection_query.hits() logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}") collections = collection_query.get(collections_count) - assertions.assertGreater(len(collections), collections_sample_size) + assert len(collections) > collections_sample_size # We sample n cloud hosted collections from the results random_collections = random.sample(collections, collections_sample_size) logger.info(f"Sampled {len(random_collections)} collections") + for collection in random_collections: concept_id = collection.concept_id() granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) - assertions.assertTrue(len(granules) > 0, "Could not fetch granules") - assertions.assertTrue(isinstance(granules[0], earthaccess.results.DataGranule)) + assert len(granules) > 0, "Could not fetch granules" + assert isinstance(granules[0], earthaccess.DataGranule) data_links = granules[0].data_links() if not supported_collection(data_links): logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}") continue - local_path = f"./tests/integration/data/{concept_id}" granules_to_download, total_size_cmr = get_sample_granules( granules, granules_sample_size, granules_max_size ) @@ -136,14 +119,15 @@ def test_earthaccess_can_download_onprem_collection_granules(daac): f"Testing {concept_id}, granules in collection: {total_granules}, " f"download size(MB): {total_size_cmr}" ) + path = tmp_path / "tests" / "integration" / "data" / concept_id + path.mkdir(parents=True) + store = Store(Auth().login(strategy="environment")) # We are testing this method - downloaded_results = store.get(granules_to_download, local_path=local_path) + downloaded_results = store.get(granules_to_download, local_path=path) - assertions.assertTrue(isinstance(downloaded_results, list)) - assertions.assertTrue(len(downloaded_results) == granules_sample_size) + assert isinstance(downloaded_results, list) + assert 
len(downloaded_results) >= granules_sample_size - path = Path(local_path) - assertions.assertTrue(path.is_dir()) # test that we downloaded the mb reported by CMR total_mb_downloaded = round( (sum(file.stat().st_size for file in path.rglob("*")) / 1024**2), 2 diff --git a/tests/integration/test_onprem_open.py b/tests/integration/test_onprem_open.py index 2a455c44..1135b7ba 100644 --- a/tests/integration/test_onprem_open.py +++ b/tests/integration/test_onprem_open.py @@ -1,8 +1,5 @@ -# package imports import logging -import os import random -import unittest import earthaccess import magic @@ -37,30 +34,22 @@ "granules_sample_size": 2, "granules_max_size_mb": 130, }, - { - "short_name": "ORNLDAAC", - "collections_count": 100, - "collections_sample_size": 2, - "granules_count": 100, - "granules_sample_size": 2, - "granules_max_size_mb": 50, - }, + # + # ORNLDAAC no longer has any on-prem collections. This returns 0 collections: + # https://cmr.earthdata.nasa.gov/search/collections?data_center=ORNL_DAAC&cloud_hosted=false + # The following is commented out because the test in this file will now always fail + # because there are no longer any on-prem collections. 
+ # + # { + # "short_name": "ORNLDAAC", + # "collections_count": 100, + # "collections_sample_size": 2, + # "granules_count": 100, + # "granules_sample_size": 2, + # "granules_max_size_mb": 50, + # }, ] -assertions = unittest.TestCase("__init__") - -# we need to use a valid EDL credential - -assertions.assertTrue("EARTHDATA_USERNAME" in os.environ) -assertions.assertTrue("EARTHDATA_PASSWORD" in os.environ) - -auth = Auth().login(strategy="environment") -assertions.assertTrue(auth.authenticated) -logger.info(f"Current username: {os.environ['EARTHDATA_USERNAME']}") -logger.info(f"earthaccess version: {earthaccess.__version__}") - -store = Store(auth) - def get_sample_granules(granules, sample_size, max_granule_size): """Returns a list with sample granules and their size in MB if @@ -74,11 +63,9 @@ def get_sample_granules(granules, sample_size, max_granule_size): while tries <= max_tries: g = random.sample(granules, 1)[0] if g.size() > max_granule_size: - # print(f"G: {g['meta']['concept-id']} exceded max size: {g.size()}") tries += 1 continue else: - # print(f"Adding : {g['meta']['concept-id']} size: {g.size()}") files_to_download.append(g) total_size += g.size() if len(files_to_download) >= sample_size: @@ -87,14 +74,11 @@ def get_sample_granules(granules, sample_size, max_granule_size): def supported_collection(data_links): - for url in data_links: - if "podaac-tools.jpl.nasa.gov/drive" in url: - return False - return True + return all("podaac-tools.jpl.nasa.gov/drive" not in url for url in data_links) @pytest.mark.parametrize("daac", daacs_list) -def test_earthaccess_can_open_onprem_collection_granules(daac): +def test_earthaccess_can_open_onprem_collection_granules(mock_env, daac): """Tests that we can download cloud collections using HTTPS links.""" daac_shortname = daac["short_name"] collections_count = daac["collections_count"] @@ -107,17 +91,18 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): hits = collection_query.hits() 
logger.info(f"Cloud hosted collections for {daac_shortname}: {hits}") collections = collection_query.get(collections_count) - assertions.assertGreater(len(collections), collections_sample_size) + assert len(collections) > collections_sample_size # We sample n cloud hosted collections from the results random_collections = random.sample(collections, collections_sample_size) logger.info(f"Sampled {len(random_collections)} collections") + for collection in random_collections: concept_id = collection.concept_id() granule_query = DataGranules().concept_id(concept_id) total_granules = granule_query.hits() granules = granule_query.get(granules_count) - assertions.assertTrue(len(granules) > 0, "Could not fetch granules") - assertions.assertTrue(isinstance(granules[0], earthaccess.results.DataGranule)) + assert len(granules) > 0, "Could not fetch granules" + assert isinstance(granules[0], earthaccess.DataGranule) data_links = granules[0].data_links() if not supported_collection(data_links): logger.warning(f"PODAAC DRIVE is not supported at the moment: {data_links}") @@ -135,10 +120,11 @@ def test_earthaccess_can_open_onprem_collection_granules(daac): f"download size(MB): {total_size_cmr}" ) + store = Store(Auth().login(strategy="environment")) # We are testing this method fileset = store.open(granules_to_open) - assertions.assertTrue(isinstance(fileset, list)) + assert isinstance(fileset, list) # we test that we can read some bytes and get the file type for file in fileset: From e40e07e4b990797e02ab31c15cbf96c5df9b17ae Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Sat, 21 Sep 2024 20:33:19 -0400 Subject: [PATCH 02/13] Suppress pip warning about running as root user --- .github/actions/install-pkg/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/install-pkg/action.yml b/.github/actions/install-pkg/action.yml index 5e8bd9e4..4b4e2d92 100644 --- a/.github/actions/install-pkg/action.yml +++ b/.github/actions/install-pkg/action.yml 
@@ -21,4 +21,4 @@ runs: - name: Install package and test dependencies shell: bash - run: pip install .[test] + run: pip install --root-user-action ignore ".[test]" From 633f7ea446fddb1231bcf9751f9387de35478178 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Sat, 21 Sep 2024 20:33:58 -0400 Subject: [PATCH 03/13] Check permission only for pull_request_target event --- .github/workflows/integration-test.yml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index fba60ccc..89cf8460 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -47,6 +47,7 @@ jobs: steps: - name: Fetch user permission + if: github.event_name == 'pull_request_target' id: permission uses: actions-cool/check-user-permission@v2 with: @@ -54,7 +55,11 @@ jobs: username: ${{ github.triggering_actor }} - name: Check user permission - if: ${{ steps.permission.outputs.require-result == 'false' }} + # The name of the output require-result is a bit confusing, but when its value + # is 'false', it means that the triggering actor does NOT have the required + # permission. + if: github.event_name == 'pull_request_target' && steps.permission.outputs.require-result == 'false' + # If the triggering actor does not have write permission (i.e., this is a # PR from a fork), then we exit, otherwise most of the integration tests will # fail because they require access to secrets. 
In this case, a maintainer From 4b4edc53df99b0da08cf2ca15a4d3ce8f430191f Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Sun, 22 Sep 2024 17:43:31 -0400 Subject: [PATCH 04/13] Remove unnecessary dup env vars --- .github/workflows/integration-test.yml | 2 -- tests/integration/conftest.py | 16 +++++----------- tests/integration/test_api.py | 12 ++++++------ tests/integration/test_auth.py | 12 ++++++------ tests/integration/test_cloud_download.py | 4 ++-- tests/integration/test_cloud_open.py | 2 +- tests/integration/test_onprem_download.py | 2 +- tests/integration/test_onprem_open.py | 2 +- 8 files changed, 22 insertions(+), 30 deletions(-) diff --git a/.github/workflows/integration-test.yml b/.github/workflows/integration-test.yml index 89cf8460..b868549d 100644 --- a/.github/workflows/integration-test.yml +++ b/.github/workflows/integration-test.yml @@ -83,8 +83,6 @@ jobs: env: EARTHDATA_USERNAME: ${{ secrets.EDL_USERNAME }} EARTHDATA_PASSWORD: ${{ secrets.EDL_PASSWORD }} - EARTHACCESS_TEST_USERNAME: ${{ secrets.EDL_USERNAME }} - EARTHACCESS_TEST_PASSWORD: ${{ secrets.EDL_PASSWORD }} run: ./scripts/integration-test.sh - name: Upload coverage report diff --git a/tests/integration/conftest.py b/tests/integration/conftest.py index db71b54f..8c206885 100644 --- a/tests/integration/conftest.py +++ b/tests/integration/conftest.py @@ -35,14 +35,6 @@ def pytest_sessionfinish(session, exitstatus): session.exitstatus = 99 -@pytest.fixture -def mock_env(monkeypatch): - earthaccess.__auth__ = earthaccess.Auth() - # the original comes from github secrets - monkeypatch.setenv("EARTHDATA_USERNAME", os.getenv("EARTHACCESS_TEST_USERNAME", "")) - monkeypatch.setenv("EARTHDATA_PASSWORD", os.getenv("EARTHACCESS_TEST_PASSWORD", "")) - - @pytest.fixture def mock_missing_netrc(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch): netrc_path = tmp_path / ".netrc" @@ -51,12 +43,14 @@ def mock_missing_netrc(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch): 
monkeypatch.delenv("EARTHDATA_PASSWORD") # Currently, due to there being only a single, global, module-level auth # value, tests using different auth strategies interfere with each other, - # so here we are deleting the auth attribute so that it doesn't interfere. - monkeypatch.delattr(earthaccess, "__auth__", raising=False) + # so here we are monkeypatching a new, unauthenticated Auth object. + auth = earthaccess.Auth() + monkeypatch.setattr(earthaccess, "_auth", auth) + monkeypatch.setattr(earthaccess, "__auth__", auth) @pytest.fixture -def mock_netrc(mock_env, tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch): +def mock_netrc(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch): netrc = tmp_path / ".netrc" monkeypatch.setenv("NETRC", str(netrc)) diff --git a/tests/integration/test_api.py b/tests/integration/test_api.py index 5e90cf46..f0fdd219 100644 --- a/tests/integration/test_api.py +++ b/tests/integration/test_api.py @@ -32,28 +32,28 @@ ] -def test_auth_returns_valid_auth_class(mock_env): +def test_auth_returns_valid_auth_class(): auth = earthaccess.login(strategy="environment") assert isinstance(auth, earthaccess.Auth) assert isinstance(earthaccess.__auth__, earthaccess.Auth) assert earthaccess.__auth__.authenticated -def test_dataset_search_returns_none_with_no_parameters(mock_env): +def test_dataset_search_returns_none_with_no_parameters(): results = earthaccess.search_datasets() assert isinstance(results, list) assert len(results) == 0 @pytest.mark.parametrize("kwargs", dataset_valid_params) -def test_dataset_search_returns_valid_results(mock_env, kwargs): +def test_dataset_search_returns_valid_results(kwargs): results = earthaccess.search_datasets(**kwargs) assert isinstance(results, list) assert isinstance(results[0], dict) @pytest.mark.parametrize("kwargs", granules_valid_params) -def test_granules_search_returns_valid_results(mock_env, kwargs): +def test_granules_search_returns_valid_results(kwargs): results = 
earthaccess.search_data(count=10, **kwargs) assert isinstance(results, list) assert len(results) <= 10 @@ -61,7 +61,7 @@ def test_granules_search_returns_valid_results(mock_env, kwargs): @pytest.mark.parametrize("selection", [0, slice(None)]) @pytest.mark.parametrize("use_url", [True, False]) -def test_download(mock_env, tmp_path, selection, use_url): +def test_download(tmp_path, selection, use_url): results = earthaccess.search_data( count=2, short_name="ATL08", @@ -77,7 +77,7 @@ def test_download(mock_env, tmp_path, selection, use_url): assert all(Path(f).exists() for f in files) -def test_auth_environ(mock_env): +def test_auth_environ(): earthaccess.login(strategy="environment") environ = earthaccess.auth_environ() assert environ == { diff --git a/tests/integration/test_auth.py b/tests/integration/test_auth.py index e333e48c..1a83833d 100644 --- a/tests/integration/test_auth.py +++ b/tests/integration/test_auth.py @@ -9,7 +9,7 @@ logger = logging.getLogger(__name__) -def test_auth_can_read_earthdata_env_variables(mock_env): +def test_auth_can_read_earthdata_env_variables(): auth = earthaccess.login(strategy="environment") logger.info(f"Current username: {auth.username}") logger.info(f"earthaccess version: {earthaccess.__version__}") @@ -29,14 +29,14 @@ def test_auth_throws_exception_if_netrc_is_not_present(mock_missing_netrc): earthaccess.login(strategy="netrc") -def test_auth_populates_attrs(mock_env): +def test_auth_populates_attrs(): auth = earthaccess.login(strategy="environment") assert isinstance(auth, earthaccess.Auth) assert isinstance(earthaccess.__auth__, earthaccess.Auth) assert earthaccess.__auth__.authenticated -def test_auth_can_create_authenticated_requests_sessions(mock_env): +def test_auth_can_create_authenticated_requests_sessions(): session = earthaccess.get_requests_https_session() assert "Authorization" in session.headers assert "Bearer" in session.headers["Authorization"] # type: ignore @@ -45,7 +45,7 @@ def 
test_auth_can_create_authenticated_requests_sessions(mock_env): @pytest.mark.parametrize( "daac", [daac for daac in earthaccess.daac.DAACS if daac["s3-credentials"]] ) -def test_auth_can_fetch_s3_credentials(mock_env, daac): +def test_auth_can_fetch_s3_credentials(daac): auth = earthaccess.login(strategy="environment") assert auth.authenticated @@ -59,7 +59,7 @@ def test_auth_can_fetch_s3_credentials(mock_env, daac): @pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"})) -def test_get_s3_credentials_lowercase_location(mock_env, location): +def test_get_s3_credentials_lowercase_location(location): earthaccess.login(strategy="environment") creds = earthaccess.get_s3_credentials(**location) @@ -71,7 +71,7 @@ def test_get_s3_credentials_lowercase_location(mock_env, location): @pytest.mark.parametrize("location", ({"daac": "podaac"}, {"provider": "pocloud"})) -def test_get_s3_filesystem_lowercase_location(mock_env, location): +def test_get_s3_filesystem_lowercase_location(location): earthaccess.login(strategy="environment") fs = earthaccess.get_s3_filesystem(**location) diff --git a/tests/integration/test_cloud_download.py b/tests/integration/test_cloud_download.py index 8ce144e7..11fab5a1 100644 --- a/tests/integration/test_cloud_download.py +++ b/tests/integration/test_cloud_download.py @@ -77,7 +77,7 @@ def get_sample_granules(granules, sample_size, max_granule_size): @pytest.mark.parametrize("daac", daac_list) -def test_earthaccess_can_download_cloud_collection_granules(mock_env, tmp_path, daac): +def test_earthaccess_can_download_cloud_collection_granules(tmp_path, daac): """Tests that we can download cloud collections using HTTPS links.""" daac_shortname = daac["short_name"] collections_count = daac["collections_count"] @@ -139,7 +139,7 @@ def test_earthaccess_can_download_cloud_collection_granules(mock_env, tmp_path, ) -def test_multi_file_granule(mock_env, tmp_path): +def test_multi_file_granule(tmp_path): # Ensure granules that 
contain multiple files are handled correctly granules = earthaccess.search_data(short_name="HLSL30", count=1) assert len(granules) == 1 diff --git a/tests/integration/test_cloud_open.py b/tests/integration/test_cloud_open.py index f71e36bb..a0ca5501 100644 --- a/tests/integration/test_cloud_open.py +++ b/tests/integration/test_cloud_open.py @@ -80,7 +80,7 @@ def supported_collection(data_links): @pytest.mark.parametrize("daac", daacs_list) -def test_earthaccess_can_open_onprem_collection_granules(mock_env, daac): +def test_earthaccess_can_open_onprem_collection_granules(daac): """Tests that we can download cloud collections using HTTPS links.""" daac_shortname = daac["short_name"] collections_count = daac["collections_count"] diff --git a/tests/integration/test_onprem_download.py b/tests/integration/test_onprem_download.py index 5c636e30..5c812e38 100644 --- a/tests/integration/test_onprem_download.py +++ b/tests/integration/test_onprem_download.py @@ -78,7 +78,7 @@ def supported_collection(data_links): @pytest.mark.parametrize("daac", daacs_list) -def test_earthaccess_can_download_onprem_collection_granules(mock_env, tmp_path, daac): +def test_earthaccess_can_download_onprem_collection_granules(tmp_path, daac): """Tests that we can download cloud collections using HTTPS links.""" daac_shortname = daac["short_name"] collections_count = daac["collections_count"] diff --git a/tests/integration/test_onprem_open.py b/tests/integration/test_onprem_open.py index 1135b7ba..08b6da22 100644 --- a/tests/integration/test_onprem_open.py +++ b/tests/integration/test_onprem_open.py @@ -78,7 +78,7 @@ def supported_collection(data_links): @pytest.mark.parametrize("daac", daacs_list) -def test_earthaccess_can_open_onprem_collection_granules(mock_env, daac): +def test_earthaccess_can_open_onprem_collection_granules(daac): """Tests that we can download cloud collections using HTTPS links.""" daac_shortname = daac["short_name"] collections_count = daac["collections_count"] From 
10934dcbd1b464043c88ec3eb22578ddf8f1dc14 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Sun, 22 Sep 2024 17:45:01 -0400 Subject: [PATCH 05/13] Move project urls to correct section of pyproject.toml --- pyproject.toml | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index f04fcb76..ff02e39e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,8 +5,6 @@ build-backend = "hatchling.build" [project] name = "earthaccess" version = "0.10.0" -repository = "https://github.com/nsidc/earthaccess" -documentation = "https://earthaccess.readthedocs.io" description = "Client library for NASA Earthdata APIs" authors = [ {name = "earthaccess contributors"} @@ -55,6 +53,12 @@ dependencies = [ "numpy >=1.26.0; python_version >= '3.12'", ] +[project.urls] +Repository = "https://github.com/nsidc/earthaccess" +Documentation = "https://earthaccess.readthedocs.io/en/latest/" +"Bug Tracker" = "https://github.com/nsidc/earthaccess/issues" +Changelog = "https://github.com/nsidc/earthaccess/blob/main/CHANGELOG.md" + [project.optional-dependencies] kerchunk = [ "kerchunk", From 87f6a438cbbe5d40d8508cc1d1d58bb11063ec42 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Sun, 22 Sep 2024 17:45:29 -0400 Subject: [PATCH 06/13] Correct Luis's email address --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index ff02e39e..e89bcaca 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -10,7 +10,7 @@ authors = [ {name = "earthaccess contributors"} ] maintainers = [ - {name = "Luis Lopez", email = "betolin@gmail.com"}, + {name = "Luis Lopez", email = "betolink@gmail.com"}, {name = "Joseph H. 
Kennedy", email = "jhkennedy@alaska.edu"}, {name = "James Bourbeau", email = "james@coiled.io"}, {name = "Matt Fisher", email = "mfisher87@gmail.com"}, From 9fef6cb5a99451baaea3971b5f44d10b7f96c83a Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Sun, 22 Sep 2024 18:00:37 -0400 Subject: [PATCH 07/13] Move and complete kerchunk dependencies --- pyproject.toml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e89bcaca..8c69c8af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,10 +47,6 @@ dependencies = [ "multimethod >=1.8", "importlib-resources >=6.3.2", "typing_extensions >=4.10.0", - # kerchunk requires numpy, but numpy >=1.26.0 is required for Python 3.12 - # support - "numpy >=1.24.0; python_version < '3.12'", - "numpy >=1.26.0; python_version >= '3.12'", ] [project.urls] @@ -63,6 +59,12 @@ Changelog = "https://github.com/nsidc/earthaccess/blob/main/CHANGELOG.md" kerchunk = [ "kerchunk", "dask", + "h5py", + "h5netcdf", + "xarray", + # kerchunk requires numpy, but numpy >=1.26.0 is required for Python 3.12 + "numpy >=1.24.0; python_version < '3.12'", + "numpy >=1.26.0; python_version >= '3.12'", ] dev = [ "bump-my-version >=0.10.0", From f805cc1aee3aefb0a332ce1fee65d308ab84f826 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Sun, 22 Sep 2024 18:00:59 -0400 Subject: [PATCH 08/13] Add missing type annotation --- earthaccess/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/earthaccess/__init__.py b/earthaccess/__init__.py index c82c23d3..73f7ed2d 100644 --- a/earthaccess/__init__.py +++ b/earthaccess/__init__.py @@ -1,6 +1,7 @@ import logging import threading from importlib.metadata import version +from typing import Optional from .api import ( auth_environ, @@ -64,7 +65,7 @@ __version__ = version("earthaccess") _auth = Auth() -_store = None +_store: Optional[Store] = None _lock = threading.Lock() From 7d50892d5fb0eef3744a641e80621542e35f4730 Mon Sep 17 
00:00:00 2001 From: Chuck Daniels Date: Sun, 22 Sep 2024 19:06:45 -0400 Subject: [PATCH 09/13] Include kerchunk extra within test extra --- pyproject.toml | 1 + tests/integration/test_kerchunk.py | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 8c69c8af..043b84cc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -83,6 +83,7 @@ test = [ "types-requests >=0.1", "types-setuptools >=0.1", "vcrpy >=6.0.1", + "earthaccess[kerchunk]", ] docs = [ "jupyterlab >=3", diff --git a/tests/integration/test_kerchunk.py b/tests/integration/test_kerchunk.py index e92fffe2..8bde9ed4 100644 --- a/tests/integration/test_kerchunk.py +++ b/tests/integration/test_kerchunk.py @@ -5,9 +5,6 @@ import pytest from fsspec.core import strip_protocol -kerchunk = pytest.importorskip("kerchunk") -pytest.importorskip("dask") - logger = logging.getLogger(__name__) @@ -45,7 +42,12 @@ def test_consolidate_metadata_memory(granules): @pytest.mark.parametrize("output", ["file", "memory"]) def test_consolidate_metadata(tmp_path, granules, output): - xr = pytest.importorskip("xarray") + # We import here because xarray is installed only when the kerchunk extra is + # installed, and when type checking is run, kerchunk (and thus xarray) is + # not installed, so mypy barfs when this is a top-level import. Further, + # mypy complains even when imported here, but here we can mark it to ignore. 
+ import xarray as xr # type: ignore + # Open directly with `earthaccess.open` expected = xr.open_mfdataset(earthaccess.open(granules)) From 9fea54303ecddad853269540cf9951ca00b17a1a Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Sun, 22 Sep 2024 19:07:24 -0400 Subject: [PATCH 10/13] Add nox integration-tests session --- docs/contributing/development.md | 23 ++++++++++++++++------- noxfile.py | 16 ++++++++++++++++ 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/docs/contributing/development.md b/docs/contributing/development.md index 1c4baa1a..5371a247 100644 --- a/docs/contributing/development.md +++ b/docs/contributing/development.md @@ -17,20 +17,29 @@ If you don't have pipx (pip for applications), then you can install with pip is reasonable). If you use macOS, then pipx and nox are both in brew, use `brew install pipx nox`. -To use, run `nox`. This will typecheck and test using every installed version of -Python on your system, skipping ones that are not installed. You can also run -specific jobs: +To use, run `nox` without any arguments. This will run type checks and unit +tests using the installed version of Python on your system. + +You can also run individual tasks (_sessions_ in `nox` parlance, hence the `-s` +option below), like so: ```console -$ nox -s typecheck # Typecheck only -$ nox -s tests # Python tests -$ nox -s build_docs -- --serve # Build and serve the docs -$ nox -s build_pkg # Make an SDist and wheel +nox -s typecheck # Run typechecks +nox -s tests # Run unit tests +nox -s integration-tests # Run integration tests (see note below) +nox -s build_docs -- --serve # Build and serve the docs +nox -s build_pkg # Build an SDist and wheel ``` Nox handles everything for you, including setting up a temporary virtual environment for each run. 
+**NOTE:** In order to run integration tests locally, you must set the +environment variables `EARTHDATA_USERNAME` and `EARTHDATA_PASSWORD` to your +username and password, respectively, of your +[NASA Earthdata](https://urs.earthdata.nasa.gov/) account (registration is +free). + ## Manual development environment setup While `nox` is the fastest way to get started, you will likely need a full diff --git a/noxfile.py b/noxfile.py index 257b0533..a62314f6 100644 --- a/noxfile.py +++ b/noxfile.py @@ -1,5 +1,6 @@ from __future__ import annotations +import os import shutil from pathlib import Path @@ -26,6 +27,21 @@ def tests(session: nox.Session) -> None: session.run("pytest", "tests/unit", *session.posargs) +@nox.session(name="integration-tests") +def integration_tests(session: nox.Session) -> None: + """Run the integration tests.""" + session.install("--editable", ".[test]") + session.run( + "scripts/integration-test.sh", + *session.posargs, + env=dict( + EARTHDATA_USERNAME=os.environ["EARTHDATA_USERNAME"], + EARTHDATA_PASSWORD=os.environ["EARTHDATA_PASSWORD"], + ), + external=True, + ) + + @nox.session def build_pkg(session: nox.Session) -> None: """Build a source distribution and binary distribution (wheel).""" From 510a330c7b88be71203fe06b88e0e8100341e23f Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Sun, 22 Sep 2024 20:29:48 -0400 Subject: [PATCH 11/13] Mindeps requires python 3.9, h5py 3.0+ --- .github/workflows/test-mindeps.yml | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-mindeps.yml b/.github/workflows/test-mindeps.yml index 3be5bb01..050923f0 100644 --- a/.github/workflows/test-mindeps.yml +++ b/.github/workflows/test-mindeps.yml @@ -30,7 +30,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v5 with: - python-version-file: pyproject.toml + python-version: 3.9 - name: Install minimum-compatible dependencies run: uv sync --resolution lowest-direct --extra test diff --git 
a/pyproject.toml b/pyproject.toml index 043b84cc..673f2818 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,7 +59,7 @@ Changelog = "https://github.com/nsidc/earthaccess/blob/main/CHANGELOG.md" kerchunk = [ "kerchunk", "dask", - "h5py", + "h5py >=3.0", "h5netcdf", "xarray", # kerchunk requires numpy, but numpy >=1.26.0 is required for Python 3.12 From 04c6dcc839f68efa5f532be67fcdeb82a5ca5fe6 Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Mon, 23 Sep 2024 06:19:33 -0400 Subject: [PATCH 12/13] Remove noise from CHANGELOG --- CHANGELOG.md | 3 --- 1 file changed, 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 638073e0..81ead0c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,9 +13,6 @@ instead ([#766](https://github.com/nsidc/earthaccess/issues/766)) ([**@Sherwin-14**](https://github.com/Sherwin-14), [**@chuckwondo**](https://github.com/chuckwondo)) -- Use built-in `assert` statement in integration tests - ([#743](https://github.com/nsidc/earthaccess/issues/743)) - ([**@chuckwondo**](https://github.com/chuckwondo)) ### Added From 6f645d34f88764fe85997b141a0388fa65a8faad Mon Sep 17 00:00:00 2001 From: Chuck Daniels Date: Mon, 23 Sep 2024 12:14:26 -0400 Subject: [PATCH 13/13] Update uv.lock --- uv.lock | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/uv.lock b/uv.lock index e4c8a627..0697dfb1 100644 --- a/uv.lock +++ b/uv.lock @@ -826,7 +826,6 @@ dependencies = [ { name = "fsspec" }, { name = "importlib-resources" }, { name = "multimethod" }, - { name = "numpy" }, { name = "pqdm" }, { name = "python-cmr" }, { name = "requests" }, @@ -864,10 +863,19 @@ docs = [ ] kerchunk = [ { name = "dask" }, + { name = "h5netcdf" }, + { name = "h5py" }, { name = "kerchunk" }, + { name = "numpy" }, + { name = "xarray" }, ] test = [ + { name = "dask" }, + { name = "h5netcdf" }, + { name = "h5py" }, + { name = "kerchunk" }, { name = "mypy" }, + { name = "numpy" }, { name = "pytest" }, { name = "pytest-cov" }, { name = 
"pytest-watch" }, @@ -877,14 +885,18 @@ test = [ { name = "types-requests", version = "2.32.0.20240907", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10' and python_full_version < '3.12' and platform_python_implementation != 'PyPy'" }, { name = "types-setuptools" }, { name = "vcrpy" }, + { name = "xarray" }, ] [package.metadata] requires-dist = [ { name = "bump-my-version", marker = "extra == 'dev'", specifier = ">=0.10.0" }, { name = "dask", marker = "extra == 'kerchunk'" }, + { name = "earthaccess", extras = ["kerchunk"], marker = "extra == 'test'" }, { name = "fsspec", specifier = ">=2022.11" }, { name = "h5netcdf", marker = "extra == 'docs'", specifier = ">=0.11" }, + { name = "h5netcdf", marker = "extra == 'kerchunk'" }, + { name = "h5py", marker = "extra == 'kerchunk'", specifier = ">=3.0" }, { name = "importlib-resources", specifier = ">=6.3.2" }, { name = "ipywidgets", marker = "extra == 'docs'", specifier = ">=7.7.0" }, { name = "jupyterlab", marker = "extra == 'docs'", specifier = ">=3" }, @@ -901,8 +913,8 @@ requires-dist = [ { name = "multimethod", specifier = ">=1.8" }, { name = "mypy", marker = "extra == 'test'", specifier = ">=1.11.2" }, { name = "nox", marker = "extra == 'dev'" }, - { name = "numpy", marker = "python_full_version < '3.12'", specifier = ">=1.24.0" }, - { name = "numpy", marker = "python_full_version >= '3.12'", specifier = ">=1.26.0" }, + { name = "numpy", marker = "python_full_version >= '3.12' and extra == 'kerchunk'", specifier = ">=1.26.0" }, + { name = "numpy", marker = "python_full_version < '3.12' and extra == 'kerchunk'", specifier = ">=1.24.0" }, { name = "pqdm", specifier = ">=0.1" }, { name = "pre-commit", marker = "extra == 'dev'", specifier = ">=2.4" }, { name = "pygments", marker = "extra == 'docs'", specifier = ">=2.11.1" }, @@ -926,6 +938,7 @@ requires-dist = [ { name = "vcrpy", marker = "extra == 'test'", specifier = ">=6.0.1" }, { name = "widgetsnbextension", marker = "extra 
== 'docs'", specifier = ">=3.6.0" }, { name = "xarray", marker = "extra == 'docs'", specifier = ">=2023.1" }, + { name = "xarray", marker = "extra == 'kerchunk'" }, ] [[package]]