From 557456e66976ea7b9d2811e1d201643dcbea2e9d Mon Sep 17 00:00:00 2001 From: George Boot <884482+georgeboot@users.noreply.github.com> Date: Sat, 13 May 2023 20:50:10 +0200 Subject: [PATCH 1/6] Allow empty content type for s3 objects (#332) * Allow empty content type for s3 objects Fixes #331 * Update s3client.py --- cloudpathlib/s3/s3client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cloudpathlib/s3/s3client.py b/cloudpathlib/s3/s3client.py index 474f4a88..1e698608 100644 --- a/cloudpathlib/s3/s3client.py +++ b/cloudpathlib/s3/s3client.py @@ -142,7 +142,7 @@ def _get_metadata(self, cloud_path: S3Path) -> Dict[str, Any]: "last_modified": data["LastModified"], "size": data["ContentLength"], "etag": data["ETag"], - "content_type": data["ContentType"], + "content_type": data.get("ContentType", None), "extra": data["Metadata"], } From 6e0dafda71ff7f887634966efb840c60e4d81c1e Mon Sep 17 00:00:00 2001 From: Jay Qi Date: Sat, 13 May 2023 23:03:52 -0400 Subject: [PATCH 2/6] Update changelog --- HISTORY.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index 8c640f20..4ad80de1 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,9 +1,10 @@ # cloudpathlib Changelog -## v0.14.0 (Unreleased) +## v0.14.0 (2023-05-13) - Changed to pyproject.toml-based build. - Changed type hints from custom type variable `DerivedCloudPath` to [`typing.Self`](https://docs.python.org/3/library/typing.html#typing.Self) ([PEP 673](https://docs.python.org/3/library/typing.html#typing.Self)). This adds a dependency on the [typing-extensions](https://pypi.org/project/typing-extensions/) backport package from Python versions lower than 3.11. +- Fixed a runtime key error when an S3 object does not have the `Content-Type` metadata set. ([Issue #331](https://github.com/drivendataorg/cloudpathlib/issues/331), [PR #332](https://github.com/drivendataorg/cloudpathlib/pull/332)) ## v0.13.0 (2023-02-15) From 4211d495ce50d9f19189809cf00edcb529aab4e2 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Mon, 5 Jun 2023 11:31:36 -0700 Subject: [PATCH 3/6] Fix CloudPathMeta.__call__ return type (#330) (#336) * Fix CloudPathMeta.__call__ return type Since pyright==1.1.307, instances of the subclasses of CloudPath stopped being recognized as instances of their own class; a possible fix is to type hint CloudPathMeta.__call__, but it creates a host of mypy errors * Refactor CloudPathMeta.__call__ Fix mypy errors by making use of object.__new__ and type assertions * Add type ignore on CloudPathMeta.__call__ return Incompatible return value type (got "CloudPath", expected "CloudPathT"); not a clear fix here without a more general refactor * Refactor CloudPathMeta.__call__ signature to use overload Also make issubclass(cls, CloudPath) explicit by raising TypeError * Run black * Change back __init__ to use Self type For some reason this leads to mypy type-var errors in the meta __call__ (probably erroneous) Co-authored-by: Matthew Hoffman --- cloudpathlib/cloudpath.py | 45 ++++++++++++++++++++++++++------------- 1 file changed, 30 insertions(+), 15 deletions(-) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index c37979ad..a744ae50 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -98,10 +98,12 @@ def path_class(self) -> Type["CloudPath"]: implementation_registry: Dict[str, CloudImplementation] = defaultdict(CloudImplementation) -def register_path_class(key: str) -> Callable: - T = TypeVar("T", bound=Type[CloudPath]) +T = TypeVar("T") +CloudPathT = TypeVar("CloudPathT", bound="CloudPath") - def decorator(cls: Type[T]) -> Type[T]: + +def register_path_class(key: str) -> Callable[[Type[CloudPathT]], Type[CloudPathT]]: + def decorator(cls: Type[CloudPathT]) -> Type[CloudPathT]: if not issubclass(cls, CloudPath): raise TypeError("Only subclasses of CloudPath can be registered.") implementation_registry[key]._path_class = cls @@ -112,34 +114,47 @@ def decorator(cls: Type[T]) -> Type[T]: class CloudPathMeta(abc.ABCMeta): - def __call__(cls, cloud_path, *args, **kwargs): + @overload + def __call__(cls: Type[T], cloud_path: CloudPathT, *args: Any, **kwargs: Any) -> CloudPathT: + ... + + @overload + def __call__( + cls: Type[T], cloud_path: Union[str, "CloudPath"], *args: Any, **kwargs: Any + ) -> T: + ... + + def __call__( + cls: Type[T], cloud_path: Union[str, CloudPathT], *args: Any, **kwargs: Any + ) -> Union[T, "CloudPath", CloudPathT]: # cls is a class that is the instance of this metaclass, e.g., CloudPath + if not issubclass(cls, CloudPath): + raise TypeError( + f"Only subclasses of {CloudPath.__name__} can be instantiated from its meta class." + ) # Dispatch to subclass if base CloudPath - if cls == CloudPath: + if cls is CloudPath: for implementation in implementation_registry.values(): path_class = implementation._path_class if path_class is not None and path_class.is_valid_cloudpath( cloud_path, raise_on_error=False ): # Instantiate path_class instance - new_obj = path_class.__new__(path_class, cloud_path, *args, **kwargs) - if isinstance(new_obj, path_class): - path_class.__init__(new_obj, cloud_path, *args, **kwargs) + new_obj = object.__new__(path_class) + path_class.__init__(new_obj, cloud_path, *args, **kwargs) # type: ignore[type-var] return new_obj - valid = [ + valid_prefixes = [ impl._path_class.cloud_prefix for impl in implementation_registry.values() if impl._path_class is not None ] raise InvalidPrefixError( - f"Path {cloud_path} does not begin with a known prefix {valid}." + f"Path {cloud_path} does not begin with a known prefix {valid_prefixes}." ) - # Otherwise instantiate as normal - new_obj = cls.__new__(cls, cloud_path, *args, **kwargs) - if isinstance(new_obj, cls): - cls.__init__(new_obj, cloud_path, *args, **kwargs) + new_obj = object.__new__(cls) + cls.__init__(new_obj, cloud_path, *args, **kwargs) # type: ignore[type-var] return new_obj def __init__(cls, name: str, bases: Tuple[type, ...], dic: Dict[str, Any]) -> None: @@ -449,7 +464,7 @@ def open( newline: Optional[str] = None, force_overwrite_from_cloud: bool = False, # extra kwarg not in pathlib force_overwrite_to_cloud: bool = False, # extra kwarg not in pathlib - ) -> IO: + ) -> IO[Any]: # if trying to call open on a directory that exists if self.exists() and not self.is_file(): raise CloudPathIsADirectoryError( From cb719e4d1c1791c43600cdca651d072fd7028824 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Mon, 12 Jun 2023 11:31:09 -0700 Subject: [PATCH 4/6] Make CloudPath.is_valid_cloudpath a TypeGuard (#337) (#338) This allows us to infer that CloudPaths are the same type of object as the class is_valid_cloudpath is called on Co-authored-by: Matthew Hoffman --- cloudpathlib/cloudpath.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index a744ae50..c50989db 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -35,6 +35,10 @@ from urllib.parse import urlparse from warnings import warn +if sys.version_info >= (3, 10): + from typing import TypeGuard +else: + from typing_extensions import TypeGuard if sys.version_info >= (3, 11): from typing import Self else: @@ -258,8 +262,20 @@ def _no_prefix(self) -> str: def _no_prefix_no_drive(self) -> str: return self._str[len(self.cloud_prefix) + len(self.drive) :] + @overload + @classmethod + def is_valid_cloudpath(cls, path: "CloudPath", raise_on_error: bool = ...) -> TypeGuard[Self]: + ... + + @overload + @classmethod + def is_valid_cloudpath(cls, path: str, raise_on_error: bool = ...) -> bool: + ... + @classmethod - def is_valid_cloudpath(cls, path: Union[str, "CloudPath"], raise_on_error=False) -> bool: + def is_valid_cloudpath( + cls, path: Union[str, "CloudPath"], raise_on_error: bool = False + ) -> Union[bool, TypeGuard[Self]]: valid = str(path).lower().startswith(cls.cloud_prefix.lower()) if raise_on_error and not valid: From 2a3d712dc1435870f95ea24a7bf14595217e8eda Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Fri, 16 Jun 2023 11:14:50 -0700 Subject: [PATCH 5/6] Prep for v0.15.0 (#339) * release 0150 * python 3.11.4 compatibility --- HISTORY.md | 7 +++++++ cloudpathlib/cloudpath.py | 6 +++--- pyproject.toml | 2 +- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 4ad80de1..11b51912 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,12 @@ # cloudpathlib Changelog +## v0.15.0 (2023-06-16) + +- Changed return type for `CloudPathMeta.__call__` to fix problems with pyright/pylance ([PR #330](https://github.com/drivendataorg/cloudpathlib/pull/330)) +- Make `CloudPath.is_valid_cloudpath` a TypeGuard so that type checkers can know the subclass if `is_valid_cloudpath` is called ([PR #337](https://github.com/drivendataorg/cloudpathlib/pull/337)) +- Added `follow_symlinks` to `stat` for 3.11.4 compatibility (see [bpo 39906](https://github.com/python/cpython/issues/84087)) +- Add `follow_symlinks` to `is_dir` implementation for CPython `glob` compatibility (see [CPython PR #104512](https://github.com/python/cpython/pull/104512)) + ## v0.14.0 (2023-05-13) - Changed to pyproject.toml-based build. diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index c50989db..42b43de3 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -786,7 +786,7 @@ def _dispatch_to_local_cache_path(self, func: str, *args, **kwargs) -> Any: else: return path_version - def stat(self) -> os.stat_result: + def stat(self, follow_symlinks: bool = True) -> os.stat_result: """Note: for many clients, we may want to override so we don't incur network costs since many of these properties are available as API calls. @@ -795,7 +795,7 @@ def stat(self) -> os.stat_result: f"stat not implemented as API call for {self.__class__} so file must be downloaded to " f"calculate stats; this may take a long time depending on filesize" ) - return self._dispatch_to_local_cache_path("stat") + return self._dispatch_to_local_cache_path("stat", follow_symlinks=follow_symlinks) # =========== public cloud methods, not in pathlib =============== def download_to(self, destination: Union[str, os.PathLike]) -> Path: @@ -1182,7 +1182,7 @@ def __init__( def __repr__(self) -> str: return "/".join(self._parents + [self.name]) - def is_dir(self) -> bool: + def is_dir(self, follow_symlinks: bool = False) -> bool: return self._all_children is not None def exists(self) -> bool: diff --git a/pyproject.toml b/pyproject.toml index 7d8b9557..c721516b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi" [project] name = "cloudpathlib" -version = "0.14.0" +version = "0.15.0" description = "pathlib-style classes for cloud storage services." readme = "README.md" authors = [{ name = "DrivenData", email = "info@drivendata.org" }] From 6b4afe9b553f7a25590f185c61fa73bf16c134a8 Mon Sep 17 00:00:00 2001 From: Peter Bull Date: Wed, 12 Jul 2023 10:14:49 -0700 Subject: [PATCH 6/6] Pydantic v2 update (#349) * pydantic compatibility warning * add note to docs * try fix MaxOS actions error * pydantic v2 compatibility * Keep v1 compatibility * Remove warning message * no need to version skip * update changelog --- HISTORY.md | 4 ++++ cloudpathlib/anypath.py | 28 +++++++++++++++++++++++++++- cloudpathlib/cloudpath.py | 20 ++++++++++++++++++++ docs/mkdocs.yml | 1 + pyproject.toml | 2 +- tests/mock_clients/mock_s3.py | 3 +++ 6 files changed, 56 insertions(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 11b51912..f7309295 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,9 @@ # cloudpathlib Changelog +## v0.15.1 (2023-07-12) + +- Compatibility with pydantic >= 2.0.0. ([PR #349](https://github.com/drivendataorg/cloudpathlib/pull/349)) + ## v0.15.0 (2023-06-16) - Changed return type for `CloudPathMeta.__call__` to fix problems with pyright/pylance ([PR #330](https://github.com/drivendataorg/cloudpathlib/pull/330)) diff --git a/cloudpathlib/anypath.py b/cloudpathlib/anypath.py index 197dbe65..ac1980a4 100644 --- a/cloudpathlib/anypath.py +++ b/cloudpathlib/anypath.py @@ -1,7 +1,7 @@ import os from abc import ABC from pathlib import Path -from typing import Union +from typing import Any, Union from .cloudpath import InvalidPrefixError, CloudPath from .exceptions import AnyPathTypeError @@ -30,6 +30,32 @@ def __new__(cls, *args, **kwargs) -> Union[CloudPath, Path]: # type: ignore f"Path exception: {repr(path_exception)}" ) + # =========== pydantic integration special methods =============== + @classmethod + def __get_pydantic_core_schema__(cls, _source_type: Any, _handler): + """Pydantic special method. See + https://docs.pydantic.dev/2.0/usage/types/custom/""" + try: + from pydantic_core import core_schema + + return core_schema.no_info_after_validator_function( + cls.validate, + core_schema.any_schema(), + ) + except ImportError: + return None + + @classmethod + def validate(cls, v: str) -> Union[CloudPath, Path]: + """Pydantic special method. See + https://docs.pydantic.dev/2.0/usage/types/custom/""" + try: + return cls.__new__(cls, v) + except AnyPathTypeError as e: + # type errors no longer converted to validation errors + # https://docs.pydantic.dev/2.0/migration/#typeerror-is-no-longer-converted-to-validationerror-in-validators + raise ValueError(e) + @classmethod def __get_validators__(cls): """Pydantic special method. See diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py index 42b43de3..13e903e2 100644 --- a/cloudpathlib/cloudpath.py +++ b/cloudpathlib/cloudpath.py @@ -1112,6 +1112,26 @@ def _upload_file_to_cloud( ) # =========== pydantic integration special methods =============== + @classmethod + def __get_pydantic_core_schema__(cls, _source_type: Any, _handler): + """Pydantic special method. See + https://docs.pydantic.dev/2.0/usage/types/custom/""" + try: + from pydantic_core import core_schema + + return core_schema.no_info_after_validator_function( + cls.validate, + core_schema.any_schema(), + ) + except ImportError: + return None + + @classmethod + def validate(cls, v: str) -> Self: + """Used as a Pydantic validator. See + https://docs.pydantic.dev/2.0/usage/types/custom/""" + return cls(v) + @classmethod def __get_validators__(cls) -> Generator[Callable[[Any], Self], None, None]: """Pydantic special method. See diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 1989581d..215885b5 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -43,6 +43,7 @@ nav: - cloudpathlib.local: "api-reference/local.md" markdown_extensions: + - admonition - pymdownx.highlight - pymdownx.superfences - toc: diff --git a/pyproject.toml b/pyproject.toml index c721516b..998de781 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi" [project] name = "cloudpathlib" -version = "0.15.0" +version = "0.15.1" description = "pathlib-style classes for cloud storage services." readme = "README.md" authors = [{ name = "DrivenData", email = "info@drivendata.org" }] diff --git a/tests/mock_clients/mock_s3.py b/tests/mock_clients/mock_s3.py index a6eb5685..5476989d 100644 --- a/tests/mock_clients/mock_s3.py +++ b/tests/mock_clients/mock_s3.py @@ -90,6 +90,9 @@ def copy_from(self, CopySource=None, Metadata=None, MetadataDirective=None): def download_file(self, to_path, Config=None, ExtraArgs=None): to_path = Path(to_path) + + to_path.parent.mkdir(parents=True, exist_ok=True) + to_path.write_bytes(self.path.read_bytes()) # track config to make sure it's used in tests self.resource.download_config = Config