diff --git a/src/visions/test/series_sparse.py b/src/visions/test/series_sparse.py index 4789c42d..f1d0bca7 100644 --- a/src/visions/test/series_sparse.py +++ b/src/visions/test/series_sparse.py @@ -26,7 +26,7 @@ def get_sparse_series(): dtype=pd.SparseDtype(np.bool, False), ), pd.Series( - pd.SparseArray([None, None, "gold", "black", "silver"]), + pd.arrays.SparseArray([None, None, "gold", "black", "silver"]), name="str_obj_sparse", ), # Pending https://github.com/pandas-dev/pandas/issues/35762 diff --git a/src/visions/types/boolean.py b/src/visions/types/boolean.py index 07c99a40..0412d985 100644 --- a/src/visions/types/boolean.py +++ b/src/visions/types/boolean.py @@ -9,7 +9,7 @@ from visions.types.type import VisionsBaseType from visions.utils import func_nullable_series_contains from visions.utils.coercion.test_utils import coercion_map, coercion_map_test -from visions.utils.series_utils import series_not_sparse +from visions.utils.series_utils import series_not_empty, series_not_sparse hasnan_bool_name = "boolean" if int(pd.__version__.split(".")[0]) >= 1 else "Bool" @@ -83,6 +83,7 @@ def get_relations(cls) -> Sequence[TypeRelation]: @classmethod @series_not_sparse + @series_not_empty def contains_op(cls, series: pd.Series, state: dict) -> bool: if not pdt.is_categorical_dtype(series) and pdt.is_bool_dtype(series): return True diff --git a/src/visions/types/categorical.py b/src/visions/types/categorical.py index 74316edb..62972950 100644 --- a/src/visions/types/categorical.py +++ b/src/visions/types/categorical.py @@ -5,7 +5,7 @@ from visions.relations import IdentityRelation, TypeRelation from visions.types.type import VisionsBaseType -from visions.utils.series_utils import series_not_sparse +from visions.utils.series_utils import series_not_empty, series_not_sparse def _get_relations(cls) -> Sequence[TypeRelation]: @@ -30,5 +30,6 @@ def get_relations(cls) -> Sequence[TypeRelation]: @classmethod @series_not_sparse + @series_not_empty def contains_op(cls, series: pd.Series, state: dict) -> bool: return pdt.is_categorical_dtype(series) diff --git a/src/visions/types/complex.py b/src/visions/types/complex.py index f88e7f91..dfa3684d 100644 --- a/src/visions/types/complex.py +++ b/src/visions/types/complex.py @@ -7,7 +7,7 @@ from visions.types.float import string_is_float from visions.types.type import VisionsBaseType from visions.utils.coercion import test_utils -from visions.utils.series_utils import series_not_sparse +from visions.utils.series_utils import series_not_empty, series_not_sparse def test_imaginary_in_string( @@ -60,5 +60,6 @@ def get_relations(cls) -> Sequence[TypeRelation]: @classmethod @series_not_sparse + @series_not_empty def contains_op(cls, series: pd.Series, state: dict) -> bool: return pdt.is_complex_dtype(series) diff --git a/src/visions/types/count.py b/src/visions/types/count.py index e616f2f2..c3e73205 100644 --- a/src/visions/types/count.py +++ b/src/visions/types/count.py @@ -5,7 +5,7 @@ from visions.relations import IdentityRelation, TypeRelation from visions.types.type import VisionsBaseType -from visions.utils.series_utils import series_not_sparse +from visions.utils.series_utils import series_not_empty, series_not_sparse def _get_relations(cls) -> Sequence[TypeRelation]: @@ -30,5 +30,6 @@ def get_relations(cls) -> Sequence[TypeRelation]: @classmethod @series_not_sparse + @series_not_empty def contains_op(cls, series: pd.Series, state: dict) -> bool: return pdt.is_unsigned_integer_dtype(series) diff --git a/src/visions/types/date.py b/src/visions/types/date.py index ec35889a..cc4282da 100644 --- a/src/visions/types/date.py +++ b/src/visions/types/date.py @@ -5,10 +5,11 @@ from visions.relations import IdentityRelation, InferenceRelation, TypeRelation from visions.types.type import VisionsBaseType +from visions.utils import func_nullable_series_contains from visions.utils.series_utils import ( class_name_attrs, - func_nullable_series_contains, nullable_series_contains, + series_not_empty, ) @@ -53,6 +54,7 @@ def get_relations(cls) -> Sequence[TypeRelation]: return _get_relations(cls) @classmethod + @series_not_empty @nullable_series_contains def contains_op(cls, series: pd.Series, state: dict) -> bool: return class_name_attrs(series, date, ["year", "month", "day"]) diff --git a/src/visions/types/date_time.py b/src/visions/types/date_time.py index 840a2bbf..fcf06ffb 100644 --- a/src/visions/types/date_time.py +++ b/src/visions/types/date_time.py @@ -7,7 +7,7 @@ from visions.relations import IdentityRelation, InferenceRelation, TypeRelation from visions.types.type import VisionsBaseType from visions.utils.coercion import test_utils -from visions.utils.series_utils import series_not_sparse +from visions.utils.series_utils import series_not_empty, series_not_sparse def string_is_datetime(series: pd.Series, state: dict) -> bool: @@ -51,5 +51,6 @@ def get_relations(cls) -> Sequence[TypeRelation]: @classmethod @series_not_sparse + @series_not_empty def contains_op(cls, series: pd.Series, state: dict) -> bool: return pdt.is_datetime64_any_dtype(series) diff --git a/src/visions/types/email_address.py b/src/visions/types/email_address.py index 19a86911..159fd536 100644 --- a/src/visions/types/email_address.py +++ b/src/visions/types/email_address.py @@ -6,7 +6,11 @@ from visions.relations import IdentityRelation, InferenceRelation, TypeRelation from visions.types.type import VisionsBaseType from visions.utils.coercion import test_utils -from visions.utils.series_utils import isinstance_attrs, nullable_series_contains +from visions.utils.series_utils import ( + isinstance_attrs, + nullable_series_contains, + series_not_empty, +) def string_is_email(series, state: dict): @@ -73,6 +77,7 @@ def get_relations(cls) -> Sequence[TypeRelation]: return _get_relations(cls) @classmethod + @series_not_empty @nullable_series_contains def contains_op(cls, series: pd.Series, state: dict) -> bool: return isinstance_attrs(series, FQDA, ["local", "fqdn"]) diff --git a/src/visions/types/file.py b/src/visions/types/file.py index 01c643f3..18542608 100644 --- a/src/visions/types/file.py +++ b/src/visions/types/file.py @@ -5,7 +5,7 @@ from visions.relations import IdentityRelation, TypeRelation from visions.types.type import VisionsBaseType -from visions.utils.series_utils import nullable_series_contains +from visions.utils.series_utils import nullable_series_contains, series_not_empty def _get_relations(cls) -> Sequence[TypeRelation]: @@ -30,6 +30,7 @@ def get_relations(cls) -> Sequence[TypeRelation]: return _get_relations(cls) @classmethod + @series_not_empty @nullable_series_contains def contains_op(cls, series: pd.Series, state: dict) -> bool: return all(isinstance(p, pathlib.Path) and p.exists() for p in series) diff --git a/src/visions/types/float.py b/src/visions/types/float.py index 8b55aea6..a21cb2d6 100644 --- a/src/visions/types/float.py +++ b/src/visions/types/float.py @@ -7,7 +7,7 @@ from visions.relations import IdentityRelation, InferenceRelation, TypeRelation from visions.types.type import VisionsBaseType from visions.utils.coercion import test_utils -from visions.utils.series_utils import func_nullable_series_contains, series_not_sparse +from visions.utils.series_utils import series_not_empty, series_not_sparse from visions.utils.warning_handling import suppress_warnings @@ -89,6 +89,7 @@ def get_relations(cls) -> Sequence[TypeRelation]: return _get_relations(cls) @classmethod + @series_not_empty @series_not_sparse def contains_op(cls, series: pd.Series, state: dict) -> bool: return pdt.is_float_dtype(series) diff --git a/src/visions/types/geometry.py b/src/visions/types/geometry.py index eb099e7c..670345be 100644 --- a/src/visions/types/geometry.py +++ b/src/visions/types/geometry.py @@ -6,7 +6,7 @@ from visions.relations import IdentityRelation, InferenceRelation, TypeRelation from visions.types.type import VisionsBaseType -from visions.utils.series_utils import nullable_series_contains +from visions.utils.series_utils import nullable_series_contains, series_not_empty def string_is_geometry(series: pd.Series, state: dict) -> bool: @@ -60,6 +60,7 @@ def get_relations(cls) -> Sequence[TypeRelation]: return _get_relations(cls) @classmethod + @series_not_empty @nullable_series_contains def contains_op(cls, series: pd.Series, state: dict) -> bool: from shapely.geometry.base import BaseGeometry diff --git a/src/visions/types/image.py b/src/visions/types/image.py index 1f1252de..6b79eff8 100644 --- a/src/visions/types/image.py +++ b/src/visions/types/image.py @@ -6,7 +6,7 @@ from visions.relations import IdentityRelation, TypeRelation from visions.types.type import VisionsBaseType -from visions.utils.series_utils import nullable_series_contains +from visions.utils.series_utils import nullable_series_contains, series_not_empty def _get_relations(cls) -> Sequence[TypeRelation]: @@ -31,6 +31,7 @@ def get_relations(cls) -> Sequence[TypeRelation]: return _get_relations(cls) @classmethod + @series_not_empty @nullable_series_contains def contains_op(cls, series: pd.Series, state: dict) -> bool: return all( diff --git a/src/visions/types/integer.py b/src/visions/types/integer.py index 56c0e9da..9f1a1389 100644 --- a/src/visions/types/integer.py +++ b/src/visions/types/integer.py @@ -7,7 +7,7 @@ from visions.relations import IdentityRelation, InferenceRelation, TypeRelation from visions.types.type import VisionsBaseType from visions.utils import func_nullable_series_contains -from visions.utils.series_utils import series_not_sparse +from visions.utils.series_utils import series_not_empty, series_not_sparse def to_int(series: pd.Series, state: dict) -> pd.Series: @@ -53,5 +53,6 @@ def get_relations(cls) -> Sequence[TypeRelation]: @classmethod @series_not_sparse + @series_not_empty def contains_op(cls, series: pd.Series, state: dict) -> bool: return pdt.is_integer_dtype(series) diff --git a/src/visions/types/ip_address.py b/src/visions/types/ip_address.py index 8a6ad57c..3d5a3105 100644 --- a/src/visions/types/ip_address.py +++ b/src/visions/types/ip_address.py @@ -6,7 +6,7 @@ from visions.relations import IdentityRelation, InferenceRelation, TypeRelation from visions.types.type import VisionsBaseType from visions.utils.coercion import test_utils -from visions.utils.series_utils import nullable_series_contains +from visions.utils.series_utils import nullable_series_contains, series_not_empty def string_is_ip(series, state: dict): @@ -42,6 +42,7 @@ def get_relations(cls) -> Sequence[TypeRelation]: return _get_relations(cls) @classmethod + @series_not_empty @nullable_series_contains def contains_op(cls, series: pd.Series, state: dict) -> bool: return all(isinstance(x, _BaseAddress) for x in series) diff --git a/src/visions/types/numeric.py b/src/visions/types/numeric.py index 310a569b..2d636620 100644 --- a/src/visions/types/numeric.py +++ b/src/visions/types/numeric.py @@ -5,7 +5,7 @@ from visions.relations import IdentityRelation, TypeRelation from visions.types.type import VisionsBaseType -from visions.utils.series_utils import series_not_sparse +from visions.utils.series_utils import series_not_empty, series_not_sparse def _get_relations(cls) -> Sequence[TypeRelation]: @@ -30,5 +30,6 @@ def get_relations(cls) -> Sequence[TypeRelation]: @classmethod @series_not_sparse + @series_not_empty def contains_op(cls, series: pd.Series, state: dict) -> bool: return pdt.is_numeric_dtype(series) diff --git a/src/visions/types/object.py b/src/visions/types/object.py index 980af4e9..98f7c5be 100644 --- a/src/visions/types/object.py +++ b/src/visions/types/object.py @@ -5,7 +5,7 @@ from visions.relations import IdentityRelation, TypeRelation from visions.types.type import VisionsBaseType -from visions.utils.series_utils import series_not_sparse +from visions.utils.series_utils import series_not_empty, series_not_sparse def _get_relations(cls) -> Sequence[TypeRelation]: @@ -33,6 +33,7 @@ def get_relations(cls) -> Sequence[TypeRelation]: @classmethod @series_not_sparse + @series_not_empty def contains_op(cls, series: pd.Series, state: dict) -> bool: is_object = pdt.is_object_dtype(series) if is_object: diff --git a/src/visions/types/ordinal.py b/src/visions/types/ordinal.py index c880ab40..e9b50bb3 100644 --- a/src/visions/types/ordinal.py +++ b/src/visions/types/ordinal.py @@ -5,6 +5,7 @@ from visions.relations import IdentityRelation, TypeRelation from visions.types.type import VisionsBaseType +from visions.utils.series_utils import series_not_empty def to_ordinal(series: pd.Series) -> pd.Categorical: @@ -34,5 +35,6 @@ def get_relations(cls) -> Sequence[TypeRelation]: return _get_relations(cls) @classmethod + @series_not_empty def contains_op(cls, series: pd.Series, state: dict) -> bool: return pdt.is_categorical_dtype(series) and series.cat.ordered diff --git a/src/visions/types/path.py b/src/visions/types/path.py index a194429d..79912e28 100644 --- a/src/visions/types/path.py +++ b/src/visions/types/path.py @@ -5,7 +5,7 @@ from visions.relations import IdentityRelation, InferenceRelation, TypeRelation from visions.types.type import VisionsBaseType -from visions.utils.series_utils import nullable_series_contains +from visions.utils.series_utils import nullable_series_contains, series_not_empty def string_is_path(series, state: dict) -> bool: @@ -51,6 +51,7 @@ def get_relations(cls) -> Sequence[TypeRelation]: return _get_relations(cls) @classmethod + @series_not_empty @nullable_series_contains def contains_op(cls, series: pd.Series, state: dict) -> bool: return all(isinstance(x, pathlib.PurePath) and x.is_absolute() for x in series) diff --git a/src/visions/types/string.py b/src/visions/types/string.py index f22acbfc..c4f07567 100644 --- a/src/visions/types/string.py +++ b/src/visions/types/string.py @@ -6,7 +6,7 @@ from visions.relations import IdentityRelation, TypeRelation from visions.types.type import VisionsBaseType from visions.utils import func_nullable_series_contains -from visions.utils.series_utils import series_not_sparse +from visions.utils.series_utils import series_not_empty, series_not_sparse @func_nullable_series_contains @@ -39,6 +39,7 @@ def get_relations(cls) -> Sequence[TypeRelation]: @classmethod @series_not_sparse + @series_not_empty def contains_op(cls, series: pd.Series, state: dict) -> bool: # TODO: without the object check this passes string categories... is there a better way? if pdt.is_categorical_dtype(series): diff --git a/src/visions/types/time.py b/src/visions/types/time.py index c5ab3126..aca8cdf9 100644 --- a/src/visions/types/time.py +++ b/src/visions/types/time.py @@ -9,6 +9,7 @@ class_name_attrs, func_nullable_series_contains, nullable_series_contains, + series_not_empty, ) @@ -44,6 +45,7 @@ def get_relations(cls) -> Sequence[TypeRelation]: return _get_relations(cls) @classmethod + @series_not_empty @nullable_series_contains def contains_op(cls, series: pd.Series, state: dict) -> bool: return class_name_attrs(series, time, ["microsecond", "hour"]) diff --git a/src/visions/types/time_delta.py b/src/visions/types/time_delta.py index 16bfc645..f91586d5 100644 --- a/src/visions/types/time_delta.py +++ b/src/visions/types/time_delta.py @@ -5,7 +5,7 @@ from visions.relations import IdentityRelation, TypeRelation from visions.types.type import VisionsBaseType -from visions.utils.series_utils import series_not_sparse +from visions.utils.series_utils import series_not_empty, series_not_sparse def _get_relations(cls) -> Sequence[TypeRelation]: @@ -30,5 +30,6 @@ def get_relations(cls) -> Sequence[TypeRelation]: @classmethod @series_not_sparse + @series_not_empty def contains_op(cls, series: pd.Series, state: dict) -> bool: return pdt.is_timedelta64_dtype(series) diff --git a/src/visions/types/type.py b/src/visions/types/type.py index aacdd0df..fa3f74dc 100644 --- a/src/visions/types/type.py +++ b/src/visions/types/type.py @@ -9,13 +9,6 @@ class VisionsBaseTypeMeta(ABCMeta): def __contains__(cls, series: pd.Series, state: dict = {}) -> bool: - # Possible alternative: - # return cls in cls.typeset.detect_type_path(series) - - if series.empty: - from visions.types import Generic - - return issubclass(cls, Generic) return cls.contains_op(series, state) # type: ignore @property diff --git a/src/visions/types/url.py b/src/visions/types/url.py index faecdddb..3c41ddc9 100644 --- a/src/visions/types/url.py +++ b/src/visions/types/url.py @@ -9,6 +9,7 @@ func_nullable_series_contains, isinstance_attrs, nullable_series_contains, + series_not_empty, ) @@ -51,6 +52,7 @@ def get_relations(cls) -> Sequence[TypeRelation]: return _get_relations(cls) @classmethod + @series_not_empty @nullable_series_contains def contains_op(cls, series: pd.Series, state: dict) -> bool: return isinstance_attrs(series, ParseResult, ["netloc", "scheme"]) diff --git a/src/visions/types/uuid.py b/src/visions/types/uuid.py index 86a1392b..71569f98 100644 --- a/src/visions/types/uuid.py +++ b/src/visions/types/uuid.py @@ -6,7 +6,11 @@ from visions.relations import IdentityRelation, InferenceRelation, TypeRelation from visions.types.type import VisionsBaseType from visions.utils.coercion.test_utils import coercion_true_test -from visions.utils.series_utils import isinstance_attrs, nullable_series_contains +from visions.utils.series_utils import ( + isinstance_attrs, + nullable_series_contains, + series_not_empty, +) def string_is_uuid(series, state: dict) -> bool: @@ -56,6 +60,7 @@ def get_relations(cls) -> Sequence[TypeRelation]: return _get_relations(cls) @classmethod + @series_not_empty @nullable_series_contains def contains_op(cls, series: pd.Series, state: dict) -> bool: return isinstance_attrs(series, uuid.UUID, ["time_low", "hex"]) diff --git a/src/visions/utils/series_utils.py b/src/visions/utils/series_utils.py index 52e95d84..9d223a47 100644 --- a/src/visions/utils/series_utils.py +++ b/src/visions/utils/series_utils.py @@ -54,6 +54,16 @@ def inner(cls, series: pd.Series, *args, **kwargs) -> bool: return inner +def series_not_empty(fn: Callable) -> Callable: + @functools.wraps(fn) + def inner(cls, series: pd.Series, *args, **kwargs) -> bool: + if series.empty: + return False + return fn(cls, series, *args, **kwargs) + + return inner + + def _contains_instance_attrs( series, is_method, class_name, attrs: list, sample_size=1 ) -> bool: