diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..8ebea5e --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,50 @@ +name: Documentation Build + +on: [push, pull_request] + +jobs: + docbuild: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 100 + + - name: Get tags + run: git fetch --depth=1 origin +refs/tags/*:refs/tags/* + + - name: Set up minimal Python version + uses: actions/setup-python@v2 + with: + python-version: "3.10" + + - name: Get pip cache dir + id: pip-cache + run: echo "::set-output name=dir::$(pip cache dir)" + + - name: Setup pip cache + uses: actions/cache@v2 + with: + path: ${{ steps.pip-cache.outputs.dir }} + key: pip-docs + restore-keys: pip-docs + + - name: Install locales + run: | + sudo apt-get install language-pack-fr + sudo localedef -i fr_FR -f UTF-8 fr_FR + + - name: Install dependencies + run: | + sudo apt install -y pandoc + pip install --upgrade pip setuptools wheel + pip install -r "requirements_docs.txt" + pip install docutils==0.14 commonmark==0.8.1 recommonmark==0.5.0 babel==2.8 + pip install . + + - name: Build documentation + run: sphinx-build -n -j auto -b html -d build/doctrees docs build/html + + - name: Doc Tests + run: sphinx-build -a -j auto -b doctest -d build/doctrees docs build/doctest diff --git a/.gitignore b/.gitignore index 250c366..9df029b 100644 --- a/.gitignore +++ b/.gitignore @@ -11,6 +11,7 @@ build/ dist/ MANIFEST *pytest_cache* +*mypy_cache* .eggs # WebDAV file system cache files diff --git a/docs/getting/tutorial.rst b/docs/getting/tutorial.rst index 46762f6..f8f1b48 100644 --- a/docs/getting/tutorial.rst +++ b/docs/getting/tutorial.rst @@ -1,5 +1,3 @@ -.. 
_tutorial: - ************************** Tutorial ************************** diff --git a/docs/index.rst b/docs/index.rst index 56a0d54..e5aa1ee 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,7 +1,7 @@ :orphan: Pint-pandas: Unit support for pandas -====================== +===================================== **Useful links**: `Code Repository `__ | @@ -66,9 +66,7 @@ Pint-pandas: Unit support for pandas Getting started User Guide - Advanced topics ecosystem - API Reference .. toctree:: :maxdepth: 1 diff --git a/docs/user/common.rst b/docs/user/common.rst index 0256db0..f7445a4 100644 --- a/docs/user/common.rst +++ b/docs/user/common.rst @@ -58,8 +58,9 @@ Creating DataFrames from Series The default operation of Pandas `pd.concat` function is to perform row-wise concatenation. When given a list of Series, each of which is backed by a PintArray, this will inefficiently convert all the PintArrays to arrays of `object` type, concatenate the several series into a DataFrame with that many rows, and then leave it up to you to convert that DataFrame back into column-wise PintArrays. A much more efficient approach is to concatenate Series in a column-wise fashion: .. ipython:: python - :suppress: :okwarning: + + list_of_series = [pd.Series([1.0, 2.0], dtype="pint[m]") for i in range(0, 10)] df = pd.concat(list_of_series, axis=1) diff --git a/docs/user/initializing.rst b/docs/user/initializing.rst index f50fb3a..e843300 100644 --- a/docs/user/initializing.rst +++ b/docs/user/initializing.rst @@ -4,16 +4,16 @@ Initializing data ************************** -There are several ways to initialize PintArrays in a DataFrame. Here's the most common methods. We'll use `PA_` and `Q_` as shorthand for PintArray and Quantity. +There are several ways to initialize a `PintArray` in a `DataFrame`. Here are the most common methods. We'll use `PA_` and `Q_` as shorthand for `PintArray` and `Quantity`. .. 
ipython:: python + :okwarning: import pandas as pd import pint import pint_pandas - import io PA_ = pint_pandas.PintArray ureg = pint_pandas.PintType.ureg @@ -21,18 +21,33 @@ There are several ways to initialize PintArrays in a DataFrame. Here's the most df = pd.DataFrame( { - "A": pd.Series([1.0, 2.0], dtype="pint[m]"), - "B": pd.Series([1.0, 2.0]).astype("pint[m]"), - "C": PA_([2.0, 3.0], dtype="pint[m]"), - "D": PA_([2.0, 3.0], dtype="m"), - "E": PA_([2.0, 3.0], dtype=ureg.m), - "F": PA_.from_1darray_quantity(Q_([2, 3], ureg.m)), - "G": PA_(Q_([2.0, 3.0], ureg.m)), + "Ser1": pd.Series([1, 2], dtype="pint[m]"), + "Ser2": pd.Series([1, 2]).astype("pint[m]"), + "Ser3": pd.Series([1, 2], dtype="pint[m][Int64]"), + "Ser4": pd.Series([1, 2]).astype("pint[m][Int64]"), + "PArr1": PA_([1, 2], dtype="pint[m]"), + "PArr2": PA_([1, 2], dtype="pint[m][Int64]"), + "PArr3": PA_([1, 2], dtype="m"), + "PArr4": PA_([1, 2], dtype=ureg.m), + "PArr5": PA_(Q_([1, 2], ureg.m)), + "PArr6": PA_([1, 2],"m"), } ) df +In the first two Series examples above, the data was converted to Float64. + +.. ipython:: python + + df.dtypes + + +To avoid this conversion, specify the subdtype (dtype of the magnitudes) in the dtype `"pint[m][Int64]"` when constructing using a `Series`. The default data dtype that pint-pandas converts to can be changed by modifying `pint_pandas.DEFAULT_SUBDTYPE`. + +`PintArray` infers the subdtype from the data passed into it when there is no subdtype specified in the dtype. It also accepts a pint `Unit` or unit string as the dtype. + + .. note:: - "pint[unit]" must be used for the Series or DataFrame constuctor. + `"pint[unit]"` or `"pint[unit][subdtype]"` must be used for the Series or DataFrame constructor. diff --git a/docs/user/reading.rst b/docs/user/reading.rst index 93a57f5..70e2a22 100644 --- a/docs/user/reading.rst +++ b/docs/user/reading.rst @@ -40,7 +40,10 @@ Let's read that into a DataFrame. 
Here io.StringIO is used in place of reading a df = pd.read_csv(io.StringIO(test_data), header=[0, 1], index_col=[0, 1]).T # df = pd.read_csv("/path/to/test_data.csv", header=[0, 1]) for col in df.columns: - df[col] = pd.to_numeric(df[col], errors="ignore") + try: + df[col] = pd.to_numeric(df[col]) + except: + pass df.dtypes diff --git a/pint_pandas/pint_array.py b/pint_pandas/pint_array.py index 93c787e..cb4f93b 100644 --- a/pint_pandas/pint_array.py +++ b/pint_pandas/pint_array.py @@ -19,7 +19,13 @@ register_series_accessor, ) from pandas.api.indexers import check_array_indexer -from pandas.api.types import is_integer, is_list_like, is_object_dtype, is_string_dtype +from pandas.api.types import ( + is_integer, + is_list_like, + is_object_dtype, + is_string_dtype, + infer_dtype, +) from pandas.compat import set_function_name from pandas.core import nanops # type: ignore from pint import Quantity as _Quantity @@ -29,6 +35,7 @@ # Magic 'unit' flagging columns with no unit support, used in # quantify/dequantify NO_UNIT = "No Unit" +DEFAULT_SUBDTYPE = "Float64" pandas_version = version("pandas") pandas_version_info = tuple( @@ -47,8 +54,9 @@ class PintType(ExtensionDtype): # base = np.dtype('O') # num = 102 units: Optional[_Unit] = None # Filled in by `construct_from_..._string` - _metadata = ("units",) - _match = re.compile(r"(P|p)int\[(?P.+)\]") + subdtype: Optional[np.dtype] = None + _metadata = ("units", "subdtype") + _match = re.compile(r"(P|p)int\[(?P.+)\]\[(?P.+)\]") _cache = {} # type: ignore ureg = pint.get_application_registry() @@ -57,7 +65,7 @@ def _is_numeric(self): # type: () -> bool return True - def __new__(cls, units=None): + def __new__(cls, units=None, subdtype=None): """ Parameters ---------- @@ -72,38 +80,46 @@ def __new__(cls, units=None): return object.__new__(cls) if not isinstance(units, _Unit): - units = cls._parse_dtype_strict(units) + units, subdtype = cls._parse_dtype_strict(units) # ureg.unit returns a quantity with a magnitude of 1 # eg 
1 mm. Initialising a quantity and taking its unit # TODO: Seperate units from quantities in pint # to simplify this bit units = cls.ureg.Quantity(1, units).units + if subdtype is None: + subdtype = DEFAULT_SUBDTYPE + try: # TODO: fix when Pint implements Callable typing # TODO: wrap string into PintFormatStr class - return cls._cache["{:P}".format(units)] # type: ignore + return cls._cache[("{:P}".format(units), subdtype)] # type: ignore except KeyError: u = object.__new__(cls) u.units = units - cls._cache["{:P}".format(units)] = u # type: ignore + u.subdtype = subdtype + cls._cache[("{:P}".format(units), subdtype)] = u # type: ignore return u @classmethod - def _parse_dtype_strict(cls, units): - if isinstance(units, str): - if units.lower() == "pint[]": - units = "pint[dimensionless]" - if units.lower().startswith("pint["): - if not units[-1] == "]": - raise ValueError("could not construct PintType") - m = cls._match.search(units) - if m is not None: - units = m.group("units") - if units is not None: - return units - - raise ValueError("could not construct PintType") + def _parse_dtype_strict(cls, string): + if not isinstance(string, str): + raise ValueError("could not construct PintType") + if string.lower()[:6] == "pint[]": + string = "pint[dimensionless]" + string[6:] + if string.lower().startswith("pint["): + if not string[-1] == "]": + raise ValueError("could not construct PintType") + if string.count("]") == 1: + string = string + "[" + str(DEFAULT_SUBDTYPE) + "]" + m = cls._match.search(string) + if m is not None: + units = m.group("units") + subdtype = m.group("subdtype") + else: + units = string + subdtype = DEFAULT_SUBDTYPE + return units, subdtype @classmethod def construct_from_string(cls, string): @@ -145,14 +161,15 @@ def construct_from_quantity_string(cls, string): @property def name(self): - return str("pint[{units}]".format(units=self.units)) + return str( + "pint[{units}][{subdtype}]".format(units=self.units, subdtype=self.subdtype) + ) @property 
def na_value(self): return self.ureg.Quantity(np.nan, self.units) def __hash__(self): - # make myself hashable return hash(str(self)) def __eq__(self, other): @@ -160,7 +177,7 @@ def __eq__(self, other): other = PintType(other) except (ValueError, errors.UndefinedUnitError): return False - return self.units == other.units + return self.units == other.units and self.subdtype == other.subdtype @classmethod def is_dtype(cls, dtype): @@ -288,11 +305,24 @@ class PintArray(ExtensionArray, ExtensionScalarOpsMixin): _HANDLED_TYPES = (np.ndarray, numbers.Number, _Quantity) def __init__(self, values, dtype=None, copy=False): + # infer subdtype from values if not given in dtype + if (isinstance(dtype, str) and dtype.count("[") <= 1) or isinstance( + dtype, _Unit + ): + _dtype = PintType(dtype) + if isinstance(values, _Quantity): + values = values.m_as(_dtype.units) + values = pd.array(values, copy=copy) + dtype = PintType(units=_dtype.units, subdtype=values.dtype) + # infer units and subdtype from values if no dtype given if dtype is None: if isinstance(values, _Quantity): - dtype = values.units + units = values.units + values = pd.array(values, copy=copy) + dtype = PintType(units=units, subdtype=values.dtype) elif isinstance(values, PintArray): dtype = values._dtype + if dtype is None: raise NotImplementedError @@ -300,20 +330,16 @@ def __init__(self, values, dtype=None, copy=False): dtype = PintType(dtype) self._dtype = dtype + # convert units if unit aware values if isinstance(values, _Quantity): values = values.to(dtype.units).magnitude elif isinstance(values, PintArray): values = values._data - if isinstance(values, np.ndarray): - dtype = values.dtype - if dtype in ddtypemap: - dtype = ddtypemap[dtype] - values = pd.array(values, copy=copy, dtype=dtype) - copy = False - elif not isinstance(values, pd.core.arrays.numeric.NumericArray): - values = pd.array(values, copy=copy) - if copy: - values = values.copy() + + # convert subdtype + if not isinstance(values, 
ExtensionArray) or not values.dtype == dtype: + values = pd.array(values, copy=copy, dtype=dtype.subdtype) + self._data = values self._Q = self.dtype.ureg.Quantity @@ -346,7 +372,7 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): def _convert_np_result(self, result): if isinstance(result, _Quantity) and is_list_like(result.m): - return PintArray.from_1darray_quantity(result) + return PintArray.from_1darray_quantity(result, self.dtype.subdtype) elif isinstance(result, _Quantity): return result elif type(result) is tuple: @@ -618,7 +644,7 @@ def _concat_same_type(cls, to_concat): converted_values = a.quantity.to(output_units).magnitude data.append(np.atleast_1d(converted_values)) - return cls(np.concatenate(data), output_units) + return cls(np.concatenate(data), to_concat[0].dtype) @classmethod def _from_sequence(cls, scalars, dtype=None, copy=False): @@ -641,25 +667,32 @@ def _from_sequence(cls, scalars, dtype=None, copy=False): if dtype is None: if not isinstance(master_scalar, _Quantity): raise ValueError("No dtype specified and not a sequence of quantities") - dtype = PintType(master_scalar.units) + units = master_scalar.units + subdtype = None + else: + units = dtype.units + subdtype = dtype.subdtype + # convert scalars to output unit if isinstance(master_scalar, _Quantity): scalars = [ - (item.to(dtype.units).magnitude if hasattr(item, "to") else item) + (item.to(units).magnitude if hasattr(item, "to") else item) for item in scalars ] - return cls(scalars, dtype=dtype, copy=copy) + + values = pd.array(scalars, dtype=subdtype) + return cls( + values, dtype=PintType(units=units, subdtype=values.dtype), copy=copy + ) @classmethod def _from_sequence_of_strings(cls, scalars, dtype=None, copy=False): if not dtype: dtype = PintType.construct_from_quantity_string(scalars[0]) - return cls._from_sequence([dtype.ureg.Quantity(x) for x in scalars]) + return cls._from_sequence([dtype.ureg.Quantity(x) for x in scalars], dtype) @classmethod def 
_from_factorized(cls, values, original): - from pandas.api.types import infer_dtype - if infer_dtype(values) != "object": values = pd.array(values, copy=False) return cls(values, dtype=original.dtype) @@ -800,8 +833,6 @@ def _create_method(cls, op, coerce_to_dtype=True): def _binop(self, other): def validate_length(obj1, obj2): - # validates length - # CHANGED: do not convert to listlike (why should we? pint.Quantity is perfecty able to handle that...) try: if len(obj1) != len(obj2): raise ValueError("Lengths must match") @@ -833,15 +864,22 @@ def convert_values(param): # a TypeError should be raised res = op(lvalues, rvalues) + subdtype = self.data.dtype + if "truediv" in op.__name__ and pd.api.types.is_integer_dtype(subdtype): + if isinstance(subdtype, _NumpyEADtype): + subdtype = "float64" + else: + subdtype = "Float64" + if op.__name__ == "divmod": return ( - cls.from_1darray_quantity(res[0]), - cls.from_1darray_quantity(res[1]), + cls.from_1darray_quantity(res[0], subdtype), + cls.from_1darray_quantity(res[1], subdtype), ) if coerce_to_dtype: try: - res = cls.from_1darray_quantity(res) + res = cls.from_1darray_quantity(res, subdtype) except TypeError: pass @@ -859,10 +897,19 @@ def _create_comparison_method(cls, op): return cls._create_method(op, coerce_to_dtype=False) @classmethod - def from_1darray_quantity(cls, quantity): + def from_1darray_quantity(cls, quantity, subdtype=None): if not is_list_like(quantity.magnitude): raise TypeError("quantity's magnitude is not list like") - return cls(quantity.magnitude, quantity.units) + if isinstance(quantity.magnitude, ExtensionArray): + subdtype = quantity.magnitude.dtype + mag = quantity.magnitude + elif subdtype: + mag = pd.array(quantity.magnitude, dtype=subdtype) + else: + mag = pd.array(quantity.magnitude) + subdtype = mag.dtype + + return cls(mag, PintType(quantity.units, subdtype)) def __array__(self, dtype=None, copy=False): if dtype is None or is_object_dtype(dtype): @@ -959,14 +1006,13 @@ def map(self, 
mapper, na_action=None): arr = map_array(self, mapper, na_action) - master_scalar = None try: - master_scalar = next(i for i in arr if hasattr(i, "units")) + next(i for i in arr if hasattr(i, "units")) except StopIteration: # JSON mapper formatting Qs as str don't create PintArrays # ...and that's OK. Caller will get array of values return arr - return PintArray._from_sequence(arr, PintType(master_scalar.units)) + return PintArray._from_sequence(arr) def _reduce(self, name, *, skipna: bool = True, keepdims: bool = False, **kwds): """ @@ -1047,7 +1093,7 @@ def _accumulate(self, name: str, *, skipna: bool = True, **kwds): except NotImplementedError: result = functions[name](self.numpy_data, **kwds) - return self._from_sequence(result, self.units) + return self._from_sequence(result, self.dtype) PintArray._add_arithmetic_ops() @@ -1164,6 +1210,7 @@ def __init__(self, pandas_obj): else: self._validate(pandas_obj) self.pandas_obj = pandas_obj + self.pintarray = pandas_obj.values self.quantity = pandas_obj.values.quantity self._index = pandas_obj.index self._name = pandas_obj.name @@ -1226,7 +1273,9 @@ def delegated_method(*args, **kwargs): result = method(*args, **kwargs) if self.to_series: if isinstance(result, _Quantity): - result = PintArray.from_1darray_quantity(result) + result = PintArray.from_1darray_quantity( + result, object.__getattribute__(obj, "pintarray").dtype.subdtype + ) result = Series(result, index, name=name) return result diff --git a/pint_pandas/testsuite/test_issues.py b/pint_pandas/testsuite/test_issues.py index 74f26eb..e43dbe2 100644 --- a/pint_pandas/testsuite/test_issues.py +++ b/pint_pandas/testsuite/test_issues.py @@ -41,7 +41,7 @@ def test_force_ndarray_like(self): result = pd.concat([a, b], axis=1) expected = pd.DataFrame( - {0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC]" + {0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC][Int64]" ) tm.assert_equal(result, expected) @@ -64,7 +64,7 @@ def test_offset_concat(self): 
result = pd.concat([a, b], axis=1) expected = pd.DataFrame( - {0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC]" + {0: PintArray(q_a_), 1: PintArray(q_b)}, dtype="pint[degC][Int64]" ) tm.assert_equal(result, expected) @@ -199,9 +199,9 @@ def test_issue_194(dtype): class TestIssue202(BaseExtensionTests): - def test_dequantify(self): + def test_dequantify_duplicate_col_names(self): df = pd.DataFrame() - df["test"] = pd.Series([1, 2, 3], dtype="pint[kN]") + df["test"] = pd.Series([1, 2, 3], dtype="pint[kN][Int64]") df.insert(0, "test", df["test"], allow_duplicates=True) expected = pd.DataFrame.from_dict( @@ -253,7 +253,7 @@ def test_roundtrip(self): "power": pd.Series([1.0, 2.0, 3.0], dtype="pint[W]"), "torque": pd.Series([4.0, 5.0, 6.0], dtype="pint[N*m]"), "fruits": pd.Series(["apple", "pear", "kiwi"]), - "float_numbers": pd.Series([1.0, 2.0, 3.0], dtype="float64"), + "float_numbers": pd.Series([1.0, 2.0, 3.0], dtype="Float64"), "int_numbers": pd.Series([1.0, 2.0, 3.0], dtype="int"), } ) @@ -311,3 +311,15 @@ def test_issue246(self): # now an operation where each cell is independent from each other df.apply(lambda x: x * 2, axis=1) + + +class TestIssue247(BaseExtensionTests): + a = pd.Series([2.0, 3.0, 4.0], dtype="pint[km][int16]") + result = a / a + expected = pd.Series([1, 1, 1], dtype="pint[][float64]") + tm.assert_series_equal(result, expected) + + a = pd.Series([2.0, 3.0, 4.0], dtype="pint[km][Int16]") + result = a / a + expected = pd.Series([1, 1, 1], dtype="pint[][Float64]") + tm.assert_series_equal(result, expected) diff --git a/pint_pandas/testsuite/test_pandas_extensiontests.py b/pint_pandas/testsuite/test_pandas_extensiontests.py index d7a4feb..97af53e 100644 --- a/pint_pandas/testsuite/test_pandas_extensiontests.py +++ b/pint_pandas/testsuite/test_pandas_extensiontests.py @@ -21,7 +21,7 @@ from pint.errors import DimensionalityError from pint_pandas import PintArray, PintType -from pint_pandas.pint_array import dtypemap, pandas_version_info 
+from pint_pandas.pint_array import pandas_version_info ureg = PintType.ureg @@ -34,31 +34,38 @@ def box_in_series(request): @pytest.fixture def dtype(): - return PintType("pint[meter]") + return PintType("pint[meter][float]") -_base_numeric_dtypes = [float, int] -_all_numeric_dtypes = _base_numeric_dtypes + [np.complex128] +_base_numeric_dtypes = [ + {"np_dtype": np.float64, "pd_dtype": pd.Float64Dtype()}, + {"np_dtype": np.float64, "pd_dtype": pd.core.dtypes.dtypes.NumpyEADtype(float)}, # type: ignore + {"np_dtype": np.int64, "pd_dtype": pd.Int64Dtype()}, +] + +_all_numeric_dtypes = _base_numeric_dtypes + [ + {"np_dtype": np.int64, "pd_dtype": pd.core.dtypes.dtypes.NumpyEADtype(object)}, # type: ignore +] +_all_numeric_dtype_ids = [str(item["pd_dtype"]) for item in _all_numeric_dtypes] -@pytest.fixture(params=_all_numeric_dtypes) +@pytest.fixture(params=_all_numeric_dtypes, ids=_all_numeric_dtype_ids) def numeric_dtype(request): return request.param @pytest.fixture def data(request, numeric_dtype): - return PintArray.from_1darray_quantity( - np.arange(start=1.0, stop=101.0, dtype=numeric_dtype) * ureg.nm + return PintArray( + np.arange(start=1.0, stop=101.0, dtype=numeric_dtype["np_dtype"]), + ureg.nm, + numeric_dtype["pd_dtype"], ) @pytest.fixture def data_missing(numeric_dtype): - numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) - return PintArray.from_1darray_quantity( - ureg.Quantity(pd.array([np.nan, 1], dtype=numeric_dtype), ureg.meter) - ) + return PintArray([np.nan, 1], ureg.nm, numeric_dtype["pd_dtype"]) @pytest.fixture @@ -66,8 +73,8 @@ def data_for_twos(numeric_dtype): x = [ 2.0, ] * 100 - return PintArray.from_1darray_quantity( - pd.array(x, dtype=numeric_dtype) * ureg.meter + return PintArray( + np.array(x, dtype=numeric_dtype["np_dtype"]), ureg.nm, numeric_dtype["pd_dtype"] ) @@ -101,19 +108,15 @@ def sort_by_key(request): @pytest.fixture def data_for_sorting(numeric_dtype): - return PintArray.from_1darray_quantity( - pd.array([0.3, 10.0, 
-50.0], numeric_dtype) * ureg.centimeter - ) + x = [0.3, 10.0, -50.0] + x = [numeric_dtype["np_dtype"](val) for val in x] + return PintArray(pd.array(x, numeric_dtype["pd_dtype"]), ureg.centimeter) @pytest.fixture def data_missing_for_sorting(numeric_dtype): - numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) - return PintArray.from_1darray_quantity( - ureg.Quantity( - pd.array([4.0, np.nan, -5.0], dtype=numeric_dtype), ureg.centimeter - ) - ) + x = [4.0, np.nan, -5.0] + return PintArray(pd.array(x, numeric_dtype["pd_dtype"]), ureg.centimeter) @pytest.fixture @@ -132,10 +135,12 @@ def data_for_grouping(numeric_dtype): a = 1.0 b = 2.0**32 + 1 c = 2.0**32 + 10 - numeric_dtype = dtypemap.get(numeric_dtype, numeric_dtype) return PintArray.from_1darray_quantity( ureg.Quantity( - pd.array([b, b, np.nan, np.nan, a, a, b, c], dtype=numeric_dtype), ureg.m + pd.array( + [b, b, np.nan, np.nan, a, a, b, c], dtype=numeric_dtype["pd_dtype"] + ), + ureg.m, ) ) @@ -305,6 +310,12 @@ def test_map(self, data_missing, na_action): def test_insert_invalid(self): pass + @pytest.mark.parametrize("ascending", [True, False]) + def test_sort_values_frame(self, data_for_sorting, ascending): + if data_for_sorting.dtype.subdtype == "object": + pytest.skip(reason="Dimensionality error in eq inside factorize.") + super().test_sort_values_frame(data_for_sorting, ascending) + # ArithmeticOps divmod_exc = None series_scalar_exc = None diff --git a/pint_pandas/testsuite/test_pandas_interface.py b/pint_pandas/testsuite/test_pandas_interface.py index 86b7c72..10d21fd 100644 --- a/pint_pandas/testsuite/test_pandas_interface.py +++ b/pint_pandas/testsuite/test_pandas_interface.py @@ -92,7 +92,7 @@ def test_df_operations(self): def test_dequantify(self): df = pd.DataFrame( { - "no_unit_column": pd.Series([i for i in range(4)], dtype=float), + "no_unit_column": pd.Series([i for i in range(4)], dtype="Float64"), "pintarray_column": pd.Series( [1.0, 2.0, 2.0, 3.0], dtype="pint[lbf ft]" ), @@ -100,7 
+100,9 @@ def test_dequantify(self): ) expected = pd.DataFrame( { - ("no_unit_column", "No Unit"): {0: 0.0, 1: 1.0, 2: 2.0, 3: 3.0}, + ("no_unit_column", "No Unit"): pd.Series( + {0: 0.0, 1: 1.0, 2: 2.0, 3: 3.0}, dtype="Float64" + ), ("pintarray_column", "foot * force_pound"): pd.Series( { 0: 1.0, @@ -108,7 +110,7 @@ def test_dequantify(self): 2: 2.0, 3: 3.0, }, - dtype=pd.Float64Dtype(), + dtype="Float64", ), } ) @@ -127,12 +129,13 @@ def test_quantify(self): 2: 2.0, 3: 3.0, }, - } + }, + dtype="Float64", ) df.columns.names = [None, "unit"] expected = pd.DataFrame( { - "no_unit_column": pd.Series([i for i in range(4)], dtype=float), + "no_unit_column": pd.Series([i for i in range(4)], dtype="Float64"), "pintarray_column": pd.Series( [1.0, 2.0, 2.0, 3.0], dtype="pint[lbf ft]" ), @@ -145,7 +148,7 @@ def test_quantify(self): def test_to_base_units(self): df = pd.DataFrame( { - "no_unit_column": pd.Series([i for i in range(4)], dtype=float), + "no_unit_column": pd.Series([i for i in range(4)], dtype="Float64"), "pintarray_column": pd.Series( [1.0, 2.0, 2.0, 3.0], dtype="pint[lbf ft]" ), @@ -155,7 +158,7 @@ def test_to_base_units(self): expected = pd.DataFrame( { - "no_unit_column": pd.Series([i for i in range(4)], dtype=float), + "no_unit_column": pd.Series([i for i in range(4)], dtype="Float64"), "pintarray_column": pd.Series( [ 1.3558179483314006, @@ -174,7 +177,7 @@ class TestDataFrameAccessor(object): def test_index_maintained(self): test_csv = join(dirname(__file__), "pandas_test.csv") - df = pd.read_csv(test_csv, header=[0, 1]) + df = pd.read_csv(test_csv, header=[0, 1], dtype="Float64") df.columns = pd.MultiIndex.from_arrays( [ ["Holden", "Holden", "Holden", "Ford", "Ford", "Ford"], diff --git a/requirements_docs.txt b/requirements_docs.txt index 8f44109..40528e2 100644 --- a/requirements_docs.txt +++ b/requirements_docs.txt @@ -1,4 +1,4 @@ -sphinx>4 +sphinx>=5 ipython<=8.12 matplotlib mip>=1.13 @@ -16,7 +16,7 @@ dask[complete] setuptools>=41.2 Serialize 
pygments>=2.4 -sphinx-book-theme==0.3.3 +sphinx-book-theme>=0.3.3 sphinx_copybutton sphinx_design typing_extensions