diff --git a/doc/source/whatsnew/v2.3.0.rst b/doc/source/whatsnew/v2.3.0.rst index b107a5d3ba100..e37368ce7e79b 100644 --- a/doc/source/whatsnew/v2.3.0.rst +++ b/doc/source/whatsnew/v2.3.0.rst @@ -36,7 +36,8 @@ Other enhancements when using ``np.array()`` or ``np.asarray()`` on pandas objects) has been updated to work correctly with NumPy >= 2 (:issue:`57739`) - The :meth:`~Series.sum` reduction is now implemented for ``StringDtype`` columns (:issue:`59853`) -- +- :meth:`~Series.describe` no longer always adds the median to ``percentiles``; + passing an empty list returns only the median (:issue:`60550`). .. --------------------------------------------------------------------------- .. _whatsnew_230.notable_bug_fixes: diff --git a/pandas/core/generic.py b/pandas/core/generic.py index d1aa20501b060..bb003a1d11f1b 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -10795,7 +10795,8 @@ def describe( The percentiles to include in the output. All should fall between 0 and 1. The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and - 75th percentiles. + 75th percentiles. If an empty list is passed, only the + 50th percentile is returned. include : 'all', list-like of dtypes or None (default), optional A white list of data types to include in the result. Ignored for ``Series``. Here are the options: diff --git a/pandas/core/methods/describe.py b/pandas/core/methods/describe.py index 17d4d38c97f33..fa53af4c2bac9 100644 --- a/pandas/core/methods/describe.py +++ b/pandas/core/methods/describe.py @@ -74,7 +74,8 @@ def describe_ndframe( percentiles : list-like of numbers, optional The percentiles to include in the output. All should fall between 0 and 1. The default is ``[.25, .5, .75]``, which returns the 25th, 50th, and - 75th percentiles. + 75th percentiles. If an empty list is passed, only the + 50th percentile is returned. 
Returns ------- @@ -351,13 +352,13 @@ def _refine_percentiles( # explicit conversion of `percentiles` to list percentiles = list(percentiles) + # the median is added automatically only when an empty iterable is passed + if len(percentiles) == 0: + return np.array([0.5]) + # get them all to be in [0, 1] validate_percentile(percentiles) - # median should always be included - if 0.5 not in percentiles: - percentiles.append(0.5) - percentiles = np.asarray(percentiles) # sort and check for duplicates diff --git a/pandas/tests/reductions/test_describe_ndframe.py b/pandas/tests/reductions/test_describe_ndframe.py new file mode 100644 index 0000000000000..ec252fd4316f5 --- /dev/null +++ b/pandas/tests/reductions/test_describe_ndframe.py @@ -0,0 +1,25 @@ +# -*- encoding: utf-8 -*- + +""" +Tests for _refine_percentiles in pandas.core.methods.describe. +""" + +import numpy as np +import pytest + +from pandas.core.methods.describe import _refine_percentiles + +@pytest.mark.parametrize( + "percentiles_, expected", [ + (None, np.array([0.25, 0.5, 0.75])), + ([], np.array([0.5])), + ([0.3, 0.6], np.array([0.3, 0.6])), + ] +) +def test_refine_percentiles(percentiles_, expected): + """ + Check that _refine_percentiles returns the expected percentiles for + default (None), empty, and explicit inputs. + """ + + assert np.array_equal(_refine_percentiles(percentiles_), expected)