diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 65982ecdb810c..fa427d0d74bfc 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -202,6 +202,7 @@ Other enhancements - :class:`Holiday` has gained the constructor argument and field ``exclude_dates`` to exclude specific datetimes from a custom holiday calendar (:issue:`54382`) - :class:`Rolling` and :class:`Expanding` now support ``nunique`` (:issue:`26958`) - :class:`Rolling` and :class:`Expanding` now support aggregations ``first`` and ``last`` (:issue:`33155`) +- :func:`qcut` now accepts the ``right`` parameter, consistent with :func:`cut` (:issue:`63053`) - :func:`read_parquet` accepts ``to_pandas_kwargs`` which are forwarded to :meth:`pyarrow.Table.to_pandas` which enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`) - :func:`to_numeric` on big integers converts to ``object`` datatype with python integers when not coercing. (:issue:`51295`) - :meth:`.DataFrameGroupBy.transform`, :meth:`.SeriesGroupBy.transform`, :meth:`.DataFrameGroupBy.agg`, :meth:`.SeriesGroupBy.agg`, :meth:`.SeriesGroupBy.apply`, :meth:`.DataFrameGroupBy.apply` now support ``kurt`` (:issue:`40139`) @@ -232,7 +233,6 @@ Other enhancements - Support reading Stata 102-format (Stata 1) dta files (:issue:`58978`) - Support reading Stata 110-format (Stata 7) dta files (:issue:`47176`) - Switched wheel upload to **PyPI Trusted Publishing** (OIDC) for release-tag pushes in ``wheels.yml``. (:issue:`61718`) -- .. --------------------------------------------------------------------------- .. 
_whatsnew_300.notable_bug_fixes: diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index b13da83084e5c..fc3870f2e235f 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -294,6 +294,7 @@ def qcut( x, q, labels=None, + right: bool = True, retbins: bool = False, precision: int = 3, duplicates: str = "raise", @@ -316,6 +317,11 @@ def qcut( Used as labels for the resulting bins. Must be of the same length as the resulting bins. If False, return only integer indicators of the bins. If True, raises an error. + right : bool, default True + Indicates whether the quantile bins include their right edge or not. If + ``right == True`` (the default), then bin edges ``[1, 2, 3, 4]`` + indicate (1,2], (2,3], (3,4]; with ``right == False`` they indicate + [1,2), [2,3), [3,4). retbins : bool, optional Whether to return the (bins, labels) or not. Can be useful if bins is given as a scalar. @@ -378,6 +384,7 @@ def qcut( x_idx, Index(bins), labels=labels, + right=right, precision=precision, include_lowest=True, duplicates=duplicates, diff --git a/pandas/tests/reshape/test_qcut.py b/pandas/tests/reshape/test_qcut.py index b6d45aeab8a7b..223c5612bfc55 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -44,6 +44,38 @@ def test_qcut(): tm.assert_categorical_equal(labels, ex_levels) +def test_qcut_right(): + arr = np.random.default_rng(2).standard_normal(1000) + + labels, _ = qcut(arr, 4, retbins=True, right=True) + ex_bins = np.quantile(arr, [0, 0.25, 0.5, 0.75, 1.0]) + + result = labels.categories.left.values + assert np.allclose(result, ex_bins[:-1], atol=1e-2) + + result = labels.categories.right.values + assert np.allclose(result, ex_bins[1:], atol=1e-2) + + ex_levels = cut(arr, ex_bins, include_lowest=True, right=True) + tm.assert_categorical_equal(labels, ex_levels) + + +def test_qcut_no_right(): + arr = np.random.default_rng(2).standard_normal(1000) + + labels, _ = qcut(arr, 4, retbins=True, right=False) 
+ ex_bins = np.quantile(arr, [0, 0.25, 0.5, 0.75, 1.0]) + + lefts = labels.categories.left.values + assert np.allclose(lefts, ex_bins[:-1], atol=1e-2) + + rights = labels.categories.right.values + assert np.allclose(rights, ex_bins[1:], atol=1e-2) + + ex_levels = cut(arr, ex_bins, include_lowest=True, right=False) + tm.assert_categorical_equal(labels, ex_levels) + + def test_qcut_bounds(): arr = np.random.default_rng(2).standard_normal(1000)