Skip to content

Commit d264e20

Browse files
committed
Deprecate passing reduction kernels to groupby.agg
1 parent d9e34e7 commit d264e20

File tree

5 files changed: +132 additions, -26 deletions

pandas/core/groupby/generic.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
Union,
2222
cast,
2323
)
24+
import warnings
2425

2526
import numpy as np
2627

@@ -32,6 +33,7 @@
3233
Substitution,
3334
doc,
3435
)
36+
from pandas.util._exceptions import find_stack_level
3537

3638
from pandas.core.dtypes.common import (
3739
ensure_int64,
@@ -62,6 +64,10 @@
6264
import pandas.core.common as com
6365
from pandas.core.frame import DataFrame
6466
from pandas.core.groupby import base
67+
from pandas.core.groupby.base import (
68+
reduction_kernels,
69+
transformation_kernels,
70+
)
6571
from pandas.core.groupby.groupby import (
6672
GroupBy,
6773
GroupByPlot,
@@ -326,6 +332,14 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
326332
kwargs = {}
327333

328334
if isinstance(func, str):
335+
if func not in reduction_kernels and not self._grouper._is_resample:
336+
meth = "transform" if func in transformation_kernels else "apply"
337+
warnings.warn(
338+
f"In the future, using the non-aggregation {func=} will raise a "
339+
f"ValueError, use this function with {type(self).__name__}.{meth}",
340+
category=DeprecationWarning,
341+
stacklevel=find_stack_level(),
342+
)
329343
if maybe_use_numba(engine) and engine is not None:
330344
# Not all agg functions support numba, only propagate numba kwargs
331345
# if user asks for numba, and engine is not None
@@ -1562,6 +1576,19 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs)
15621576
kwargs["engine"] = engine
15631577
kwargs["engine_kwargs"] = engine_kwargs
15641578

1579+
if (
1580+
isinstance(func, str)
1581+
and func not in reduction_kernels
1582+
and not self._grouper._is_resample
1583+
):
1584+
meth = "transform" if func in transformation_kernels else "apply"
1585+
warnings.warn(
1586+
f"In the future, using the non-aggregation {func=} will raise a "
1587+
f"ValueError, use this function with {type(self).__name__}.{meth}",
1588+
category=DeprecationWarning,
1589+
stacklevel=find_stack_level(),
1590+
)
1591+
15651592
op = GroupByApply(self, func, args=args, kwargs=kwargs)
15661593
result = op.agg()
15671594
if not is_dict_like(func) and result is not None:

pandas/tests/groupby/aggregate/test_aggregate.py

Lines changed: 29 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -48,28 +48,29 @@ def test_agg_regression1(tsframe):
4848

4949
def test_agg_must_agg(df):
5050
grouped = df.groupby("A")["C"]
51-
expected = pd.Series(
51+
expected = Series(
5252
{
5353
"bar": df[df.A == "bar"]["C"].describe(),
5454
"foo": df[df.A == "foo"]["C"].describe(),
5555
},
56-
index=pd.Index(["bar", "foo"], name="A"),
56+
index=Index(["bar", "foo"], name="A"),
5757
name="C",
5858
)
5959
result = grouped.agg(lambda x: x.describe())
6060
tm.assert_series_equal(result, expected)
6161

62-
expected = pd.Series(
62+
expected = Series(
6363
{
6464
"bar": df[df.A == "bar"]["C"].index[:2],
6565
"foo": df[df.A == "foo"]["C"].index[:2],
6666
},
67-
index=pd.Index(["bar", "foo"], name="A"),
67+
index=Index(["bar", "foo"], name="A"),
6868
name="C",
6969
)
7070
result = grouped.agg(lambda x: x.index[:2])
7171
tm.assert_series_equal(result, expected)
7272

73+
7374
def test_agg_ser_multi_key(df):
7475
f = lambda x: x.sum()
7576
results = df.C.groupby([df.A, df.B]).aggregate(f)
@@ -485,6 +486,9 @@ def test_groupby_agg_dict_dup_columns():
485486
tm.assert_frame_equal(result, expected)
486487

487488

489+
@pytest.mark.filterwarnings(
490+
"ignore:In the future, using the non-aggregation func:DeprecationWarning"
491+
)
488492
@pytest.mark.parametrize(
489493
"op",
490494
[
@@ -564,7 +568,9 @@ def test_order_aggregate_multiple_funcs():
564568
# GH 25692
565569
df = DataFrame({"A": [1, 1, 2, 2], "B": [1, 2, 3, 4]})
566570

567-
res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"])
571+
msg = "using the non-aggregation func='ohlc' will raise"
572+
with tm.assert_produces_warning(DeprecationWarning, match=msg):
573+
res = df.groupby("A").agg(["sum", "max", "mean", "ohlc", "min"])
568574
result = res.columns.levels[1]
569575

570576
expected = Index(["sum", "max", "mean", "ohlc", "min"])
@@ -1377,9 +1383,14 @@ def test_nonagg_agg():
13771383
df = DataFrame({"a": [1, 1, 2, 2], "b": [1, 2, 2, 1]})
13781384
g = df.groupby("a")
13791385

1380-
result = g.agg(["cumsum"])
1386+
msg = "using the non-aggregation func='cumsum' will raise"
1387+
with tm.assert_produces_warning(DeprecationWarning, match=msg):
1388+
result = g.agg(["cumsum"])
13811389
result.columns = result.columns.droplevel(-1)
1382-
expected = g.agg("cumsum")
1390+
1391+
msg = "using the non-aggregation func='cumsum' will raise"
1392+
with tm.assert_produces_warning(DeprecationWarning, match=msg):
1393+
expected = g.agg("cumsum")
13831394

13841395
tm.assert_frame_equal(result, expected)
13851396

@@ -1450,8 +1461,12 @@ def test_groupby_agg_precision(any_real_numeric_dtype):
14501461
}
14511462
)
14521463

1453-
expected = DataFrame({"key3": [df["key3"]]},
1454-
index=pd.MultiIndex(levels=[["a"], ["b"]], codes=[[0], [0]], names=["key1", "key2"]))
1464+
expected = DataFrame(
1465+
{"key3": [df["key3"]]},
1466+
index=MultiIndex(
1467+
levels=[["a"], ["b"]], codes=[[0], [0]], names=["key1", "key2"]
1468+
),
1469+
)
14551470

14561471
result = df.groupby(["key1", "key2"]).agg(lambda x: x)
14571472
tm.assert_frame_equal(result, expected)
@@ -1547,10 +1562,8 @@ def test_agg_of_mode_list(test, values):
15471562
# Mode usually only returns 1 value, but can return a list in the case of a tie.
15481563

15491564
expected = DataFrame(
1550-
[
1551-
[df1[df1[0] == value][1].mode()] for value in values
1552-
],
1553-
index=pd.Index(values, name=0),
1565+
[[df1[df1[0] == value][1].mode()] for value in values],
1566+
index=Index(values, name=0),
15541567
columns=[1],
15551568
)
15561569

@@ -1659,7 +1672,9 @@ def test_groupby_agg_extension_timedelta_cumsum_with_named_aggregation():
16591672
}
16601673
)
16611674
gb = df.groupby("grps")
1662-
result = gb.agg(td=("td", "cumsum"))
1675+
msg = "using the non-aggregation func='cumsum' will raise"
1676+
with tm.assert_produces_warning(DeprecationWarning, match=msg):
1677+
result = gb.agg(td=("td", "cumsum"))
16631678
tm.assert_frame_equal(result, expected)
16641679

16651680

pandas/tests/groupby/aggregate/test_cython.py

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
bdate_range,
2222
)
2323
import pandas._testing as tm
24+
from pandas.core.groupby.base import reduction_kernels
2425

2526

2627
@pytest.mark.parametrize(
@@ -287,8 +288,16 @@ def test_read_only_buffer_source_agg(agg):
287288
)
288289
df._mgr.arrays[0].flags.writeable = False
289290

290-
result = df.groupby(["species"]).agg({"sepal_length": agg})
291-
expected = df.copy().groupby(["species"]).agg({"sepal_length": agg})
291+
if agg in reduction_kernels:
292+
warn = None
293+
msg = ""
294+
else:
295+
warn = DeprecationWarning
296+
msg = f"using the non-aggregation func='{agg}' will raise"
297+
with tm.assert_produces_warning(warn, match=msg):
298+
result = df.groupby(["species"]).agg({"sepal_length": agg})
299+
with tm.assert_produces_warning(warn, match=msg):
300+
expected = df.copy().groupby(["species"]).agg({"sepal_length": agg})
292301

293302
tm.assert_equal(result, expected)
294303

pandas/tests/groupby/test_groupby.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2467,7 +2467,7 @@ def test_by_column_values_with_same_starting_value(dtype):
24672467
result = df.groupby(["Name"]).agg(aggregate_details)
24682468
expected_result = DataFrame(
24692469
{
2470-
"Mood": [pd.Series(["happy", "sad"]), pd.Series(["happy"])],
2470+
"Mood": [Series(["happy", "sad"]), Series(["happy"])],
24712471
"Credit": [2500, 900],
24722472
"Name": ["Thomas", "Thomas John"],
24732473
}
@@ -2935,9 +2935,12 @@ def test_groupby_dropna_with_nunique_unique():
29352935
# GH#42016
29362936
df = [[1, 1, 1, "A"], [1, None, 1, "A"], [1, None, 2, "A"], [1, None, 3, "A"]]
29372937
df_dropna = DataFrame(df, columns=["a", "b", "c", "partner"])
2938-
result = df_dropna.groupby(["a", "b", "c"], dropna=False).agg(
2939-
{"partner": ["nunique", "unique"]}
2940-
)
2938+
2939+
msg = "using the non-aggregation func='unique' will raise"
2940+
with tm.assert_produces_warning(DeprecationWarning, match=msg):
2941+
result = df_dropna.groupby(["a", "b", "c"], dropna=False).agg(
2942+
{"partner": ["nunique", "unique"]}
2943+
)
29412944

29422945
index = MultiIndex.from_tuples(
29432946
[(1, 1.0, 1), (1, np.nan, 1), (1, np.nan, 2), (1, np.nan, 3)],

pandas/tests/groupby/test_raises.py

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
Series,
1616
)
1717
import pandas._testing as tm
18+
from pandas.core.groupby.base import reduction_kernels
1819
from pandas.tests.groupby import get_groupby_method_args
1920

2021

@@ -84,8 +85,10 @@ def df_with_cat_col():
8485
return df
8586

8687

87-
def _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg=""):
88-
warn_klass = None if warn_msg == "" else FutureWarning
88+
def _call_and_check(
89+
klass, msg, how, gb, groupby_func, args, warn_msg="", warn_category=FutureWarning
90+
):
91+
warn_klass = None if warn_msg == "" else warn_category
8992
with tm.assert_produces_warning(warn_klass, match=warn_msg, check_stacklevel=False):
9093
if klass is None:
9194
if how == "method":
@@ -183,9 +186,23 @@ def test_groupby_raises_string(
183186
if groupby_func == "fillna":
184187
kind = "Series" if groupby_series else "DataFrame"
185188
warn_msg = f"{kind}GroupBy.fillna is deprecated"
189+
elif groupby_func not in reduction_kernels and how == "agg":
190+
warn_msg = (
191+
f"In the future, using the non-aggregation func='{groupby_func}' will "
192+
"raise a ValueError"
193+
)
186194
else:
187195
warn_msg = ""
188-
_call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg)
196+
_call_and_check(
197+
klass,
198+
msg,
199+
how,
200+
gb,
201+
groupby_func,
202+
args,
203+
warn_msg,
204+
warn_category=DeprecationWarning,
205+
)
189206

190207

191208
@pytest.mark.parametrize("how", ["agg", "transform"])
@@ -287,12 +304,30 @@ def test_groupby_raises_datetime(
287304

288305
if groupby_func in ["any", "all"]:
289306
warn_msg = f"'{groupby_func}' with datetime64 dtypes is deprecated"
307+
warn_category = FutureWarning
290308
elif groupby_func == "fillna":
291309
kind = "Series" if groupby_series else "DataFrame"
292310
warn_msg = f"{kind}GroupBy.fillna is deprecated"
311+
warn_category = FutureWarning
312+
elif groupby_func not in reduction_kernels and how == "agg":
313+
warn_msg = (
314+
f"In the future, using the non-aggregation func='{groupby_func}' will "
315+
"raise a ValueError"
316+
)
317+
warn_category = DeprecationWarning
293318
else:
294319
warn_msg = ""
295-
_call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg=warn_msg)
320+
warn_category = FutureWarning
321+
_call_and_check(
322+
klass,
323+
msg,
324+
how,
325+
gb,
326+
groupby_func,
327+
args,
328+
warn_msg=warn_msg,
329+
warn_category=warn_category,
330+
)
296331

297332

298333
@pytest.mark.parametrize("how", ["agg", "transform"])
@@ -487,9 +522,19 @@ def test_groupby_raises_category(
487522
if groupby_func == "fillna":
488523
kind = "Series" if groupby_series else "DataFrame"
489524
warn_msg = f"{kind}GroupBy.fillna is deprecated"
525+
warn_category = FutureWarning
526+
elif groupby_func not in reduction_kernels and how == "agg":
527+
warn_msg = (
528+
f"In the future, using the non-aggregation func='{groupby_func}' "
529+
"will raise a ValueError"
530+
)
531+
warn_category = DeprecationWarning
490532
else:
491533
warn_msg = ""
492-
_call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg)
534+
warn_category = FutureWarning
535+
_call_and_check(
536+
klass, msg, how, gb, groupby_func, args, warn_msg, warn_category=warn_category
537+
)
493538

494539

495540
@pytest.mark.parametrize("how", ["agg", "transform"])
@@ -660,6 +705,13 @@ def test_groupby_raises_category_on_category(
660705
if groupby_func == "fillna":
661706
kind = "Series" if groupby_series else "DataFrame"
662707
warn_msg = f"{kind}GroupBy.fillna is deprecated"
708+
warn_category = FutureWarning
709+
elif groupby_func not in reduction_kernels and how == "agg":
710+
warn_msg = f"using the non-aggregation func='{groupby_func}' will raise"
711+
warn_category = DeprecationWarning
663712
else:
664713
warn_msg = ""
665-
_call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg)
714+
warn_category = FutureWarning
715+
_call_and_check(
716+
klass, msg, how, gb, groupby_func, args, warn_msg, warn_category=warn_category
717+
)

0 commit comments

Comments
 (0)