Skip to content
72 changes: 38 additions & 34 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1754,44 +1754,48 @@ def _cython_agg_general(
**kwargs,
):
# Note: we never get here with how="ohlc" for DataFrameGroupBy;
# that goes through SeriesGroupBy
# that goes through SeriesGroupBy
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you revert this change.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still needs to be reverted in order to minimize the diff.


data = self._get_data_to_aggregate(numeric_only=numeric_only, name=how)
# Check to confirm numeric_only is fed either True or False and no other data type
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment repeats the code, can you remove it.

if(isinstance(numeric_only, bool)):
data = self._get_data_to_aggregate(numeric_only=numeric_only, name=how)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you change negate this condition and raise here; then the rest of this function can go untouched which makes the diff smaller. It also decreases the amount of indentation needed, improving readability.

Also use is_bool from pandsa.core.dtypes.common. We should accept e.g. np.bool here.


def array_func(values: ArrayLike) -> ArrayLike:
try:
result = self._grouper._cython_operation(
"aggregate",
values,
how,
axis=data.ndim - 1,
min_count=min_count,
**kwargs,
)
except NotImplementedError:
# generally if we have numeric_only=False
# and non-applicable functions
# try to python agg
# TODO: shouldn't min_count matter?
# TODO: avoid special casing SparseArray here
if how in ["any", "all"] and isinstance(values, SparseArray):
pass
elif alt is None or how in ["any", "all", "std", "sem"]:
raise # TODO: re-raise as TypeError? should not be reached
else:
return result
def array_func(values: ArrayLike) -> ArrayLike:
try:
result = self._grouper._cython_operation(
"aggregate",
values,
how,
axis=data.ndim - 1,
min_count=min_count,
**kwargs,
)
except NotImplementedError:
# generally if we have numeric_only=False
# and non-applicable functions
# try to python agg
# TODO: shouldn't min_count matter?
# TODO: avoid special casing SparseArray here
if how in ["any", "all"] and isinstance(values, SparseArray):
pass
elif alt is None or how in ["any", "all", "std", "sem"]:
raise # TODO: re-raise as TypeError? should not be reached
else:
return result

assert alt is not None
result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)
return result
assert alt is not None
result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt)
return result

new_mgr = data.grouped_reduce(array_func)
res = self._wrap_agged_manager(new_mgr)
if how in ["idxmin", "idxmax"]:
# mypy expects how to be Literal["idxmin", "idxmax"].
res = self._wrap_idxmax_idxmin(res, how=how, skipna=kwargs["skipna"]) # type: ignore[arg-type]
out = self._wrap_aggregated_output(res)
return out
new_mgr = data.grouped_reduce(array_func)
res = self._wrap_agged_manager(new_mgr)
if how in ["idxmin", "idxmax"]:
# mypy expects how to be Literal["idxmin", "idxmax"].
res = self._wrap_idxmax_idxmin(res, how=how, skipna=kwargs["skipna"]) # type: ignore[arg-type]
out = self._wrap_aggregated_output(res)
return out
else:
raise ValueError("numeric_only accepts only Boolean values")

def _cython_transform(self, how: str, numeric_only: bool = False, **kwargs):
raise AbstractMethodError(self)
Expand Down
24 changes: 24 additions & 0 deletions pandas/tests/groupby/test_reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1520,3 +1520,27 @@ def test_groupby_std_datetimelike():
exp_ser = Series([td1 * 2, td1, td1, td1, td4], index=np.arange(5))
expected = DataFrame({"A": exp_ser, "B": exp_ser, "C": exp_ser})
tm.assert_frame_equal(result, expected)

def test_mean_numeric_only_validates_bool():
"""
Test that numeric_only parameter only accepts boolean values.
See GH#62778
"""
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"""
Test that numeric_only parameter only accepts boolean values.
See GH#62778
"""
# GH#62778

df = pd.DataFrame({"A": range(5), "B": range(5)})

# These test cases should raise a ValueError
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you remove this comment, it repeats the code.

msg = "numeric_only accepts only Boolean values"
with pytest.raises(ValueError, match=msg):
df.groupby(["A"]).mean(["B"])

with pytest.raises(ValueError, match=msg):
df.groupby(["A"]).mean(numeric_only="True")

with pytest.raises(ValueError, match=msg):
df.groupby(["A"]).mean(numeric_only=1)

# These test cases should work absolutely fine
df.groupby(["A"]).mean()
df.groupby(["A"]).mean(numeric_only=True)
df.groupby(["A"]).mean(numeric_only=False)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you remove these; the test suite has many tests for numeric_only being specified or not specified, these are not increasing our test coverage.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So should I remove this whole function from the test file or just these 3 lines of code?

Copy link
Member

@rhshadrach rhshadrach Oct 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just these four lines.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rhshadrach Made the suggested changes. I had made the changes in the comment inside groupby.py file but forgot to add it while doing git add, hence, it didn't show up in the changes here.

This version should be fine. Please let me know if any other changes are there.


Loading