-
-
Notifications
You must be signed in to change notification settings - Fork 19.3k
BUG: Validate numeric_only parameter in groupby aggregations #62842
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
52e07b5
b2f18ef
9655a9a
6ef5696
5a72c79
8056bb8
63f8443
bb9b0ab
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -1754,44 +1754,48 @@ def _cython_agg_general( | |
| **kwargs, | ||
| ): | ||
| # Note: we never get here with how="ohlc" for DataFrameGroupBy; | ||
| # that goes through SeriesGroupBy | ||
| # that goes through SeriesGroupBy | ||
|
|
||
| data = self._get_data_to_aggregate(numeric_only=numeric_only, name=how) | ||
| # Check to confirm numeric_only is fed either True or False and no other data type | ||
|
||
| if(isinstance(numeric_only, bool)): | ||
| data = self._get_data_to_aggregate(numeric_only=numeric_only, name=how) | ||
|
||
|
|
||
| def array_func(values: ArrayLike) -> ArrayLike: | ||
| try: | ||
| result = self._grouper._cython_operation( | ||
| "aggregate", | ||
| values, | ||
| how, | ||
| axis=data.ndim - 1, | ||
| min_count=min_count, | ||
| **kwargs, | ||
| ) | ||
| except NotImplementedError: | ||
| # generally if we have numeric_only=False | ||
| # and non-applicable functions | ||
| # try to python agg | ||
| # TODO: shouldn't min_count matter? | ||
| # TODO: avoid special casing SparseArray here | ||
| if how in ["any", "all"] and isinstance(values, SparseArray): | ||
| pass | ||
| elif alt is None or how in ["any", "all", "std", "sem"]: | ||
| raise # TODO: re-raise as TypeError? should not be reached | ||
| else: | ||
| return result | ||
| def array_func(values: ArrayLike) -> ArrayLike: | ||
| try: | ||
| result = self._grouper._cython_operation( | ||
| "aggregate", | ||
| values, | ||
| how, | ||
| axis=data.ndim - 1, | ||
| min_count=min_count, | ||
| **kwargs, | ||
| ) | ||
| except NotImplementedError: | ||
| # generally if we have numeric_only=False | ||
| # and non-applicable functions | ||
| # try to python agg | ||
| # TODO: shouldn't min_count matter? | ||
| # TODO: avoid special casing SparseArray here | ||
| if how in ["any", "all"] and isinstance(values, SparseArray): | ||
| pass | ||
| elif alt is None or how in ["any", "all", "std", "sem"]: | ||
| raise # TODO: re-raise as TypeError? should not be reached | ||
| else: | ||
| return result | ||
|
|
||
| assert alt is not None | ||
| result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt) | ||
| return result | ||
| assert alt is not None | ||
| result = self._agg_py_fallback(how, values, ndim=data.ndim, alt=alt) | ||
| return result | ||
|
|
||
| new_mgr = data.grouped_reduce(array_func) | ||
| res = self._wrap_agged_manager(new_mgr) | ||
| if how in ["idxmin", "idxmax"]: | ||
| # mypy expects how to be Literal["idxmin", "idxmax"]. | ||
| res = self._wrap_idxmax_idxmin(res, how=how, skipna=kwargs["skipna"]) # type: ignore[arg-type] | ||
| out = self._wrap_aggregated_output(res) | ||
| return out | ||
| new_mgr = data.grouped_reduce(array_func) | ||
| res = self._wrap_agged_manager(new_mgr) | ||
| if how in ["idxmin", "idxmax"]: | ||
| # mypy expects how to be Literal["idxmin", "idxmax"]. | ||
| res = self._wrap_idxmax_idxmin(res, how=how, skipna=kwargs["skipna"]) # type: ignore[arg-type] | ||
| out = self._wrap_aggregated_output(res) | ||
| return out | ||
| else: | ||
| raise ValueError("numeric_only accepts only Boolean values") | ||
|
|
||
| def _cython_transform(self, how: str, numeric_only: bool = False, **kwargs): | ||
| raise AbstractMethodError(self) | ||
|
|
||
| Original file line number | Diff line number | Diff line change | ||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -1520,3 +1520,27 @@ def test_groupby_std_datetimelike(): | |||||||||||
| exp_ser = Series([td1 * 2, td1, td1, td1, td4], index=np.arange(5)) | ||||||||||||
| expected = DataFrame({"A": exp_ser, "B": exp_ser, "C": exp_ser}) | ||||||||||||
| tm.assert_frame_equal(result, expected) | ||||||||||||
|
|
||||||||||||
| def test_mean_numeric_only_validates_bool(): | ||||||||||||
| """ | ||||||||||||
| Test that numeric_only parameter only accepts boolean values. | ||||||||||||
| See GH#62778 | ||||||||||||
| """ | ||||||||||||
|
||||||||||||
| """ | |
| Test that numeric_only parameter only accepts boolean values. | |
| See GH#62778 | |
| """ | |
| # GH#62778 |
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you remove this comment? It just repeats the code.
Outdated
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you remove these? The test suite already has many tests for numeric_only being specified or not specified, so these do not increase our test coverage.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So should I remove this whole function from the test file or just these 3 lines of code?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just these four lines.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@rhshadrach I've made the suggested changes. I had already updated the comment in groupby.py but forgot to stage it with `git add`, so it didn't show up in the changes here.
This version should be fine. Please let me know if any other changes are needed.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you revert this change?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Still needs to be reverted in order to minimize the diff.