
Commit 477cc4f

ENH: Add future.python_scalars
1 parent ebbd16c commit 477cc4f

File tree

22 files changed: +247 −85 lines changed


pandas/_config/__init__.py

Lines changed: 5 additions & 0 deletions
@@ -35,6 +35,11 @@ def using_string_dtype() -> bool:
     return _mode_options["infer_string"]


+def using_python_scalars() -> bool:
+    _mode_options = _global_config["future"]
+    return _mode_options["python_scalars"]
+
+
 def is_nan_na() -> bool:
     _mode_options = _global_config["mode"]
     return _mode_options["nan_is_na"]

pandas/conftest.py

Lines changed: 5 additions & 0 deletions
@@ -2097,6 +2097,11 @@ def using_infer_string() -> bool:
     return pd.options.future.infer_string is True


+@pytest.fixture
+def using_python_scalars() -> bool:
+    return pd.options.future.python_scalars is True
+
+
 _warsaws: list[Any] = ["Europe/Warsaw", "dateutil/Europe/Warsaw"]
 if pytz is not None:
     _warsaws.append(pytz.timezone("Europe/Warsaw"))

pandas/core/arraylike.py

Lines changed: 4 additions & 1 deletion
@@ -15,6 +15,7 @@
 from pandas._libs import lib
 from pandas._libs.ops_dispatch import maybe_dispatch_ufunc_to_dunder_op

+from pandas.core.dtypes.cast import maybe_unbox_numpy_scalar
 from pandas.core.dtypes.generic import ABCNDFrame

 from pandas.core import roperator
@@ -529,4 +530,6 @@ def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):

     # By default, numpy's reductions do not skip NaNs, so we have to
     # pass skipna=False
-    return getattr(self, method_name)(skipna=False, **kwargs)
+    result = getattr(self, method_name)(skipna=False, **kwargs)
+    result = maybe_unbox_numpy_scalar(result)
+    return result
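
To make the ufunc path concrete, here is a hedged sketch of how it can surface in user code; whether a given reduction routes through dispatch_reduction_ufunc depends on the ufunc and dtype, so the commented types are expectations under the new option, not verified output.

```python
import numpy as np
import pandas as pd

# Assumes a pandas build that includes the future.python_scalars option.
pd.set_option("future.python_scalars", True)

s = pd.Series([1, 2, 3], dtype="Int64")

# np.add.reduce on a Series is dispatched to the matching pandas reduction
# ("sum"); with the option on, the result passes through maybe_unbox_numpy_scalar.
result = np.add.reduce(s)
print(result, type(result))  # expected: 6 <class 'int'>
```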

pandas/core/arrays/masked.py

Lines changed: 14 additions & 5 deletions
@@ -12,7 +12,10 @@

 import numpy as np

-from pandas._config import is_nan_na
+from pandas._config import (
+    is_nan_na,
+    using_python_scalars,
+)

 from pandas._libs import (
     algos as libalgos,
@@ -27,7 +30,10 @@
 from pandas.errors import AbstractMethodError

 from pandas.core.dtypes.base import ExtensionDtype
-from pandas.core.dtypes.cast import maybe_downcast_to_dtype
+from pandas.core.dtypes.cast import (
+    maybe_downcast_to_dtype,
+    maybe_unbox_numpy_scalar,
+)
 from pandas.core.dtypes.common import (
     is_bool,
     is_integer_dtype,
@@ -1518,7 +1524,10 @@ def _reduce(
             if isna(result):
                 return self._wrap_na_result(name=name, axis=0, mask_size=(1,))
             else:
-                result = result.reshape(1)
+                if using_python_scalars():
+                    result = np.array([result])
+                else:
+                    result = result.reshape(1)
                 mask = np.zeros(1, dtype=bool)
                 return self._maybe_mask_result(result, mask)

@@ -1742,7 +1751,7 @@ def any(

         values = self._data.copy()
         np.putmask(values, self._mask, self.dtype._falsey_value)
-        result = values.any()
+        result = maybe_unbox_numpy_scalar(values.any())
         if skipna:
             return result
         else:
@@ -1828,7 +1837,7 @@ def all(

         values = self._data.copy()
         np.putmask(values, self._mask, self.dtype._truthy_value)
-        result = values.all(axis=axis)
+        result = maybe_unbox_numpy_scalar(values.all(axis=axis))

         if skipna:
             return result  # type: ignore[return-value]
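
A rough sketch of the user-visible effect on nullable (masked) arrays, assuming the option is enabled; the commented types are what the change aims for, not verified output.

```python
import pandas as pd

pd.set_option("future.python_scalars", True)  # option introduced by this commit

arr = pd.array([True, False, None], dtype="boolean")

# any()/all() now pass their NumPy result through maybe_unbox_numpy_scalar,
# so np.bool_ values are expected to come back as plain Python bools.
print(arr.any(), type(arr.any()))  # expected: True <class 'bool'>
print(arr.all(), type(arr.all()))  # expected: False <class 'bool'> (NA skipped)
```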

pandas/core/config_init.py

Lines changed: 7 additions & 0 deletions
@@ -900,5 +900,12 @@ def register_converter_cb(key: str) -> None:
         validator=is_one_of_factory([True, False]),
     )

+    cf.register_option(
+        "python_scalars",
+        False if os.environ.get("PANDAS_FUTURE_PYTHON_SCALARS", "0") == "0" else True,
+        "Whether to return Python scalars instead of NumPy or PyArrow scalars",
+        validator=is_one_of_factory([True, False]),
+    )
+
 # GH#59502
 cf.deprecate_option("future.no_silent_downcasting", Pandas4Warning)
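
A minimal opt-in sketch based on the option name and environment variable registered above; the commented return type is the intent of the change rather than verified output.

```python
import pandas as pd

# Opt in at runtime ...
pd.set_option("future.python_scalars", True)

s = pd.Series([1.0, 2.0, 3.0])
print(type(s.sum()))  # expected: <class 'float'> instead of numpy.float64

# ... or flip the default before pandas is imported:
#     PANDAS_FUTURE_PYTHON_SCALARS=1 python my_script.py
```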

pandas/core/dtypes/cast.py

Lines changed: 11 additions & 0 deletions
@@ -20,6 +20,7 @@

 from pandas._config import (
     is_nan_na,
+    using_python_scalars,
     using_string_dtype,
 )

@@ -1434,6 +1435,16 @@ def construct_1d_arraylike_from_scalar(
     return subarr


+def maybe_unbox_numpy_scalar(value):
+    result = value
+    if using_python_scalars() and isinstance(value, np.generic):
+        if isinstance(result, np.longdouble):
+            result = float(result)
+        else:
+            result = value.item()
+    return result
+
+
 def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj):
     # Caller is responsible for checking dtype.kind in "mM"
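
For illustration, a simplified standalone re-implementation of the helper added above; the real function lives in pandas.core.dtypes.cast and additionally consults the future.python_scalars option before unboxing.

```python
import numpy as np

def unbox_numpy_scalar(value):
    # Only NumPy scalar types (np.generic) are converted; anything else
    # passes through unchanged.
    if isinstance(value, np.generic):
        if isinstance(value, np.longdouble):
            # longdouble is special-cased: convert via float() rather than
            # .item(), which may not yield a built-in float on every platform.
            return float(value)
        return value.item()
    return value

print(type(unbox_numpy_scalar(np.float64(1.5))))  # <class 'float'>
print(type(unbox_numpy_scalar(np.int64(3))))      # <class 'int'>
print(type(unbox_numpy_scalar(7)))                # <class 'int'> (unchanged)
```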

pandas/core/indexes/base.py

Lines changed: 6 additions & 5 deletions
@@ -92,6 +92,7 @@
     common_dtype_categorical_compat,
     find_result_type,
     infer_dtype_from,
+    maybe_unbox_numpy_scalar,
     np_can_hold_element,
 )
 from pandas.core.dtypes.common import (
@@ -7532,7 +7533,7 @@ def min(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs):
             # quick check
             first = self[0]
             if not isna(first):
-                return first
+                return maybe_unbox_numpy_scalar(first)

         if not self._is_multi and self.hasnans:
             # Take advantage of cache
@@ -7543,7 +7544,7 @@ def min(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs):
         if not self._is_multi and not isinstance(self._values, np.ndarray):
             return self._values._reduce(name="min", skipna=skipna)

-        return nanops.nanmin(self._values, skipna=skipna)
+        return maybe_unbox_numpy_scalar(nanops.nanmin(self._values, skipna=skipna))

     def max(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs):
         """
@@ -7596,18 +7597,18 @@ def max(self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs):
             # quick check
             last = self[-1]
             if not isna(last):
-                return last
+                return maybe_unbox_numpy_scalar(last)

         if not self._is_multi and self.hasnans:
             # Take advantage of cache
             mask = self._isnan
             if not skipna or mask.all():
-                return self._na_value
+                return maybe_unbox_numpy_scalar(self._na_value)

         if not self._is_multi and not isinstance(self._values, np.ndarray):
             return self._values._reduce(name="max", skipna=skipna)

-        return nanops.nanmax(self._values, skipna=skipna)
+        return maybe_unbox_numpy_scalar(nanops.nanmax(self._values, skipna=skipna))

     # --------------------------------------------------------------------
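
A short sketch of the effect on Index.min/Index.max under the option; expected types only, not verified output.

```python
import pandas as pd

pd.set_option("future.python_scalars", True)

# A monotonic integer Index takes the "quick check" fast path changed above
# (self[0] / self[-1]), which is now unboxed as well.
idx = pd.Index([1, 2, 3])
print(idx.min(), type(idx.min()))  # expected: 1 <class 'int'>
print(idx.max(), type(idx.max()))  # expected: 3 <class 'int'>
```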

pandas/core/interchange/column.py

Lines changed: 6 additions & 1 deletion
@@ -7,6 +7,8 @@

 import numpy as np

+from pandas._config import using_python_scalars
+
 from pandas._libs.lib import infer_dtype
 from pandas._libs.tslibs import iNaT
 from pandas.errors import NoBufferPresent
@@ -232,7 +234,10 @@ def null_count(self) -> int:
         """
         Number of null elements. Should always be known.
         """
-        return self._col.isna().sum().item()
+        result = self._col.isna().sum()
+        if not using_python_scalars():
+            result = result.item()
+        return result

     @property
     def metadata(self) -> dict[str, pd.Index]:
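
A hedged sketch of the interchange-protocol path touched above: null_count previously always called .item(), while under the new option the .sum() result is assumed to already be a Python int, so .item() is skipped.

```python
import pandas as pd

pd.set_option("future.python_scalars", True)

df = pd.DataFrame({"a": [1.0, None, 3.0]})
col = df.__dataframe__().get_column(0)
print(col.null_count, type(col.null_count))  # expected: 1 <class 'int'>
```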

pandas/core/series.py

Lines changed: 7 additions & 3 deletions
@@ -73,6 +73,7 @@
     find_common_type,
     infer_dtype_from,
     maybe_box_native,
+    maybe_unbox_numpy_scalar,
 )
 from pandas.core.dtypes.common import (
     is_dict_like,
@@ -2014,7 +2015,7 @@ def count(self) -> int:
         >>> s.count()
         2
         """
-        return notna(self._values).sum().astype("int64")
+        return maybe_unbox_numpy_scalar(notna(self._values).sum().astype("int64"))

     def mode(self, dropna: bool = True) -> Series:
         """
@@ -7356,7 +7357,7 @@ def _reduce(

         if isinstance(delegate, ExtensionArray):
             # dispatch to ExtensionArray interface
-            return delegate._reduce(name, skipna=skipna, **kwds)
+            result = delegate._reduce(name, skipna=skipna, **kwds)

         else:
             # dispatch to numpy arrays
@@ -7370,7 +7371,10 @@ def _reduce(
                     f"Series.{name} does not allow {kwd_name}={numeric_only} "
                     "with non-numeric dtypes."
                 )
-            return op(delegate, skipna=skipna, **kwds)
+            result = op(delegate, skipna=skipna, **kwds)
+
+        result = maybe_unbox_numpy_scalar(result)
+        return result

     @Appender(make_doc("any", ndim=1))
     # error: Signature of "any" incompatible with supertype "NDFrame"
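
A sketch of the Series-level effect: count() and the generic _reduce path now run their results through maybe_unbox_numpy_scalar. option_context keeps the setting scoped to the block; the commented types are expectations, not verified output.

```python
import pandas as pd

with pd.option_context("future.python_scalars", True):
    s = pd.Series([1.0, 2.0, None])
    print(s.count(), type(s.count()))  # expected: 2 <class 'int'>
    print(s.mean(), type(s.mean()))    # expected: 1.5 <class 'float'>
```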

pandas/tests/arrays/boolean/test_reduction.py

Lines changed: 28 additions & 18 deletions
@@ -25,12 +25,15 @@ def data():
         ([False, False], False, False, False, False),
     ],
 )
-def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
+def test_any_all(
+    values, exp_any, exp_all, exp_any_noskip, exp_all_noskip, using_python_scalars
+):
     # the methods return numpy scalars
-    exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any)
-    exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all)
-    exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip)
-    exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip)
+    if not using_python_scalars:
+        exp_any = pd.NA if exp_any is pd.NA else np.bool_(exp_any)
+        exp_all = pd.NA if exp_all is pd.NA else np.bool_(exp_all)
+        exp_any_noskip = pd.NA if exp_any_noskip is pd.NA else np.bool_(exp_any_noskip)
+        exp_all_noskip = pd.NA if exp_all_noskip is pd.NA else np.bool_(exp_all_noskip)

     for con in [pd.array, pd.Series]:
         a = con(values, dtype="boolean")
@@ -39,23 +42,30 @@ def test_any_all(values, exp_any, exp_all, exp_any_noskip, exp_all_noskip):
         assert a.any(skipna=False) is exp_any_noskip
         assert a.all(skipna=False) is exp_all_noskip

-        assert np.any(a.any()) is exp_any
-        assert np.all(a.all()) is exp_all

-
-def test_reductions_return_types(dropna, data, all_numeric_reductions):
+def test_reductions_return_types(
+    dropna, data, all_numeric_reductions, using_python_scalars
+):
     op = all_numeric_reductions
     s = pd.Series(data)
     if dropna:
         s = s.dropna()

-    if op in ("sum", "prod"):
-        assert isinstance(getattr(s, op)(), np.int_)
-    elif op == "count":
-        # Oddly on the 32 bit build (but not Windows), this is intc (!= intp)
-        assert isinstance(getattr(s, op)(), np.integer)
-    elif op in ("min", "max"):
-        assert isinstance(getattr(s, op)(), np.bool_)
+    if using_python_scalars:
+        expected = {
+            "sum": int,
+            "prod": int,
+            "count": int,
+            "min": bool,
+            "max": bool,
+        }.get(op, float)
     else:
-        # "mean", "std", "var", "median", "kurt", "skew"
-        assert isinstance(getattr(s, op)(), np.float64)
+        expected = {
+            "sum": np.int_,
+            "prod": np.int_,
+            "count": np.integer,
+            "min": np.bool_,
+            "max": np.bool_,
+        }.get(op, np.float64)
+    result = getattr(s, op)()
+    assert isinstance(result, expected), f"{type(result)} vs {expected}"
