Skip to content

Commit 93b3eb6

Browse files
committed
DEPR: automatic upcasting in reindex
1 parent f33207b commit 93b3eb6

File tree

6 files changed

+115
-23
lines changed

6 files changed

+115
-23
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -740,6 +740,7 @@ Other Deprecations
740740
- Deprecated backward-compatibility behavior for :meth:`DataFrame.select_dtypes` matching "str" dtype when ``np.object_`` is specified (:issue:`61916`)
741741
- Deprecated option "future.no_silent_downcasting", as it is no longer used. In a future version accessing this option will raise (:issue:`59502`)
742742
- Deprecated silent casting of non-datetime 'other' to datetime in :meth:`Series.combine_first` (:issue:`62931`)
743+
- Deprecated reindexing with a ``fill_value`` that cannot be held by the original object's dtype; explicitly cast to a common dtype before reindexing instead (:issue:`53910`)
743744
- Deprecated slicing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` using a ``datetime.date`` object, explicitly cast to :class:`Timestamp` instead (:issue:`35830`)
744745
- Deprecated support for the DataFrame Interchange Protocol (:issue:`56732`)
745746
- Deprecated the 'inplace' keyword from :meth:`Resampler.interpolate`, as passing ``True`` raises ``AttributeError`` (:issue:`58690`)

pandas/core/array_algos/take.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,16 @@
66
cast,
77
overload,
88
)
9+
import warnings
910

1011
import numpy as np
1112

1213
from pandas._libs import (
1314
algos as libalgos,
1415
lib,
1516
)
17+
from pandas.errors import Pandas4Warning
18+
from pandas.util._exceptions import find_stack_level
1619

1720
from pandas.core.dtypes.cast import maybe_promote
1821
from pandas.core.dtypes.common import (
@@ -94,6 +97,15 @@ def take_nd(
9497
fill_value = na_value_for_dtype(arr.dtype, compat=False)
9598
elif lib.is_np_dtype(arr.dtype, "mM"):
9699
dtype, fill_value = maybe_promote(arr.dtype, fill_value)
100+
if dtype != arr.dtype:
101+
# GH#53910
102+
warnings.warn(
103+
"reindexing with a fill_value that cannot be held by the "
104+
"original dtype is deprecated. Explicitly cast to a common "
105+
f"dtype (in this case {dtype}) instead.",
106+
Pandas4Warning,
107+
stacklevel=find_stack_level(),
108+
)
97109
if arr.dtype != dtype:
98110
# EA.take is strict about returning a new object of the same type
99111
# so for that case cast upfront
@@ -185,6 +197,10 @@ def take_2d_multi(
185197
indexer = row_idx, col_idx
186198
mask_info = None
187199

200+
if lib.is_float(fill_value) and fill_value.is_integer():
201+
# Avoid warning if possible
202+
fill_value = int(fill_value)
203+
188204
# check for promotion based on types only (do this first because
189205
# it's faster than computing a mask)
190206
dtype, fill_value = maybe_promote(arr.dtype, fill_value)
@@ -202,6 +218,20 @@ def take_2d_multi(
202218
# to crash when trying to cast it to dtype)
203219
dtype, fill_value = arr.dtype, arr.dtype.type()
204220

221+
if dtype != arr.dtype and not (
222+
arr.dtype.kind in "iub"
223+
and lib.is_float(fill_value)
224+
and np.isnan(fill_value)
225+
):
226+
# GH#53910
227+
warnings.warn(
228+
"reindexing with a fill_value that cannot be held by the "
229+
"original dtype is deprecated. Explicitly cast to a common "
230+
f"dtype (in this case {dtype}) instead.",
231+
Pandas4Warning,
232+
stacklevel=find_stack_level(),
233+
)
234+
205235
# at this point, it's guaranteed that dtype can hold both the arr values
206236
# and the fill_value
207237
out_shape = len(row_idx), len(col_idx)
@@ -528,8 +558,20 @@ def _take_preprocess_indexer_and_fill_value(
528558
else:
529559
# check for promotion based on types only (do this first because
530560
# it's faster than computing a mask)
561+
if lib.is_float(fill_value) and fill_value.is_integer():
562+
# Avoid warning if possible
563+
fill_value = int(fill_value)
531564
dtype, fill_value = maybe_promote(arr.dtype, fill_value)
532565
if dtype != arr.dtype:
566+
if not (lib.is_float(fill_value) and np.isnan(fill_value)):
567+
# GH#53910
568+
warnings.warn(
569+
"reindexing with a fill_value that cannot be held by the "
570+
"original dtype is deprecated. Explicitly cast to a common "
571+
f"dtype (in this case {dtype}) instead.",
572+
Pandas4Warning,
573+
stacklevel=find_stack_level(),
574+
)
533575
# check if promotion is actually required based on indexer
534576
if mask is not None:
535577
needs_masking = True

pandas/core/indexes/base.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
from pandas.errors import (
7070
DuplicateLabelError,
7171
InvalidIndexError,
72+
Pandas4Warning,
7273
)
7374
from pandas.util._decorators import (
7475
Appender,
@@ -7888,11 +7889,17 @@ def get_values_for_csv(
78887889
"""
78897890
if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm":
78907891
# GH#40754 Convert categorical datetimes to datetime array
7891-
values = algos.take_nd(
7892-
values.categories._values,
7893-
ensure_platform_int(values._codes),
7894-
fill_value=na_rep,
7895-
)
7892+
with warnings.catch_warnings():
7893+
warnings.filterwarnings(
7894+
"ignore",
7895+
"reindexing with a fill_value that cannot be held",
7896+
Pandas4Warning,
7897+
)
7898+
values = algos.take_nd(
7899+
values.categories._values,
7900+
ensure_platform_int(values._codes),
7901+
fill_value=na_rep,
7902+
)
78967903

78977904
values = ensure_wrapped_if_datetimelike(values)
78987905

pandas/tests/frame/methods/test_reindex.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,10 @@ def test_reindex_date_fill_value(self):
168168
ts = df.iloc[0, 0]
169169
fv = ts.date()
170170

171-
res = df.reindex(index=range(4), columns=["A", "B", "C"], fill_value=fv)
171+
msg = "reindexing with a fill_value that cannot be held"
172+
173+
with tm.assert_produces_warning(Pandas4Warning, match=msg):
174+
res = df.reindex(index=range(4), columns=["A", "B", "C"], fill_value=fv)
172175

173176
expected = DataFrame(
174177
{"A": df["A"].tolist() + [fv], "B": df["B"].tolist() + [fv], "C": [fv] * 4},
@@ -177,7 +180,8 @@ def test_reindex_date_fill_value(self):
177180
tm.assert_frame_equal(res, expected)
178181

179182
# only reindexing rows
180-
res = df.reindex(index=range(4), fill_value=fv)
183+
with tm.assert_produces_warning(Pandas4Warning, match=msg):
184+
res = df.reindex(index=range(4), fill_value=fv)
181185
tm.assert_frame_equal(res, expected[["A", "B"]])
182186

183187
# same with a datetime-castable str
@@ -796,7 +800,9 @@ def test_reindex_fill_value(self):
796800

797801
# other dtypes
798802
df["foo"] = "foo"
799-
result = df.reindex(range(15), fill_value="0")
803+
msg = "reindexing with a fill_value that cannot be held"
804+
with tm.assert_produces_warning(Pandas4Warning, match=msg):
805+
result = df.reindex(range(15), fill_value="0")
800806
expected = df.reindex(range(15)).fillna("0")
801807
tm.assert_frame_equal(result, expected)
802808

@@ -1227,7 +1233,9 @@ def test_reindex_datetimelike_to_object(self, dtype):
12271233
index = df.index.append(Index([1]))
12281234
columns = df.columns.append(Index(["foo"]))
12291235

1230-
res = df.reindex(index=index, columns=columns, fill_value=fv)
1236+
msg = "reindexing with a fill_value that cannot be held"
1237+
with tm.assert_produces_warning(Pandas4Warning, match=msg):
1238+
res = df.reindex(index=index, columns=columns, fill_value=fv)
12311239

12321240
expected = DataFrame(
12331241
{

pandas/tests/series/methods/test_reindex.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import numpy as np
22
import pytest
33

4+
from pandas.errors import Pandas4Warning
5+
46
from pandas import (
57
NA,
68
Categorical,
@@ -314,7 +316,9 @@ def test_reindex_fill_value_datetimelike_upcast(dtype, fill_value):
314316

315317
ser = Series([NaT], dtype=dtype)
316318

317-
result = ser.reindex([0, 1], fill_value=fill_value)
319+
msg = "reindexing with a fill_value that cannot be held"
320+
with tm.assert_produces_warning(Pandas4Warning, match=msg):
321+
result = ser.reindex([0, 1], fill_value=fill_value)
318322
expected = Series([NaT, fill_value], index=range(2), dtype=object)
319323
tm.assert_series_equal(result, expected)
320324

pandas/tests/test_take.py

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import pytest
55

66
from pandas._libs import iNaT
7+
from pandas.errors import Pandas4Warning
78

89
from pandas import array
910
import pandas._testing as tm
@@ -15,7 +16,7 @@
1516
(np.int8, np.int16(127), np.int8),
1617
(np.int8, np.int16(128), np.int16),
1718
(np.int32, 1, np.int32),
18-
(np.int32, 2.0, np.float64),
19+
(np.int32, 2.0, np.int32),
1920
(np.int32, 3.0 + 4.0j, np.complex128),
2021
(np.int32, True, np.object_),
2122
(np.int32, "", np.object_),
@@ -43,75 +44,104 @@ def dtype_fill_out_dtype(request):
4344
class TestTake:
4445
def test_1d_fill_nonna(self, dtype_fill_out_dtype):
4546
dtype, fill_value, out_dtype = dtype_fill_out_dtype
47+
48+
warn = None
49+
if out_dtype != dtype:
50+
warn = Pandas4Warning
51+
msg = "reindexing with a fill_value that cannot be held"
52+
4653
data = np.random.default_rng(2).integers(0, 2, 4).astype(dtype)
4754
indexer = [2, 1, 0, -1]
4855

49-
result = algos.take_nd(data, indexer, fill_value=fill_value)
56+
with tm.assert_produces_warning(warn, match=msg):
57+
result = algos.take_nd(data, indexer, fill_value=fill_value)
5058
assert (result[[0, 1, 2]] == data[[2, 1, 0]]).all()
5159
assert result[3] == fill_value
5260
assert result.dtype == out_dtype
5361

5462
indexer = [2, 1, 0, 1]
5563

56-
result = algos.take_nd(data, indexer, fill_value=fill_value)
64+
with tm.assert_produces_warning(warn, match=msg):
65+
result = algos.take_nd(data, indexer, fill_value=fill_value)
5766
assert (result[[0, 1, 2, 3]] == data[indexer]).all()
5867
assert result.dtype == dtype
5968

6069
def test_2d_fill_nonna(self, dtype_fill_out_dtype):
6170
dtype, fill_value, out_dtype = dtype_fill_out_dtype
71+
72+
warn = None
73+
if out_dtype != dtype:
74+
warn = Pandas4Warning
75+
msg = "reindexing with a fill_value that cannot be held"
76+
6277
data = np.random.default_rng(2).integers(0, 2, (5, 3)).astype(dtype)
6378
indexer = [2, 1, 0, -1]
6479

65-
result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value)
80+
with tm.assert_produces_warning(warn, match=msg):
81+
result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value)
6682
assert (result[[0, 1, 2], :] == data[[2, 1, 0], :]).all()
6783
assert (result[3, :] == fill_value).all()
6884
assert result.dtype == out_dtype
6985

70-
result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value)
86+
with tm.assert_produces_warning(warn, match=msg):
87+
result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value)
7188
assert (result[:, [0, 1, 2]] == data[:, [2, 1, 0]]).all()
7289
assert (result[:, 3] == fill_value).all()
7390
assert result.dtype == out_dtype
7491

7592
indexer = [2, 1, 0, 1]
76-
result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value)
93+
with tm.assert_produces_warning(warn, match=msg):
94+
result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value)
7795
assert (result[[0, 1, 2, 3], :] == data[indexer, :]).all()
7896
assert result.dtype == dtype
7997

80-
result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value)
98+
with tm.assert_produces_warning(warn, match=msg):
99+
result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value)
81100
assert (result[:, [0, 1, 2, 3]] == data[:, indexer]).all()
82101
assert result.dtype == dtype
83102

84103
def test_3d_fill_nonna(self, dtype_fill_out_dtype):
85104
dtype, fill_value, out_dtype = dtype_fill_out_dtype
86105

106+
warn = None
107+
if out_dtype != dtype:
108+
warn = Pandas4Warning
109+
msg = "reindexing with a fill_value that cannot be held"
110+
87111
data = np.random.default_rng(2).integers(0, 2, (5, 4, 3)).astype(dtype)
88112
indexer = [2, 1, 0, -1]
89113

90-
result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value)
114+
with tm.assert_produces_warning(warn, match=msg):
115+
result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value)
91116
assert (result[[0, 1, 2], :, :] == data[[2, 1, 0], :, :]).all()
92117
assert (result[3, :, :] == fill_value).all()
93118
assert result.dtype == out_dtype
94119

95-
result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value)
120+
with tm.assert_produces_warning(warn, match=msg):
121+
result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value)
96122
assert (result[:, [0, 1, 2], :] == data[:, [2, 1, 0], :]).all()
97123
assert (result[:, 3, :] == fill_value).all()
98124
assert result.dtype == out_dtype
99125

100-
result = algos.take_nd(data, indexer, axis=2, fill_value=fill_value)
126+
with tm.assert_produces_warning(warn, match=msg):
127+
result = algos.take_nd(data, indexer, axis=2, fill_value=fill_value)
101128
assert (result[:, :, [0, 1, 2]] == data[:, :, [2, 1, 0]]).all()
102129
assert (result[:, :, 3] == fill_value).all()
103130
assert result.dtype == out_dtype
104131

105132
indexer = [2, 1, 0, 1]
106-
result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value)
133+
with tm.assert_produces_warning(warn, match=msg):
134+
result = algos.take_nd(data, indexer, axis=0, fill_value=fill_value)
107135
assert (result[[0, 1, 2, 3], :, :] == data[indexer, :, :]).all()
108136
assert result.dtype == dtype
109137

110-
result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value)
138+
with tm.assert_produces_warning(warn, match=msg):
139+
result = algos.take_nd(data, indexer, axis=1, fill_value=fill_value)
111140
assert (result[:, [0, 1, 2, 3], :] == data[:, indexer, :]).all()
112141
assert result.dtype == dtype
113142

114-
result = algos.take_nd(data, indexer, axis=2, fill_value=fill_value)
143+
with tm.assert_produces_warning(warn, match=msg):
144+
result = algos.take_nd(data, indexer, axis=2, fill_value=fill_value)
115145
assert (result[:, :, [0, 1, 2, 3]] == data[:, :, indexer]).all()
116146
assert result.dtype == dtype
117147

0 commit comments

Comments
 (0)