Skip to content

Commit 55a9698

Browse files
BUG: Fixed assign failure when with Copy-on-Write (pandas-dev#60941)
Co-authored-by: Joris Van den Bossche <jorisvandenbossche@gmail.com>
1 parent fd40f9a commit 55a9698

File tree

7 files changed

+190
-26
lines changed

7 files changed

+190
-26
lines changed

doc/source/whatsnew/v2.3.3.rst

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,16 @@ Bug fixes
2727
with a compiled regex and custom flags (:issue:`62240`)
2828
- Fix :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
2929

30+
31+
Improvements and fixes for Copy-on-Write
32+
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
33+
34+
Bug fixes
35+
^^^^^^^^^
36+
37+
- The :meth:`DataFrame.iloc` now works correctly with ``copy_on_write`` option when assigning values after subsetting the columns of a homogeneous DataFrame (:issue:`60309`)
38+
39+
3040
.. ---------------------------------------------------------------------------
3141
.. _whatsnew_233.contributors:
3242

pandas/core/internals/managers.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -405,8 +405,27 @@ def setitem(self, indexer, value, warn: bool = True) -> Self:
405405
self._iset_split_block( # type: ignore[attr-defined]
406406
0, blk_loc, values
407407
)
408-
# first block equals values
409-
self.blocks[0].setitem((indexer[0], np.arange(len(blk_loc))), value)
408+
409+
indexer = list(indexer)
410+
# first block equals values we are setting to -> set to all columns
411+
if lib.is_integer(indexer[1]):
412+
col_indexer = 0
413+
elif len(blk_loc) > 1:
414+
col_indexer = slice(None) # type: ignore[assignment]
415+
else:
416+
col_indexer = np.arange(len(blk_loc)) # type: ignore[assignment]
417+
indexer[1] = col_indexer
418+
419+
row_indexer = indexer[0]
420+
if isinstance(row_indexer, np.ndarray) and row_indexer.ndim == 2:
421+
# numpy cannot handle a 2d indexer in combo with a slice
422+
row_indexer = np.squeeze(row_indexer, axis=1)
423+
if isinstance(row_indexer, np.ndarray) and len(row_indexer) == 0:
424+
# numpy does not like empty indexer combined with slice
425+
# and we are setting nothing anyway
426+
return self
427+
indexer[0] = row_indexer
428+
self.blocks[0].setitem(tuple(indexer), value)
410429
return self
411430
# No need to split if we either set all columns or on a single block
412431
# manager

pandas/tests/frame/indexing/test_indexing.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1500,13 +1500,17 @@ def test_set_2d_casting_date_to_int(self, col, indexer):
15001500
)
15011501
tm.assert_frame_equal(df, expected)
15021502

1503+
@pytest.mark.parametrize("has_ref", [True, False])
15031504
@pytest.mark.parametrize("col", [{}, {"name": "a"}])
1504-
def test_loc_setitem_reordering_with_all_true_indexer(self, col):
1505+
def test_loc_setitem_reordering_with_all_true_indexer(self, col, has_ref):
15051506
# GH#48701
15061507
n = 17
15071508
df = DataFrame({**col, "x": range(n), "y": range(n)})
1509+
value = df[["x", "y"]].copy()
15081510
expected = df.copy()
1509-
df.loc[n * [True], ["x", "y"]] = df[["x", "y"]]
1511+
if has_ref:
1512+
view = df[:] # noqa: F841
1513+
df.loc[n * [True], ["x", "y"]] = value
15101514
tm.assert_frame_equal(df, expected)
15111515

15121516
def test_loc_rhs_empty_warning(self):

pandas/tests/frame/indexing/test_setitem.py

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1400,6 +1400,75 @@ def test_frame_setitem_empty_dataframe(self):
14001400
)
14011401
tm.assert_frame_equal(df, expected)
14021402

1403+
def test_iloc_setitem_view_2dblock(self):
1404+
# https://github.com/pandas-dev/pandas/issues/60309
1405+
df_parent = DataFrame(
1406+
{
1407+
"A": [1, 4, 1, 5],
1408+
"B": [2, 5, 2, 6],
1409+
"C": [3, 6, 1, 7],
1410+
"D": [8, 9, 10, 11],
1411+
}
1412+
)
1413+
df_orig = df_parent.copy()
1414+
df = df_parent[["B", "C"]]
1415+
1416+
# Perform the iloc operation
1417+
df.iloc[[1, 3], :] = [[2, 2], [2, 2]]
1418+
1419+
# Check that original DataFrame is unchanged
1420+
tm.assert_frame_equal(df_parent, df_orig)
1421+
1422+
# Check that df is modified correctly
1423+
expected = DataFrame({"B": [2, 2, 2, 2], "C": [3, 2, 1, 2]}, index=df.index)
1424+
tm.assert_frame_equal(df, expected)
1425+
1426+
# with setting to subset of columns
1427+
df = df_parent[["B", "C", "D"]]
1428+
df.iloc[[1, 3], 0:3:2] = [[2, 2], [2, 2]]
1429+
tm.assert_frame_equal(df_parent, df_orig)
1430+
expected = DataFrame(
1431+
{"B": [2, 2, 2, 2], "C": [3, 6, 1, 7], "D": [8, 2, 10, 2]}, index=df.index
1432+
)
1433+
tm.assert_frame_equal(df, expected)
1434+
1435+
@pytest.mark.parametrize(
1436+
"indexer, value",
1437+
[
1438+
(([0, 2], slice(None)), [[2, 2, 2, 2], [2, 2, 2, 2]]),
1439+
((slice(None), slice(None)), 2),
1440+
((0, [1, 3]), [2, 2]),
1441+
(([0], 1), [2]),
1442+
(([0], np.int64(1)), [2]),
1443+
((slice(None), np.int64(1)), [2, 2, 2]),
1444+
((slice(None, 2), np.int64(1)), [2, 2]),
1445+
(
1446+
(np.array([False, True, False]), np.array([False, True, False, True])),
1447+
[2, 2],
1448+
),
1449+
],
1450+
)
1451+
def test_setitem_2dblock_with_ref(self, indexer, value):
1452+
# https://github.com/pandas-dev/pandas/issues/60309
1453+
arr = np.arange(12).reshape(3, 4)
1454+
1455+
df_parent = DataFrame(arr.copy(), columns=list("ABCD"))
1456+
# the test is specifically for the case where the df is backed by a single
1457+
# block (taking the non-split path)
1458+
assert df_parent._mgr.is_single_block
1459+
df_orig = df_parent.copy()
1460+
df = df_parent[:]
1461+
1462+
df.iloc[indexer] = value
1463+
1464+
# Check that original DataFrame is unchanged
1465+
tm.assert_frame_equal(df_parent, df_orig)
1466+
1467+
# Check that df is modified correctly
1468+
arr[indexer] = value
1469+
expected = DataFrame(arr, columns=list("ABCD"))
1470+
tm.assert_frame_equal(df, expected)
1471+
14031472

14041473
def test_full_setter_loc_incompatible_dtype():
14051474
# https://github.com/pandas-dev/pandas/issues/55791

pandas/tests/indexing/multiindex/test_loc.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,21 @@ def frame_random_data_integer_multi_index():
3333

3434

3535
class TestMultiIndexLoc:
36-
def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data):
36+
@pytest.mark.parametrize("has_ref", [True, False])
37+
def test_loc_setitem_frame_with_multiindex(
38+
self, multiindex_dataframe_random_data, has_ref
39+
):
3740
frame = multiindex_dataframe_random_data
41+
if has_ref:
42+
view = frame[:]
3843
frame.loc[("bar", "two"), "B"] = 5
3944
assert frame.loc[("bar", "two"), "B"] == 5
4045

4146
# with integer labels
4247
df = frame.copy()
4348
df.columns = list(range(3))
49+
if has_ref:
50+
view = df[:] # noqa: F841
4451
df.loc[("bar", "two"), 1] = 7
4552
assert df.loc[("bar", "two"), 1] == 7
4653

pandas/tests/indexing/test_iloc.py

Lines changed: 56 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -104,12 +104,17 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage
104104
expected = DataFrame({0: Series(cat.astype(object), dtype=object), 1: range(3)})
105105
tm.assert_frame_equal(df, expected)
106106

107+
@pytest.mark.parametrize("has_ref", [True, False])
107108
@pytest.mark.parametrize("box", [array, Series])
108-
def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_copy_on_write):
109+
def test_iloc_setitem_ea_inplace(
110+
self, frame_or_series, box, has_ref, using_copy_on_write
111+
):
109112
# GH#38952 Case with not setting a full column
110113
# IntegerArray without NAs
111114
arr = array([1, 2, 3, 4])
112115
obj = frame_or_series(arr.to_numpy("i8"))
116+
if has_ref:
117+
view = obj[:] # noqa: F841
113118

114119
if frame_or_series is Series:
115120
values = obj.values
@@ -125,14 +130,15 @@ def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_copy_on_write
125130
tm.assert_equal(obj, expected)
126131

127132
# Check that we are actually in-place
128-
if frame_or_series is Series:
129-
if using_copy_on_write:
130-
assert obj.values is not values
131-
assert np.shares_memory(obj.values, values)
133+
if not has_ref:
134+
if frame_or_series is Series:
135+
if using_copy_on_write:
136+
assert obj.values is not values
137+
assert np.shares_memory(obj.values, values)
138+
else:
139+
assert obj.values is values
132140
else:
133-
assert obj.values is values
134-
else:
135-
assert np.shares_memory(obj[0].values, values)
141+
assert np.shares_memory(obj[0].values, values)
136142

137143
def test_is_scalar_access(self):
138144
# GH#32085 index with duplicates doesn't matter for _is_scalar_access
@@ -426,12 +432,15 @@ def test_iloc_getitem_slice_dups(self):
426432
tm.assert_frame_equal(df.iloc[10:, :2], df2)
427433
tm.assert_frame_equal(df.iloc[10:, 2:], df1)
428434

429-
def test_iloc_setitem(self, warn_copy_on_write):
435+
@pytest.mark.parametrize("has_ref", [True, False])
436+
def test_iloc_setitem(self, warn_copy_on_write, has_ref):
430437
df = DataFrame(
431438
np.random.default_rng(2).standard_normal((4, 4)),
432439
index=np.arange(0, 8, 2),
433440
columns=np.arange(0, 12, 3),
434441
)
442+
if has_ref:
443+
view = df[:] # noqa: F841
435444

436445
df.iloc[1, 1] = 1
437446
result = df.iloc[1, 1]
@@ -448,27 +457,35 @@ def test_iloc_setitem(self, warn_copy_on_write):
448457
expected = Series([0, 1, 0], index=[4, 5, 6])
449458
tm.assert_series_equal(s, expected)
450459

451-
def test_iloc_setitem_axis_argument(self):
460+
@pytest.mark.parametrize("has_ref", [True, False])
461+
def test_iloc_setitem_axis_argument(self, has_ref):
452462
# GH45032
453463
df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]])
454464
df[1] = df[1].astype(object)
465+
if has_ref:
466+
view = df[:]
455467
expected = DataFrame([[6, "c", 10], [7, "d", 11], [5, 5, 5]])
456468
expected[1] = expected[1].astype(object)
457469
df.iloc(axis=0)[2] = 5
458470
tm.assert_frame_equal(df, expected)
459471

460472
df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]])
461473
df[1] = df[1].astype(object)
474+
if has_ref:
475+
view = df[:] # noqa: F841
462476
expected = DataFrame([[6, "c", 5], [7, "d", 5], [8, "e", 5]])
463477
expected[1] = expected[1].astype(object)
464478
df.iloc(axis=1)[2] = 5
465479
tm.assert_frame_equal(df, expected)
466480

467-
def test_iloc_setitem_list(self):
481+
@pytest.mark.parametrize("has_ref", [True, False])
482+
def test_iloc_setitem_list(self, has_ref):
468483
# setitem with an iloc list
469484
df = DataFrame(
470485
np.arange(9).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"]
471486
)
487+
if has_ref:
488+
view = df[:] # noqa: F841
472489
df.iloc[[0, 1], [1, 2]]
473490
df.iloc[[0, 1], [1, 2]] += 100
474491

@@ -669,12 +686,15 @@ def test_iloc_getitem_doc_issue(self, using_array_manager):
669686
expected = DataFrame(arr[1:5, 2:4], index=index[1:5], columns=columns[2:4])
670687
tm.assert_frame_equal(result, expected)
671688

672-
def test_iloc_setitem_series(self):
689+
@pytest.mark.parametrize("has_ref", [True, False])
690+
def test_iloc_setitem_series(self, has_ref):
673691
df = DataFrame(
674692
np.random.default_rng(2).standard_normal((10, 4)),
675693
index=list("abcdefghij"),
676694
columns=list("ABCD"),
677695
)
696+
if has_ref:
697+
view = df[:] # noqa: F841
678698

679699
df.iloc[1, 1] = 1
680700
result = df.iloc[1, 1]
@@ -703,32 +723,40 @@ def test_iloc_setitem_series(self):
703723
expected = Series([0, 1, 2, 3, 4, 5])
704724
tm.assert_series_equal(result, expected)
705725

706-
def test_iloc_setitem_list_of_lists(self):
726+
@pytest.mark.parametrize("has_ref", [True, False])
727+
def test_iloc_setitem_list_of_lists(self, has_ref):
707728
# GH 7551
708729
# list-of-list is set incorrectly in mixed vs. single dtyped frames
709730
df = DataFrame(
710731
{"A": np.arange(5, dtype="int64"), "B": np.arange(5, 10, dtype="int64")}
711732
)
733+
if has_ref:
734+
view = df[:]
712735
df.iloc[2:4] = [[10, 11], [12, 13]]
713736
expected = DataFrame({"A": [0, 1, 10, 12, 4], "B": [5, 6, 11, 13, 9]})
714737
tm.assert_frame_equal(df, expected)
715738

716739
df = DataFrame(
717740
{"A": ["a", "b", "c", "d", "e"], "B": np.arange(5, 10, dtype="int64")}
718741
)
742+
if has_ref:
743+
view = df[:] # noqa: F841
719744
df.iloc[2:4] = [["x", 11], ["y", 13]]
720745
expected = DataFrame({"A": ["a", "b", "x", "y", "e"], "B": [5, 6, 11, 13, 9]})
721746
tm.assert_frame_equal(df, expected)
722747

748+
@pytest.mark.parametrize("has_ref", [True, False])
723749
@pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])])
724750
@pytest.mark.parametrize("value", [["Z"], np.array(["Z"])])
725-
def test_iloc_setitem_with_scalar_index(self, indexer, value):
751+
def test_iloc_setitem_with_scalar_index(self, has_ref, indexer, value):
726752
# GH #19474
727753
# assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated
728754
# elementwisely, not using "setter('A', ['Z'])".
729755

730756
# Set object type to avoid upcast when setting "Z"
731757
df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]).astype({"A": object})
758+
if has_ref:
759+
view = df[:] # noqa: F841
732760
df.iloc[0, indexer] = value
733761
result = df.iloc[0, 0]
734762

@@ -1034,25 +1062,33 @@ def test_iloc_setitem_bool_indexer(self, klass):
10341062
expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
10351063
tm.assert_frame_equal(df, expected)
10361064

1065+
@pytest.mark.parametrize("has_ref", [True, False])
10371066
@pytest.mark.parametrize("indexer", [[1], slice(1, 2)])
1038-
def test_iloc_setitem_pure_position_based(self, indexer):
1067+
def test_iloc_setitem_pure_position_based(self, indexer, has_ref):
10391068
# GH#22046
10401069
df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]})
10411070
df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
1071+
if has_ref:
1072+
view = df2[:] # noqa: F841
10421073
df2.iloc[:, indexer] = df1.iloc[:, [0]]
10431074
expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]})
10441075
tm.assert_frame_equal(df2, expected)
10451076

1046-
def test_iloc_setitem_dictionary_value(self):
1077+
@pytest.mark.parametrize("has_ref", [True, False])
1078+
def test_iloc_setitem_dictionary_value(self, has_ref):
10471079
# GH#37728
10481080
df = DataFrame({"x": [1, 2], "y": [2, 2]})
1081+
if has_ref:
1082+
view = df[:]
10491083
rhs = {"x": 9, "y": 99}
10501084
df.iloc[1] = rhs
10511085
expected = DataFrame({"x": [1, 9], "y": [2, 99]})
10521086
tm.assert_frame_equal(df, expected)
10531087

10541088
# GH#38335 same thing, mixed dtypes
10551089
df = DataFrame({"x": [1, 2], "y": [2.0, 2.0]})
1090+
if has_ref:
1091+
view = df[:] # noqa: F841
10561092
df.iloc[1] = rhs
10571093
expected = DataFrame({"x": [1, 9], "y": [2.0, 99.0]})
10581094
tm.assert_frame_equal(df, expected)
@@ -1266,10 +1302,13 @@ def test_iloc_float_raises(
12661302
):
12671303
obj.iloc[3.0] = 0
12681304

1269-
def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame):
1305+
@pytest.mark.parametrize("has_ref", [True, False])
1306+
def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame, has_ref):
12701307
with pytest.raises(IndexingError, match="Too many indexers"):
12711308
float_frame.iloc[:, :, :]
12721309

1310+
if has_ref:
1311+
view = float_frame[:] # noqa: F841
12731312
with pytest.raises(IndexError, match="too many indices for array"):
12741313
# GH#32257 we let numpy do validation, get their exception
12751314
float_frame.iloc[:, :, :] = 1

0 commit comments

Comments
 (0)