BUG: Fixed assign failure when with Copy-on-Write (pandas-dev#60941)

chilin0525 · jorisvandenbossche · jorisvandenbossche · commit 55a9698c742b · 2025-09-22T08:46:39.000+02:00
Co-authored-by: Joris Van den Bossche &lt;jorisvandenbossche@gmail.com&gt;
diff --git a/doc/source/whatsnew/v2.3.3.rst b/doc/source/whatsnew/v2.3.3.rst
@@ -27,6 +27,16 @@ Bug fixes
   with a compiled regex and custom flags (:issue:`62240`)
 - Fix :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
 
+
+Improvements and fixes for Copy-on-Write
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Bug fixes
+^^^^^^^^^
+
+- The :meth:`DataFrame.iloc` now works correctly with ``copy_on_write`` option when assigning values after subsetting the columns of a homogeneous DataFrame (:issue:`60309`)
+
+
 .. ---------------------------------------------------------------------------
 .. _whatsnew_233.contributors:
 
diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py
@@ -405,8 +405,27 @@ def setitem(self, indexer, value, warn: bool = True) -> Self:
                     self._iset_split_block(  # type: ignore[attr-defined]
                         0, blk_loc, values
                     )
-                    # first block equals values
-                    self.blocks[0].setitem((indexer[0], np.arange(len(blk_loc))), value)
+
+                    indexer = list(indexer)
+                    # first block equals values we are setting to -> set to all columns
+                    if lib.is_integer(indexer[1]):
+                        col_indexer = 0
+                    elif len(blk_loc) > 1:
+                        col_indexer = slice(None)  # type: ignore[assignment]
+                    else:
+                        col_indexer = np.arange(len(blk_loc))  # type: ignore[assignment]
+                    indexer[1] = col_indexer
+
+                    row_indexer = indexer[0]
+                    if isinstance(row_indexer, np.ndarray) and row_indexer.ndim == 2:
+                        # numpy cannot handle a 2d indexer in combo with a slice
+                        row_indexer = np.squeeze(row_indexer, axis=1)
+                    if isinstance(row_indexer, np.ndarray) and len(row_indexer) == 0:
+                        # numpy does not like empty indexer combined with slice
+                        # and we are setting nothing anyway
+                        return self
+                    indexer[0] = row_indexer
+                    self.blocks[0].setitem(tuple(indexer), value)
                     return self
             # No need to split if we either set all columns or on a single block
             # manager
diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py
@@ -1500,13 +1500,17 @@ def test_set_2d_casting_date_to_int(self, col, indexer):
         )
         tm.assert_frame_equal(df, expected)
 
+    @pytest.mark.parametrize("has_ref", [True, False])
     @pytest.mark.parametrize("col", [{}, {"name": "a"}])
-    def test_loc_setitem_reordering_with_all_true_indexer(self, col):
+    def test_loc_setitem_reordering_with_all_true_indexer(self, col, has_ref):
         # GH#48701
         n = 17
         df = DataFrame({**col, "x": range(n), "y": range(n)})
+        value = df[["x", "y"]].copy()
         expected = df.copy()
-        df.loc[n * [True], ["x", "y"]] = df[["x", "y"]]
+        if has_ref:
+            view = df[:]  # noqa: F841
+        df.loc[n * [True], ["x", "y"]] = value
         tm.assert_frame_equal(df, expected)
 
     def test_loc_rhs_empty_warning(self):
diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py
@@ -1400,6 +1400,75 @@ def test_frame_setitem_empty_dataframe(self):
         )
         tm.assert_frame_equal(df, expected)
 
+    def test_iloc_setitem_view_2dblock(self):
+        # https://github.com/pandas-dev/pandas/issues/60309
+        df_parent = DataFrame(
+            {
+                "A": [1, 4, 1, 5],
+                "B": [2, 5, 2, 6],
+                "C": [3, 6, 1, 7],
+                "D": [8, 9, 10, 11],
+            }
+        )
+        df_orig = df_parent.copy()
+        df = df_parent[["B", "C"]]
+
+        # Perform the iloc operation
+        df.iloc[[1, 3], :] = [[2, 2], [2, 2]]
+
+        # Check that original DataFrame is unchanged
+        tm.assert_frame_equal(df_parent, df_orig)
+
+        # Check that df is modified correctly
+        expected = DataFrame({"B": [2, 2, 2, 2], "C": [3, 2, 1, 2]}, index=df.index)
+        tm.assert_frame_equal(df, expected)
+
+        # with setting to subset of columns
+        df = df_parent[["B", "C", "D"]]
+        df.iloc[[1, 3], 0:3:2] = [[2, 2], [2, 2]]
+        tm.assert_frame_equal(df_parent, df_orig)
+        expected = DataFrame(
+            {"B": [2, 2, 2, 2], "C": [3, 6, 1, 7], "D": [8, 2, 10, 2]}, index=df.index
+        )
+        tm.assert_frame_equal(df, expected)
+
+    @pytest.mark.parametrize(
+        "indexer, value",
+        [
+            (([0, 2], slice(None)), [[2, 2, 2, 2], [2, 2, 2, 2]]),
+            ((slice(None), slice(None)), 2),
+            ((0, [1, 3]), [2, 2]),
+            (([0], 1), [2]),
+            (([0], np.int64(1)), [2]),
+            ((slice(None), np.int64(1)), [2, 2, 2]),
+            ((slice(None, 2), np.int64(1)), [2, 2]),
+            (
+                (np.array([False, True, False]), np.array([False, True, False, True])),
+                [2, 2],
+            ),
+        ],
+    )
+    def test_setitem_2dblock_with_ref(self, indexer, value):
+        # https://github.com/pandas-dev/pandas/issues/60309
+        arr = np.arange(12).reshape(3, 4)
+
+        df_parent = DataFrame(arr.copy(), columns=list("ABCD"))
+        # the test is specifically for the case where the df is backed by a single
+        # block (taking the non-split path)
+        assert df_parent._mgr.is_single_block
+        df_orig = df_parent.copy()
+        df = df_parent[:]
+
+        df.iloc[indexer] = value
+
+        # Check that original DataFrame is unchanged
+        tm.assert_frame_equal(df_parent, df_orig)
+
+        # Check that df is modified correctly
+        arr[indexer] = value
+        expected = DataFrame(arr, columns=list("ABCD"))
+        tm.assert_frame_equal(df, expected)
+
 
 def test_full_setter_loc_incompatible_dtype():
     # https://github.com/pandas-dev/pandas/issues/55791
diff --git a/pandas/tests/indexing/multiindex/test_loc.py b/pandas/tests/indexing/multiindex/test_loc.py
@@ -33,14 +33,21 @@ def frame_random_data_integer_multi_index():
 
 
 class TestMultiIndexLoc:
-    def test_loc_setitem_frame_with_multiindex(self, multiindex_dataframe_random_data):
+    @pytest.mark.parametrize("has_ref", [True, False])
+    def test_loc_setitem_frame_with_multiindex(
+        self, multiindex_dataframe_random_data, has_ref
+    ):
         frame = multiindex_dataframe_random_data
+        if has_ref:
+            view = frame[:]
         frame.loc[("bar", "two"), "B"] = 5
         assert frame.loc[("bar", "two"), "B"] == 5
 
         # with integer labels
         df = frame.copy()
         df.columns = list(range(3))
+        if has_ref:
+            view = df[:]  # noqa: F841
         df.loc[("bar", "two"), 1] = 7
         assert df.loc[("bar", "two"), 1] == 7
 
diff --git a/pandas/tests/indexing/test_iloc.py b/pandas/tests/indexing/test_iloc.py
@@ -104,12 +104,17 @@ def test_iloc_setitem_fullcol_categorical(self, indexer, key, using_array_manage
         expected = DataFrame({0: Series(cat.astype(object), dtype=object), 1: range(3)})
         tm.assert_frame_equal(df, expected)
 
+    @pytest.mark.parametrize("has_ref", [True, False])
     @pytest.mark.parametrize("box", [array, Series])
-    def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_copy_on_write):
+    def test_iloc_setitem_ea_inplace(
+        self, frame_or_series, box, has_ref, using_copy_on_write
+    ):
         # GH#38952 Case with not setting a full column
         #  IntegerArray without NAs
         arr = array([1, 2, 3, 4])
         obj = frame_or_series(arr.to_numpy("i8"))
+        if has_ref:
+            view = obj[:]  # noqa: F841
 
         if frame_or_series is Series:
             values = obj.values
@@ -125,14 +130,15 @@ def test_iloc_setitem_ea_inplace(self, frame_or_series, box, using_copy_on_write
         tm.assert_equal(obj, expected)
 
         # Check that we are actually in-place
-        if frame_or_series is Series:
-            if using_copy_on_write:
-                assert obj.values is not values
-                assert np.shares_memory(obj.values, values)
+        if not has_ref:
+            if frame_or_series is Series:
+                if using_copy_on_write:
+                    assert obj.values is not values
+                    assert np.shares_memory(obj.values, values)
+                else:
+                    assert obj.values is values
             else:
-                assert obj.values is values
-        else:
-            assert np.shares_memory(obj[0].values, values)
+                assert np.shares_memory(obj[0].values, values)
 
     def test_is_scalar_access(self):
         # GH#32085 index with duplicates doesn't matter for _is_scalar_access
@@ -426,12 +432,15 @@ def test_iloc_getitem_slice_dups(self):
         tm.assert_frame_equal(df.iloc[10:, :2], df2)
         tm.assert_frame_equal(df.iloc[10:, 2:], df1)
 
-    def test_iloc_setitem(self, warn_copy_on_write):
+    @pytest.mark.parametrize("has_ref", [True, False])
+    def test_iloc_setitem(self, warn_copy_on_write, has_ref):
         df = DataFrame(
             np.random.default_rng(2).standard_normal((4, 4)),
             index=np.arange(0, 8, 2),
             columns=np.arange(0, 12, 3),
         )
+        if has_ref:
+            view = df[:]  # noqa: F841
 
         df.iloc[1, 1] = 1
         result = df.iloc[1, 1]
@@ -448,27 +457,35 @@ def test_iloc_setitem(self, warn_copy_on_write):
         expected = Series([0, 1, 0], index=[4, 5, 6])
         tm.assert_series_equal(s, expected)
 
-    def test_iloc_setitem_axis_argument(self):
+    @pytest.mark.parametrize("has_ref", [True, False])
+    def test_iloc_setitem_axis_argument(self, has_ref):
         # GH45032
         df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]])
         df[1] = df[1].astype(object)
+        if has_ref:
+            view = df[:]
         expected = DataFrame([[6, "c", 10], [7, "d", 11], [5, 5, 5]])
         expected[1] = expected[1].astype(object)
         df.iloc(axis=0)[2] = 5
         tm.assert_frame_equal(df, expected)
 
         df = DataFrame([[6, "c", 10], [7, "d", 11], [8, "e", 12]])
         df[1] = df[1].astype(object)
+        if has_ref:
+            view = df[:]  # noqa: F841
         expected = DataFrame([[6, "c", 5], [7, "d", 5], [8, "e", 5]])
         expected[1] = expected[1].astype(object)
         df.iloc(axis=1)[2] = 5
         tm.assert_frame_equal(df, expected)
 
-    def test_iloc_setitem_list(self):
+    @pytest.mark.parametrize("has_ref", [True, False])
+    def test_iloc_setitem_list(self, has_ref):
         # setitem with an iloc list
         df = DataFrame(
             np.arange(9).reshape((3, 3)), index=["A", "B", "C"], columns=["A", "B", "C"]
         )
+        if has_ref:
+            view = df[:]  # noqa: F841
         df.iloc[[0, 1], [1, 2]]
         df.iloc[[0, 1], [1, 2]] += 100
 
@@ -669,12 +686,15 @@ def test_iloc_getitem_doc_issue(self, using_array_manager):
         expected = DataFrame(arr[1:5, 2:4], index=index[1:5], columns=columns[2:4])
         tm.assert_frame_equal(result, expected)
 
-    def test_iloc_setitem_series(self):
+    @pytest.mark.parametrize("has_ref", [True, False])
+    def test_iloc_setitem_series(self, has_ref):
         df = DataFrame(
             np.random.default_rng(2).standard_normal((10, 4)),
             index=list("abcdefghij"),
             columns=list("ABCD"),
         )
+        if has_ref:
+            view = df[:]  # noqa: F841
 
         df.iloc[1, 1] = 1
         result = df.iloc[1, 1]
@@ -703,32 +723,40 @@ def test_iloc_setitem_series(self):
         expected = Series([0, 1, 2, 3, 4, 5])
         tm.assert_series_equal(result, expected)
 
-    def test_iloc_setitem_list_of_lists(self):
+    @pytest.mark.parametrize("has_ref", [True, False])
+    def test_iloc_setitem_list_of_lists(self, has_ref):
         # GH 7551
         # list-of-list is set incorrectly in mixed vs. single dtyped frames
         df = DataFrame(
             {"A": np.arange(5, dtype="int64"), "B": np.arange(5, 10, dtype="int64")}
         )
+        if has_ref:
+            view = df[:]
         df.iloc[2:4] = [[10, 11], [12, 13]]
         expected = DataFrame({"A": [0, 1, 10, 12, 4], "B": [5, 6, 11, 13, 9]})
         tm.assert_frame_equal(df, expected)
 
         df = DataFrame(
             {"A": ["a", "b", "c", "d", "e"], "B": np.arange(5, 10, dtype="int64")}
         )
+        if has_ref:
+            view = df[:]  # noqa: F841
         df.iloc[2:4] = [["x", 11], ["y", 13]]
         expected = DataFrame({"A": ["a", "b", "x", "y", "e"], "B": [5, 6, 11, 13, 9]})
         tm.assert_frame_equal(df, expected)
 
+    @pytest.mark.parametrize("has_ref", [True, False])
     @pytest.mark.parametrize("indexer", [[0], slice(None, 1, None), np.array([0])])
     @pytest.mark.parametrize("value", [["Z"], np.array(["Z"])])
-    def test_iloc_setitem_with_scalar_index(self, indexer, value):
+    def test_iloc_setitem_with_scalar_index(self, has_ref, indexer, value):
         # GH #19474
         # assigning like "df.iloc[0, [0]] = ['Z']" should be evaluated
         # elementwisely, not using "setter('A', ['Z'])".
 
         # Set object type to avoid upcast when setting "Z"
         df = DataFrame([[1, 2], [3, 4]], columns=["A", "B"]).astype({"A": object})
+        if has_ref:
+            view = df[:]  # noqa: F841
         df.iloc[0, indexer] = value
         result = df.iloc[0, 0]
 
@@ -1034,25 +1062,33 @@ def test_iloc_setitem_bool_indexer(self, klass):
         expected = DataFrame({"flag": ["x", "y", "z"], "value": [2, 3, 4]})
         tm.assert_frame_equal(df, expected)
 
+    @pytest.mark.parametrize("has_ref", [True, False])
     @pytest.mark.parametrize("indexer", [[1], slice(1, 2)])
-    def test_iloc_setitem_pure_position_based(self, indexer):
+    def test_iloc_setitem_pure_position_based(self, indexer, has_ref):
         # GH#22046
         df1 = DataFrame({"a2": [11, 12, 13], "b2": [14, 15, 16]})
         df2 = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]})
+        if has_ref:
+            view = df2[:]  # noqa: F841
         df2.iloc[:, indexer] = df1.iloc[:, [0]]
         expected = DataFrame({"a": [1, 2, 3], "b": [11, 12, 13], "c": [7, 8, 9]})
         tm.assert_frame_equal(df2, expected)
 
-    def test_iloc_setitem_dictionary_value(self):
+    @pytest.mark.parametrize("has_ref", [True, False])
+    def test_iloc_setitem_dictionary_value(self, has_ref):
         # GH#37728
         df = DataFrame({"x": [1, 2], "y": [2, 2]})
+        if has_ref:
+            view = df[:]
         rhs = {"x": 9, "y": 99}
         df.iloc[1] = rhs
         expected = DataFrame({"x": [1, 9], "y": [2, 99]})
         tm.assert_frame_equal(df, expected)
 
         # GH#38335 same thing, mixed dtypes
         df = DataFrame({"x": [1, 2], "y": [2.0, 2.0]})
+        if has_ref:
+            view = df[:]  # noqa: F841
         df.iloc[1] = rhs
         expected = DataFrame({"x": [1, 9], "y": [2.0, 99.0]})
         tm.assert_frame_equal(df, expected)
@@ -1266,10 +1302,13 @@ def test_iloc_float_raises(
             ):
                 obj.iloc[3.0] = 0
 
-    def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame):
+    @pytest.mark.parametrize("has_ref", [True, False])
+    def test_iloc_getitem_setitem_fancy_exceptions(self, float_frame, has_ref):
         with pytest.raises(IndexingError, match="Too many indexers"):
             float_frame.iloc[:, :, :]
 
+        if has_ref:
+            view = float_frame[:]  # noqa: F841
         with pytest.raises(IndexError, match="too many indices for array"):
             # GH#32257 we let numpy do validation, get their exception
             float_frame.iloc[:, :, :] = 1
diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py