diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 6b78f63f92988..4de20a93fc1a8 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -798,6 +798,7 @@ Other Deprecations - Deprecated lowercase strings ``w``, ``w-mon``, ``w-tue``, etc. denoting frequencies in :class:`Week` in favour of ``W``, ``W-MON``, ``W-TUE``, etc. (:issue:`58998`) - Deprecated parameter ``method`` in :meth:`DataFrame.reindex_like` / :meth:`Series.reindex_like` (:issue:`58667`) - Deprecated strings ``w``, ``d``, ``MIN``, ``MS``, ``US`` and ``NS`` denoting units in :class:`Timedelta` in favour of ``W``, ``D``, ``min``, ``ms``, ``us`` and ``ns`` (:issue:`59051`) +- Deprecated the ``verify_integrity`` keyword in :meth:`DataFrame.set_index`; check ``obj.index.is_unique`` on the result instead (:issue:`62919`) - Deprecated the ``arg`` parameter of ``Series.map``; pass the added ``func`` argument instead. (:issue:`61260`) - Deprecated using ``epoch`` date format in :meth:`DataFrame.to_json` and :meth:`Series.to_json`, use ``iso`` instead. (:issue:`57063`) - Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.unstack` and :meth:`DataFrame.unstack` (:issue:`12189`, :issue:`53868`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3402fc12f421c..4fd9455212cfc 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -6225,7 +6225,7 @@ def set_index( drop: bool = ..., append: bool = ..., inplace: Literal[False] = ..., - verify_integrity: bool = ..., + verify_integrity: bool | lib.NoDefault = ..., ) -> DataFrame: ... @overload @@ -6236,7 +6236,7 @@ def set_index( drop: bool = ..., append: bool = ..., inplace: Literal[True], - verify_integrity: bool = ..., + verify_integrity: bool | lib.NoDefault = ..., ) -> None: ... 
def set_index( @@ -6246,7 +6246,7 @@ def set_index( drop: bool = True, append: bool = False, inplace: bool = False, - verify_integrity: bool = False, + verify_integrity: bool | lib.NoDefault = lib.no_default, ) -> DataFrame | None: """ Set the DataFrame index using existing columns. @@ -6276,6 +6276,8 @@ def set_index( necessary. Setting to False will improve the performance of this method. + .. deprecated:: 3.0.0 + Returns ------- DataFrame or None @@ -6362,6 +6364,18 @@ def set_index( 2013 84 2014 31 """ + if verify_integrity is not lib.no_default: + # GH#62919 + warnings.warn( + "The 'verify_integrity' keyword in DataFrame.set_index is " + "deprecated and will be removed in a future version. " + "Directly check the result.index.is_unique instead.", + Pandas4Warning, + stacklevel=find_stack_level(), + ) + else: + verify_integrity = False + inplace = validate_bool_kwarg(inplace, "inplace") self._check_inplace_and_allows_duplicate_labels(inplace) if not isinstance(keys, list): diff --git a/pandas/tests/frame/methods/test_set_index.py b/pandas/tests/frame/methods/test_set_index.py index 198cab0e91eab..edc040536ea69 100644 --- a/pandas/tests/frame/methods/test_set_index.py +++ b/pandas/tests/frame/methods/test_set_index.py @@ -10,6 +10,8 @@ import numpy as np import pytest +from pandas.errors import Pandas4Warning + from pandas import ( Categorical, CategoricalIndex, @@ -547,11 +549,14 @@ class TestSetIndexInvalid: def test_set_index_verify_integrity(self, frame_of_index_cols): df = frame_of_index_cols + msg = "The 'verify_integrity' keyword in DataFrame.set_index" with pytest.raises(ValueError, match="Index has duplicate keys"): - df.set_index("A", verify_integrity=True) + with tm.assert_produces_warning(Pandas4Warning, match=msg): + df.set_index("A", verify_integrity=True) # with MultiIndex with pytest.raises(ValueError, match="Index has duplicate keys"): - df.set_index([df["A"], df["A"]], verify_integrity=True) + with tm.assert_produces_warning(Pandas4Warning, 
match=msg): + df.set_index([df["A"], df["A"]], verify_integrity=True) @pytest.mark.parametrize("append", [True, False]) @pytest.mark.parametrize("drop", [True, False])