diff --git a/doc/source/reference/indexing.rst b/doc/source/reference/indexing.rst
index 93f88db0843dc..e9035a1a9a1e0 100644
--- a/doc/source/reference/indexing.rst
+++ b/doc/source/reference/indexing.rst
@@ -294,6 +294,7 @@ MultiIndex components
    MultiIndex.copy
    MultiIndex.append
    MultiIndex.truncate
+   MultiIndex.insert_level
 
 MultiIndex selecting
 ~~~~~~~~~~~~~~~~~~~~
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index 259470a4f1513..8cb9405f4c32f 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -214,6 +214,7 @@ Other enhancements
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
 - :py:class:`frozenset` elements in pandas objects are now natively printed (:issue:`60690`)
 - Add ``"delete_rows"`` option to ``if_exists`` argument in :meth:`DataFrame.to_sql` deleting all records of the table before inserting data (:issue:`37210`).
+- Added :meth:`MultiIndex.insert_level` to insert a new level at a specified position in a :class:`MultiIndex` (:issue:`62558`)
 - Added half-year offset classes :class:`HalfYearBegin`, :class:`HalfYearEnd`, :class:`BHalfYearBegin` and :class:`BHalfYearEnd` (:issue:`60928`)
 - Added support for ``axis=1`` with ``dict`` or :class:`Series` arguments into :meth:`DataFrame.fillna` (:issue:`4514`)
 - Added support to read and write from and to Apache Iceberg tables with the new :func:`read_iceberg` and :meth:`DataFrame.to_iceberg` functions (:issue:`61383`)
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 0a4e1f011f06a..f08efa8418625 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -2710,6 +2710,87 @@ def reorder_levels(self, order) -> MultiIndex:
         result = self._reorder_ilevels(order)
         return result
 
+    def insert_level(
+        self, position: int, value, name: Hashable = lib.no_default
+    ) -> MultiIndex:
+        """
+        Insert a new level at the specified position in the MultiIndex.
+
+        Parameters
+        ----------
+        position : int
+            The position at which to insert the new level (0-based).
+            Must be between 0 and nlevels (inclusive).
+        value : array-like
+            Values to use for the new level. Length must match the length
+            of the index.
+        name : Hashable, optional
+            Name for the new level. If not provided, the new level will
+            have no name.
+
+        Returns
+        -------
+        MultiIndex
+            New MultiIndex with the inserted level.
+
+        Raises
+        ------
+        TypeError
+            If ``position`` is not an integer.
+        ValueError
+            If ``position`` is out of bounds, if ``value`` is not array-like,
+            or if the length of ``value`` does not match the length of the
+            index.
+
+        See Also
+        --------
+        MultiIndex.droplevel : Remove levels from the MultiIndex.
+        MultiIndex.swaplevel : Swap two levels in the MultiIndex.
+        MultiIndex.reorder_levels : Reorder levels using specified order.
+
+        Examples
+        --------
+        >>> idx = pd.MultiIndex.from_tuples([("A", 1), ("B", 2)])
+        >>> idx.insert_level(0, ["new_value", "new_value"])
+        MultiIndex([('new_value', 'A', 1),
+                    ('new_value', 'B', 2)],
+                   )
+
+        >>> idx.insert_level(1, ["X", "Y"])
+        MultiIndex([('A', 'X', 1),
+                    ('B', 'Y', 2)],
+                   )
+        """
+        # lib.is_integer accepts NumPy integers and rejects bools.
+        if not lib.is_integer(position):
+            raise TypeError("position must be an integer")
+
+        if position < 0 or position > self.nlevels:
+            raise ValueError(f"position must be between 0 and {self.nlevels}")
+
+        if name is lib.no_default:
+            name = None
+
+        if not hasattr(value, "__iter__") or isinstance(value, str):
+            raise ValueError("value must be an array-like object")
+
+        # Keep typed containers (ndarray, Index, Series, ...) as they are so
+        # the new level retains their dtype; materialize everything else.
+        if not hasattr(value, "dtype"):
+            value = list(value)
+        if len(value) != len(self):
+            raise ValueError("Length of values must match length of index")
+
+        # Work level by level rather than row by row: the existing levels
+        # keep their dtypes and no per-row tuples are built in Python.
+        arrays = [self._get_level_values(i) for i in range(self.nlevels)]
+        arrays.insert(position, value)
+
+        new_names = list(self.names)
+        new_names.insert(position, name)
+
+        return MultiIndex.from_arrays(arrays, names=new_names)
+
     def _reorder_ilevels(self, order) -> MultiIndex:
         if len(order) != self.nlevels:
             raise AssertionError(
diff --git a/pandas/tests/indexes/multi/test_insert_level.py b/pandas/tests/indexes/multi/test_insert_level.py
new file mode 100644
index 0000000000000..b0faed8638210
--- /dev/null
+++ b/pandas/tests/indexes/multi/test_insert_level.py
@@ -0,0 +1,185 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+@pytest.mark.parametrize(
+    "position, value, name, expected_tuples, expected_names",
+    [
+        (
+            0,
+            ["new_value"] * 3,
+            None,
+            [("new_value", "A", 1), ("new_value", "B", 2), ("new_value", "C", 3)],
+            [None, "level1", "level2"],
+        ),
+        (
+            1,
+            ["middle"] * 3,
+            None,
+            [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)],
+            ["level1", None, "level2"],
+        ),
+        (
+            0,
+            ["new_val"] * 3,
+            "new_level",
+            [("new_val", "A", 1), ("new_val", "B", 2), ("new_val", "C", 3)],
+            ["new_level", "level1", "level2"],
+        ),
+        (
+            1,
+            ["middle"] * 3,
+            "custom_name",
+            [("A", "middle", 1), ("B", "middle", 2), ("C", "middle", 3)],
+            ["level1", "custom_name", "level2"],
+        ),
+        (
+            0,
+            ["start"] * 3,
+            None,
+            [("start", "A", 1), ("start", "B", 2), ("start", "C", 3)],
+            [None, "level1", "level2"],
+        ),
+        (
+            2,
+            ["end"] * 3,
+            None,
+            [("A", 1, "end"), ("B", 2, "end"), ("C", 3, "end")],
+            ["level1", "level2", None],
+        ),
+        (
+            1,
+            [100, 100, 100],
+            None,
+            [("A", 100, 1), ("B", 100, 2), ("C", 100, 3)],
+            ["level1", None, "level2"],
+        ),
+        (
+            1,
+            [1.5, 1.5, 1.5],
+            None,
+            [("A", 1.5, 1), ("B", 1.5, 2), ("C", 1.5, 3)],
+            ["level1", None, "level2"],
+        ),
+        (
+            1,
+            [None, None, None],
+            None,
+            [("A", None, 1), ("B", None, 2), ("C", None, 3)],
+            ["level1", None, "level2"],
+        ),
+        (
+            1,
+            ["X", "Y", "Z"],
+            None,
+            [("A", "X", 1), ("B", "Y", 2), ("C", "Z", 3)],
+            ["level1", None, "level2"],
+        ),
+        (
+            0,
+            [""] * 3,
+            "empty_string",
+            [("", "A", 1), ("", "B", 2), ("", "C", 3)],
+            ["empty_string", "level1", "level2"],
+        ),
+        (
+            1,
+            [True, True, True],
+            None,
+            [("A", True, 1), ("B", True, 2), ("C", True, 3)],
+            ["level1", None, "level2"],
+        ),
+    ],
+)
+def test_insert_level_basic(position, value, name, expected_tuples, expected_names):
+    simple_idx = pd.MultiIndex.from_tuples(
+        [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
+    )
+
+    result = simple_idx.insert_level(position, value, name=name)
+    expected = pd.MultiIndex.from_tuples(expected_tuples, names=expected_names)
+    tm.assert_index_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "position, value, expected_error",
+    [
+        (5, ["invalid"] * 3, "position must be between"),
+        (-1, ["invalid"] * 3, "position must be between"),
+        (1, ["too", "few"], "Length of values must match"),
+        (3, ["value"] * 3, "position must be between"),
+        (0, "scalar_value", "value must be an array-like object"),
+    ],
+)
+def test_insert_level_error_cases(position, value, expected_error):
+    simple_idx = pd.MultiIndex.from_tuples(
+        [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
+    )
+
+    with pytest.raises(ValueError, match=expected_error):
+        simple_idx.insert_level(position, value)
+
+
+def test_insert_level_preserves_original():
+    simple_idx = pd.MultiIndex.from_tuples(
+        [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
+    )
+
+    original = simple_idx.copy()
+    simple_idx.insert_level(1, ["temp"] * 3)
+
+    tm.assert_index_equal(original, simple_idx)
+
+
+def test_insert_level_empty_index():
+    empty_idx = pd.MultiIndex.from_tuples([], names=["level1", "level2"])
+
+    result = empty_idx.insert_level(0, [])
+    expected = pd.MultiIndex.from_tuples([], names=[None, "level1", "level2"])
+    tm.assert_index_equal(result, expected)
+
+
+def test_insert_level_single_element():
+    single_idx = pd.MultiIndex.from_tuples([("A", 1)], names=["level1", "level2"])
+
+    result = single_idx.insert_level(1, ["middle"])
+    expected = pd.MultiIndex.from_tuples(
+        [("A", "middle", 1)], names=["level1", None, "level2"]
+    )
+    tm.assert_index_equal(result, expected)
+
+
+@pytest.mark.parametrize("position", [1.5, "1", None, True])
+def test_insert_level_non_integer_position(position):
+    simple_idx = pd.MultiIndex.from_tuples(
+        [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
+    )
+
+    with pytest.raises(TypeError, match="position must be an integer"):
+        simple_idx.insert_level(position, ["value"] * 3)
+
+
+def test_insert_level_numpy_integer_position():
+    simple_idx = pd.MultiIndex.from_tuples(
+        [("A", 1), ("B", 2), ("C", 3)], names=["level1", "level2"]
+    )
+
+    result = simple_idx.insert_level(np.int64(1), ["X", "Y", "Z"])
+    expected = simple_idx.insert_level(1, ["X", "Y", "Z"])
+    tm.assert_index_equal(result, expected)
+
+
+def test_insert_level_preserves_dtypes():
+    idx = pd.MultiIndex.from_arrays(
+        [pd.Categorical(["A", "B"]), [1, 2]], names=["level1", "level2"]
+    )
+
+    result = idx.insert_level(2, pd.Categorical(["x", "y"]), name="new")
+    expected = pd.MultiIndex.from_arrays(
+        [pd.Categorical(["A", "B"]), [1, 2], pd.Categorical(["x", "y"])],
+        names=["level1", "level2", "new"],
+    )
+    tm.assert_index_equal(result, expected)