pandas-dev · zhangbowen-coder · Nov 18, 2025 · Nov 18, 2025 · Nov 18, 2025 · Nov 18, 2025
diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
@@ -1022,11 +1022,11 @@ Performance improvements
 - Performance improvement in :meth:`DataFrame.astype` when converting to extension floating dtypes, e.g. "Float64" (:issue:`60066`)
 - Performance improvement in :meth:`DataFrame.stack` when using ``future_stack=True`` and the DataFrame does not have a :class:`MultiIndex` (:issue:`58391`)
 - Performance improvement in :meth:`DataFrame.where` when ``cond`` is a :class:`DataFrame` with many columns (:issue:`61010`)
+- Performance improvement in :meth:`merge` and ``DataFrame.merge``. Users can now use prefixes, or both suffixes and prefixes, to differentiate duplicated columns. (:issue:`63014`)
 - Performance improvement in :meth:`to_hdf` avoid unnecessary reopenings of the HDF5 file to speedup data addition to files with a very large number of groups . (:issue:`58248`)
 - Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
 - Performance improvement in indexing operations for string dtypes (:issue:`56997`)
 - Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57825`)
-
 .. ---------------------------------------------------------------------------
 .. _whatsnew_300.bug_fixes:
 

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -368,13 +368,29 @@
 sort : bool, default False
     Sort the join keys lexicographically in the result DataFrame. If False,
     the order of the join keys depends on the join type (how keyword).
+diff_option : {"suffix", "prefix", "both"}, default "suffix"
+    How duplicated column names in `left` and `right` are differentiated.
+    If the value is "suffix", the duplicated columns will be differentiated
+    using the suffixes provided by parameter "suffixes".
+    If the value is "prefix", the duplicated columns will be differentiated
+    using the prefixes provided by parameter "prefixes".
+    If the value is "both", the duplicated columns will be differentiated
+    using both the suffixes provided by parameter "suffixes" and
+    the prefixes provided by parameter "prefixes".
 suffixes : list-like, default is ("_x", "_y")
     A length-2 sequence where each element is optionally a string
     indicating the suffix to add to overlapping column names in
     `left` and `right` respectively. Pass a value of `None` instead
     of a string to indicate that the column name from `left` or
     `right` should be left as-is, with no suffix. At least one of the
     values must not be None.
+prefixes : list-like, default is ("a_", "b_")
+    A length-2 sequence where each element is optionally a string
+    indicating the prefix to add to overlapping column names in
+    `left` and `right` respectively. Pass a value of `None` instead
+    of a string to indicate that the column name from `left` or
+    `right` should be left as-is, with no prefix. At least one of the
+    values must not be None.
 copy : bool, default False
     If False, avoid copy if possible.
 
@@ -11437,7 +11453,9 @@ def merge(
         left_index: bool = False,
         right_index: bool = False,
         sort: bool = False,
+        diff_option: Literal["prefix", "suffix", "both"] = "suffix",
         suffixes: Suffixes = ("_x", "_y"),
+        prefixes: Sequence[str | None] = ("a_", "b_"),
         copy: bool | lib.NoDefault = lib.no_default,
         indicator: str | bool = False,
         validate: MergeValidate | None = None,
@@ -11456,7 +11474,9 @@ def merge(
             left_index=left_index,
             right_index=right_index,
             sort=sort,
+            diff_option=diff_option,
             suffixes=suffixes,
+            prefixes=prefixes,
             indicator=indicator,
             validate=validate,
         )

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
@@ -153,7 +153,11 @@ def merge(
     left_index: bool = False,
     right_index: bool = False,
     sort: bool = False,
+    diff_option: Literal[
+        "prefix", "suffix", "both"
+    ] = "suffix",  # add new parameter prefixes diff_option
     suffixes: Suffixes = ("_x", "_y"),
+    prefixes: Sequence[str | None] = ("a_", "b_"),  # add new parameter prefixes
     copy: bool | lib.NoDefault = lib.no_default,
     indicator: str | bool = False,
     validate: str | None = None,
@@ -221,13 +225,29 @@ def merge(
     sort : bool, default False
         Sort the join keys lexicographically in the result DataFrame. If False,
         the order of the join keys depends on the join type (how keyword).
+    diff_option : {"suffix", "prefix", "both"}, default "suffix"
+        How duplicated column names in `left` and `right` are differentiated.
+        If the value is "suffix", the duplicated columns will be differentiated
+        using the suffixes provided by parameter "suffixes".
+        If the value is "prefix", the duplicated columns will be differentiated
+        using the prefixes provided by parameter "prefixes".
+        If the value is "both", the duplicated columns will be differentiated
+        using both the suffixes provided by parameter "suffixes" and
+        the prefixes provided by parameter "prefixes".
     suffixes : list-like, default is ("_x", "_y")
         A length-2 sequence where each element is optionally a string
         indicating the suffix to add to overlapping column names in
         `left` and `right` respectively. Pass a value of `None` instead
         of a string to indicate that the column name from `left` or
         `right` should be left as-is, with no suffix. At least one of the
         values must not be None.
+    prefixes : list-like, default is ("a_", "b_")
+        A length-2 sequence where each element is optionally a string
+        indicating the prefix to add to overlapping column names in
+        `left` and `right` respectively. Pass a value of `None` instead
+        of a string to indicate that the column name from `left` or
+        `right` should be left as-is, with no prefix. At least one of the
+        values must not be None.
     copy : bool, default False
         If False, avoid copy if possible.
 
@@ -370,6 +390,13 @@ def merge(
     left_df = _validate_operand(left)
     left._check_copy_deprecation(copy)
     right_df = _validate_operand(right)
+
+    if diff_option != "prefix" and diff_option != "suffix" and diff_option != "both":
+        raise ValueError(
+            "Parameter 'diff_option' is wrong, please choose from 'prefix'"
+            ", 'suffix' and 'both'."
+        )
+
     if how == "cross":
         return _cross_merge(
             left_df,
@@ -380,7 +407,9 @@ def merge(
             left_index=left_index,
             right_index=right_index,
             sort=sort,
+            diff_option=diff_option,
             suffixes=suffixes,
+            prefixes=prefixes,
             indicator=indicator,
             validate=validate,
         )
@@ -395,7 +424,9 @@ def merge(
             left_index=left_index,
             right_index=right_index,
             sort=sort,
+            diff_option=diff_option,
             suffixes=suffixes,
+            prefixes=prefixes,
             indicator=indicator,
             validate=validate,
         )
@@ -411,7 +442,11 @@ def _cross_merge(
     left_index: bool = False,
     right_index: bool = False,
     sort: bool = False,
+    diff_option: Literal[
+        "prefix", "suffix", "both"
+    ] = "suffix",  # add new parameter prefixes diff_option
     suffixes: Suffixes = ("_x", "_y"),
+    prefixes: Sequence[str | None] = ("a_", "b_"),  # add new parameter prefixes
     indicator: str | bool = False,
     validate: str | None = None,
 ) -> DataFrame:
@@ -447,7 +482,9 @@ def _cross_merge(
         left_index=left_index,
         right_index=right_index,
         sort=sort,
+        diff_option=diff_option,
         suffixes=suffixes,
+        prefixes=prefixes,
         indicator=indicator,
         validate=validate,
     )
@@ -954,7 +991,9 @@ class _MergeOperation:
     left_index: bool
     right_index: bool
     sort: bool
+    diff_option: Literal["prefix", "suffix", "both"]
     suffixes: Suffixes
+    prefixes: Sequence[str | None]
     indicator: str | bool
     validate: str | None
     join_names: list[Hashable]
@@ -972,7 +1011,11 @@ def __init__(
         left_index: bool = False,
         right_index: bool = False,
         sort: bool = True,
+        diff_option: Literal[
+            "prefix", "suffix", "both"
+        ] = "suffix",  # add new parameter prefixes diff_option
         suffixes: Suffixes = ("_x", "_y"),
+        prefixes: Sequence[str | None] = ("a_", "b_"),  # add new parameter prefixes
         indicator: str | bool = False,
         validate: str | None = None,
     ) -> None:
@@ -985,6 +1028,8 @@ def __init__(
         self.on = com.maybe_make_list(on)
 
         self.suffixes = suffixes
+        self.prefixes = prefixes
+        self.diff_option = diff_option
         self.sort = sort or how == "outer"
 
         self.left_index = left_index
@@ -1094,8 +1139,12 @@ def _reindex_and_concat(
         left = self.left[:]
         right = self.right[:]
 
-        llabels, rlabels = _items_overlap_with_suffix(
-            self.left._info_axis, self.right._info_axis, self.suffixes
+        llabels, rlabels = _items_overlap_with_suffix_or_prefix(
+            self.left._info_axis,
+            self.right._info_axis,
+            self.suffixes,
+            self.prefixes,
+            self.diff_option,
         )
 
         if left_indexer is not None and not is_range_indexer(left_indexer, len(left)):
@@ -3059,54 +3108,84 @@ def _validate_operand(obj: DataFrame | Series) -> DataFrame:
         )
 
 
-def _items_overlap_with_suffix(
-    left: Index, right: Index, suffixes: Suffixes
+def _items_overlap_with_suffix_or_prefix(
+    left: Index,
+    right: Index,
+    suffixes: Suffixes,
+    prefixes: Sequence[str | None],
+    diff_option: Literal["prefix", "suffix", "both"],
 ) -> tuple[Index, Index]:
     """
-    Suffixes type validation.
+    Suffixes and Prefixes type validation.
 
-    If two indices overlap, add suffixes to overlapping entries.
+    If two indices overlap, add suffixes and prefixes to overlapping entries.
 
-    If corresponding suffix is empty, the entry is simply converted to string.
+    If corresponding suffix and prefix are empty,
+    the entry is simply converted to string.
 
     """
-    if not is_list_like(suffixes, allow_sets=False) or isinstance(suffixes, dict):
+    if (diff_option == "both" or diff_option == "suffix") and (
+        not is_list_like(suffixes, allow_sets=False) or isinstance(suffixes, dict)
+    ):
         raise TypeError(
             f"Passing 'suffixes' as a {type(suffixes)}, is not supported. "
             "Provide 'suffixes' as a tuple instead."
         )
+    if (diff_option == "both" or diff_option == "prefix") and (
+        not is_list_like(prefixes, allow_sets=False) or isinstance(prefixes, dict)
+    ):
+        raise TypeError(
+            f"Passing 'prefixes' as a {type(prefixes)}, is not supported. "
+            "Provide 'prefixes' as a tuple instead."
+        )
 
     to_rename = left.intersection(right)
     if len(to_rename) == 0:
         return left, right
 
-    lsuffix, rsuffix = suffixes
+    if diff_option == "both" or diff_option == "suffix":
+        lsuffix, rsuffix = suffixes
+    else:
+        lsuffix, rsuffix = None, None
 
-    if not lsuffix and not rsuffix:
-        raise ValueError(f"columns overlap but no suffix specified: {to_rename}")
+    if diff_option == "both" or diff_option == "prefix":
+        lprefix, rprefix = prefixes
+    else:
+        lprefix, rprefix = None, None
+
+    if not lsuffix and not rsuffix and not lprefix and not rprefix:
+        raise ValueError(
+            f"columns overlap but no suffix or prefix specified: {to_rename}"
+        )
 
-    def renamer(x, suffix: str | None):
+    def renamer(x, suffix: str | None, prefix: str | None):
         """
         Rename the left and right indices.
 
-        If there is overlap, and suffix is not None, add
-        suffix, otherwise, leave it as-is.
+        If there is overlap, and suffix or prefix is not None, add
+        suffix or prefix (or both if both are provided), otherwise, leave it as-is.
 
         Parameters
         ----------
         x : original column name
         suffix : str or None
+        prefix : str or None
 
         Returns
         -------
         x : renamed column name
         """
-        if x in to_rename and suffix is not None:
-            return f"{x}{suffix}"
+        ret = x
+        if x in to_rename:
+            if suffix is not None:
+                ret = f"{ret}{suffix}"
+            if prefix is not None:
+                ret = f"{prefix}{ret}"
+            return ret
         return x
 
-    lrenamer = partial(renamer, suffix=lsuffix)
-    rrenamer = partial(renamer, suffix=rsuffix)
+    lrenamer = partial(renamer, suffix=lsuffix, prefix=lprefix)
+    rrenamer = partial(renamer, suffix=rsuffix, prefix=rprefix)
 
     llabels = left._transform_index(lrenamer)
     rlabels = right._transform_index(rrenamer)
@@ -3123,7 +3202,8 @@ def renamer(x, suffix: str | None):
     dups.extend(rlabels.intersection(left.difference(to_rename)).tolist())
     if dups:
         raise MergeError(
-            f"Passing 'suffixes' which cause duplicate columns {set(dups)} is "
+            f"Passing 'suffixes' or/and 'prefixes' "
+            f"which cause duplicate columns {set(dups)} is "
             "not allowed.",
         )
 

diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py
@@ -783,7 +783,7 @@ def test_join_dups(self):
         # GH 40991: As of 2.0 causes duplicate columns
         with pytest.raises(
             pd.errors.MergeError,
-            match="Passing 'suffixes' which cause duplicate columns",
+            match="Passing 'suffixes' or/and 'prefixes' which cause duplicate columns",
         ):
             dta.merge(w, left_index=True, right_index=True)