From 6e6cb2154975ae5a3c010729741bdd4c874df938 Mon Sep 17 00:00:00 2001
From: Nadav-Zilberberg
Date: Mon, 8 Sep 2025 10:13:13 +0300
Subject: [PATCH 1/2] Update .gitignore

---
 .gitignore | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/.gitignore b/.gitignore
index d951f3fb9cbad..5c6c7ad9aaf3a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -141,3 +141,9 @@ doc/source/savefig/
 # Pyodide/WASM related files #
 ##############################
 /.pyodide-xbuildenv-*
+
+
+
+
+.venv/
+venv/

From b061f7e38db996b8fa99a8a0a5358c97535d3a6d Mon Sep 17 00:00:00 2001
From: test
Date: Sun, 19 Oct 2025 06:49:32 +0000
Subject: [PATCH 2/2] ENH: list duplicate keys in merge validation errors

When `validate=` rejects a merge because the join keys are not unique,
include the offending key combinations in the MergeError message.

---
 pandas/core/reshape/merge.py | 49 ++++++++++++++++++++++++++++++++----
 1 file changed, 44 insertions(+), 5 deletions(-)

diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 285256ac7b16a..657bd43aff1db 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1928,6 +1928,30 @@ def _validate_left_right_on(self, left_on, right_on):
 
         return left_on, right_on
 
+    def _get_dupes(self, keys: list["ArrayLike"]) -> list:
+        """
+        Return the distinct join-key combinations that occur more than once.
+
+        Parameters
+        ----------
+        keys : list of ArrayLike
+            One array per join key.
+
+        Returns
+        -------
+        list of tuple
+            Each duplicated key combination, listed once, with one tuple
+            element per join key.
+        """
+        from pandas import MultiIndex
+
+        multi_index = MultiIndex.from_arrays(keys)
+        # duplicated() marks every repeat after the first occurrence; unique()
+        # then collapses keys repeated several times into a single entry.
+        dupes = multi_index[multi_index.duplicated()].unique()
+        # Both steps return a MultiIndex, so tolist() yields a list of tuples.
+        return dupes.tolist()
+
     @final
     def _validate_validate_kwd(self, validate: str) -> None:
         # Check uniqueness of each
@@ -1944,30 +1968,45 @@ def _validate_validate_kwd(self, validate: str) -> None:
         # Check data integrity
         if validate in ["one_to_one", "1:1"]:
             if not left_unique and not right_unique:
+                left_dupes = self._get_dupes(self.left_join_keys)
+                right_dupes = self._get_dupes(self.right_join_keys)
                 raise MergeError(
                     "Merge keys are not unique in either left "
-                    "or right dataset; not a one-to-one merge"
+                    "or right dataset; not a one-to-one merge. "
+                    f"Left duplicate keys: {left_dupes}. "
+                    f"Right duplicate keys: {right_dupes}."
                 )
             if not left_unique:
+                left_dupes = self._get_dupes(self.left_join_keys)
                 raise MergeError(
-                    "Merge keys are not unique in left dataset; not a one-to-one merge"
+                    "Merge keys are not unique in left dataset; "
+                    "not a one-to-one merge. "
+                    f"Duplicate keys: {left_dupes}."
                 )
             if not right_unique:
+                right_dupes = self._get_dupes(self.right_join_keys)
                 raise MergeError(
-                    "Merge keys are not unique in right dataset; not a one-to-one merge"
+                    "Merge keys are not unique in right dataset; "
+                    "not a one-to-one merge. "
+                    f"Duplicate keys: {right_dupes}."
                 )
 
         elif validate in ["one_to_many", "1:m"]:
             if not left_unique:
+                left_dupes = self._get_dupes(self.left_join_keys)
                 raise MergeError(
-                    "Merge keys are not unique in left dataset; not a one-to-many merge"
+                    "Merge keys are not unique in left dataset; "
+                    "not a one-to-many merge. "
+                    f"Duplicate keys: {left_dupes}."
                 )
 
         elif validate in ["many_to_one", "m:1"]:
             if not right_unique:
+                right_dupes = self._get_dupes(self.right_join_keys)
                 raise MergeError(
                     "Merge keys are not unique in right dataset; "
-                    "not a many-to-one merge"
+                    "not a many-to-one merge. "
+                    f"Duplicate keys: {right_dupes}."
                 )
 
         elif validate in ["many_to_many", "m:m"]: