diff --git a/.gitconfig b/.gitconfig
new file mode 100644
index 0000000000000..f89a028634167
--- /dev/null
+++ b/.gitconfig
@@ -0,0 +1,7 @@
+[user]
+	email = test@example.com
+	name = test
+[pull]
+	rebase = false
+[push]
+	default = simple
diff --git a/.gitignore b/.gitignore
index d951f3fb9cbad..5c6c7ad9aaf3a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -141,3 +141,9 @@ doc/source/savefig/
 # Pyodide/WASM related files #
 ##############################
 /.pyodide-xbuildenv-*
+
+
+
+
+.venv/
+venv/
diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py
index 285256ac7b16a..657bd43aff1db 100644
--- a/pandas/core/reshape/merge.py
+++ b/pandas/core/reshape/merge.py
@@ -1928,6 +1928,27 @@ def _validate_left_right_on(self, left_on, right_on):
 
         return left_on, right_on
 
+    def _get_dupes(self, keys: list["ArrayLike"]) -> list:
+        """
+        Return the distinct key combinations that occur more than once.
+
+        Parameters
+        ----------
+        keys : list of ArrayLike
+            Join-key arrays of equal length, one array per key level.
+
+        Returns
+        -------
+        list
+            One tuple per duplicated key combination, each listed once.
+        """
+        from pandas import MultiIndex
+
+        multi_index = MultiIndex.from_arrays(keys)
+        # Masking a MultiIndex returns a MultiIndex, so each entry is a tuple
+        # (a 1-tuple when there is a single join key).
+        return multi_index[multi_index.duplicated()].unique().tolist()
+
     @final
     def _validate_validate_kwd(self, validate: str) -> None:
         # Check uniqueness of each
@@ -1944,30 +1965,45 @@ def _validate_validate_kwd(self, validate: str) -> None:
         # Check data integrity
         if validate in ["one_to_one", "1:1"]:
             if not left_unique and not right_unique:
+                left_dupes = self._get_dupes(self.left_join_keys)
+                right_dupes = self._get_dupes(self.right_join_keys)
                 raise MergeError(
                     "Merge keys are not unique in either left "
-                    "or right dataset; not a one-to-one merge"
+                    "or right dataset; not a one-to-one merge. "
+                    f"Left duplicate keys: {left_dupes}. "
+                    f"Right duplicate keys: {right_dupes}."
                 )
             if not left_unique:
+                left_dupes = self._get_dupes(self.left_join_keys)
                 raise MergeError(
-                    "Merge keys are not unique in left dataset; not a one-to-one merge"
+                    "Merge keys are not unique in left dataset; "
+                    "not a one-to-one merge. "
+                    f"Duplicate keys: {left_dupes}."
                 )
             if not right_unique:
+                right_dupes = self._get_dupes(self.right_join_keys)
                 raise MergeError(
-                    "Merge keys are not unique in right dataset; not a one-to-one merge"
+                    "Merge keys are not unique in right dataset; "
+                    "not a one-to-one merge. "
+                    f"Duplicate keys: {right_dupes}."
                 )
 
         elif validate in ["one_to_many", "1:m"]:
             if not left_unique:
+                left_dupes = self._get_dupes(self.left_join_keys)
                 raise MergeError(
-                    "Merge keys are not unique in left dataset; not a one-to-many merge"
+                    "Merge keys are not unique in left dataset; "
+                    "not a one-to-many merge. "
+                    f"Duplicate keys: {left_dupes}."
                 )
 
         elif validate in ["many_to_one", "m:1"]:
             if not right_unique:
+                right_dupes = self._get_dupes(self.right_join_keys)
                 raise MergeError(
                     "Merge keys are not unique in right dataset; "
-                    "not a many-to-one merge"
+                    "not a many-to-one merge. "
+                    f"Duplicate keys: {right_dupes}."
                 )
 
         elif validate in ["many_to_many", "m:m"]: