Skip to content

Commit 122bfd9

Browse files
committed
fix(#61434): Improve error message when merging with incompatible DataFrame types
- Add _get_merge_error_message() to provide helpful error messages
- Special handling for polars.DataFrame with conversion suggestion
- Generic helpful message for other incompatible types
- Add comprehensive regression tests
- All validation tests pass
1 parent d597079 commit 122bfd9

File tree

4 files changed

+428
-1
lines changed

4 files changed

+428
-1
lines changed

pandas/core/reshape/merge.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3029,10 +3029,36 @@ def _validate_operand(obj: DataFrame | Series) -> DataFrame:
30293029
return obj.to_frame()
30303030
else:
30313031
raise TypeError(
3032-
f"Can only merge Series or DataFrame objects, a {type(obj)} was passed"
3032+
_get_merge_error_message(obj)
30333033
)
30343034

30353035

3036+
def _get_merge_error_message(obj: object) -> str:
3037+
"""Generate a helpful error message for invalid merge arguments."""
3038+
obj_type_name = type(obj).__name__
3039+
obj_module_name = type(obj).__module__
3040+
3041+
# Special handling for known DataFrame-like libraries
3042+
if obj_module_name == "polars.dataframe.frame" and obj_type_name == "DataFrame":
3043+
return (
3044+
"Can only merge Series or DataFrame objects, received "
3045+
"polars.DataFrame. Please convert the polars DataFrame to a "
3046+
"pandas DataFrame using `.to_pandas()` or pass it to "
3047+
"pd.DataFrame()."
3048+
)
3049+
elif "polars" in obj_module_name.lower():
3050+
return (
3051+
f"Can only merge Series or DataFrame objects, received "
3052+
f"{obj_module_name}.{obj_type_name} (a polars object). "
3053+
"Please convert to a pandas DataFrame using `.to_pandas()`."
3054+
)
3055+
3056+
return (
3057+
f"Can only merge Series or DataFrame objects, received "
3058+
f"{obj_module_name}.{obj_type_name}. Expected a pandas Series or DataFrame."
3059+
)
3060+
3061+
30363062
def _items_overlap_with_suffix(
30373063
left: Index, right: Index, suffixes: Suffixes
30383064
) -> tuple[Index, Index]:

test_issue_61434_repro.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
"""
2+
Pandas Issue #61434 - Reproduction Test
3+
4+
Issue: When attempting to merge a pandas DataFrame with a polars DataFrame,
5+
the error message is unhelpful.
6+
7+
Current behavior: Generic error about missing attributes or type errors
8+
Expected behavior: Clear message saying "other must be pandas.DataFrame,
9+
received: polars.DataFrame"
10+
11+
Snippet from issue #61434:
12+
https://github.com/pandas-dev/pandas/issues/61434
13+
"""
14+
15+
import pandas as pd
16+
17+
# Try to import polars for testing
18+
try:
19+
import polars as pl
20+
POLARS_AVAILABLE = True
21+
except ImportError:
22+
POLARS_AVAILABLE = False
23+
print("Warning: polars not installed. Install with: pip install polars")
24+
25+
26+
def test_merge_with_polars():
    """
    Check that merging pandas with polars fails with a helpful message.

    Prints a step-by-step report. Returns True when ``pd.merge`` raises a
    ``TypeError`` whose message either mentions both "polars" and "pandas" or
    contains "must be" / "expected". Returns False when polars is not
    installed, when the merge unexpectedly succeeds, when the message is not
    helpful, or when a different exception type is raised.
    """
    if not POLARS_AVAILABLE:
        print("Skipping test - polars not available")
        return False

    rule = "=" * 70
    print(rule)
    print("Test: Merging pandas DataFrame with polars DataFrame")
    print(rule)

    # One frame per library, sharing the join column.
    pandas_frame = pd.DataFrame({'key': ['a', 'b', 'c'], 'value_x': [1, 2, 3]})
    polars_frame = pl.DataFrame({'key': ['a', 'b', 'c'], 'value_y': [10, 20, 30]})

    print(f"\nPandas DataFrame type: {type(pandas_frame)}")
    print(f"Polars DataFrame type: {type(polars_frame)}")
    print("\nAttempting merge...")

    try:
        merged = pd.merge(pandas_frame, polars_frame, on='key')
        print(f"✗ Unexpected: merge succeeded with result type {type(merged)}")
        return False
    except TypeError as err:
        message = str(err)
        lowered = message.lower()
        print(f"\nError caught: {type(err).__name__}")
        print(f"Error message: {message}")

        # Helpful if it names both libraries ...
        if "polars" in lowered and "pandas" in lowered:
            print("\n✓ GOOD: Error message mentions both polars and pandas")
            print("✓ GOOD: User knows what went wrong")
            return True

        # ... or at least says what was expected.
        if "must be" in lowered or "expected" in lowered:
            print("\n✓ GOOD: Error message explains what's expected")
            return True

        print("\n✗ BAD: Error message is not helpful enough")
        print(" Expected something like:")
        print(" 'other must be pandas.DataFrame, received: polars.DataFrame'")
        print(f" But got: {message}")
        return False
    except Exception as err:
        print(f"\n✗ Unexpected error type: {type(err).__name__}")
        print(f" {err}")
        return False
84+
85+
86+
def test_merge_pandas_baseline():
    """
    Sanity check: merging two pandas DataFrames on a shared key works.

    Prints a short report and returns True when ``pd.merge`` succeeds, or
    False when it raises any exception.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("Test: Merging two pandas DataFrames (baseline)")
    print(rule)

    left = pd.DataFrame({'key': ['a', 'b', 'c'], 'value_x': [1, 2, 3]})
    right = pd.DataFrame({'key': ['a', 'b', 'c'], 'value_y': [10, 20, 30]})

    try:
        merged = pd.merge(left, right, on='key')
        print("✓ Merge succeeded")
        print(f" Result shape: {merged.shape}")
        print(f" Result columns: {list(merged.columns)}")
        return True
    except Exception as err:
        print(f"✗ Baseline test failed: {err}")
        return False
113+
114+
115+
if __name__ == "__main__":
    # Script entry point: run both checks, then print a pass/fail summary.
    print("\n" + "=" * 70)
    print("PANDAS ISSUE #61434 - REPRODUCTION TEST")
    print("=" * 70)
    print()

    baseline_ok = test_merge_pandas_baseline()
    polars_test_ok = test_merge_with_polars()

    baseline_status = "✓ PASS" if baseline_ok else "✗ FAIL"
    polars_status = "✓ GOOD" if polars_test_ok else "✗ NEEDS FIX"

    print("\n" + "=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print(f"Baseline (pandas merge): {baseline_status}")
    print(f"Polars test (error msg): {polars_status}")
    print()

test_issue_61434_tests.py

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
"""
2+
Regression tests for issue #61434: Improved error message for incompatible merge types
3+
4+
Tests that:
5+
1. Merging with polars.DataFrame raises TypeError with helpful message
6+
2. Merging with other incompatible types also gets helpful messages
7+
3. Normal pandas merges still work correctly
8+
"""
9+
10+
import pytest
11+
import pandas as pd
12+
from pandas import DataFrame, Series
13+
import pandas._testing as tm
14+
15+
16+
class TestMergeIncompatibleTypes:
    """Regression tests for merge error messages on unsupported operand types."""

    def test_merge_with_polars_dataframe(self):
        """
        Merging with a polars.DataFrame raises a TypeError naming both libraries.

        Regression test for issue #61434.
        """
        # importorskip returns the imported module, or skips the test.
        pl = pytest.importorskip("polars")

        left = DataFrame({"key": ["a", "b", "c"], "value_x": [1, 2, 3]})
        right = pl.DataFrame({"key": ["a", "b", "c"], "value_y": [10, 20, 30]})

        with pytest.raises(TypeError, match=".*polars.*pandas.*"):
            pd.merge(left, right, on="key")

    def test_merge_polars_to_pandas_conversion(self):
        """
        Converting the polars frame with ``to_pandas()`` makes the merge work.

        Exercises the workaround suggested by the error message.
        """
        pl = pytest.importorskip("polars")

        left = DataFrame({"key": ["a", "b", "c"], "value_x": [1, 2, 3]})
        polars_right = pl.DataFrame(
            {"key": ["a", "b", "c"], "value_y": [10, 20, 30]}
        )

        # Convert to pandas first, then merge as usual.
        result = pd.merge(left, polars_right.to_pandas(), on="key")

        expected = DataFrame(
            {
                "key": ["a", "b", "c"],
                "value_x": [1, 2, 3],
                "value_y": [10, 20, 30],
            }
        )
        tm.assert_frame_equal(result, expected)

    def test_merge_with_dict(self):
        """Merging with a plain dict raises a TypeError that mentions dict."""
        frame = DataFrame({"key": ["a", "b"], "value": [1, 2]})
        mapping = {"key": ["a", "b"], "value": [3, 4]}

        with pytest.raises(TypeError, match=".*dict.*"):
            pd.merge(frame, mapping, on="key")

    def test_merge_with_list(self):
        """Merging with a list raises the standard 'Can only merge' TypeError."""
        frame = DataFrame({"key": ["a", "b"], "value": [1, 2]})
        rows = [["a", 1], ["b", 2]]

        with pytest.raises(
            TypeError, match="Can only merge Series or DataFrame objects"
        ):
            pd.merge(frame, rows, on="key")

    def test_merge_pandas_baseline(self):
        """
        A plain pandas-to-pandas merge still produces the expected frame.

        Baseline guarding against regressions in existing behaviour.
        """
        left = DataFrame({"key": ["a", "b", "c"], "value_x": [1, 2, 3]})
        right = DataFrame({"key": ["a", "b", "c"], "value_y": [10, 20, 30]})

        result = pd.merge(left, right, on="key")

        expected = DataFrame(
            {
                "key": ["a", "b", "c"],
                "value_x": [1, 2, 3],
                "value_y": [10, 20, 30],
            }
        )
        tm.assert_frame_equal(result, expected)

    def test_merge_with_series_name(self):
        """Merging a DataFrame with a named Series on the index works (baseline)."""
        frame = DataFrame({"key": ["a", "b", "c"], "value_x": [1, 2, 3]})
        named = Series([10, 20, 30], name="value_y")

        result = pd.merge(frame, named, left_index=True, right_index=True)

        expected = DataFrame(
            {
                "key": ["a", "b", "c"],
                "value_x": [1, 2, 3],
                "value_y": [10, 20, 30],
            }
        )
        tm.assert_frame_equal(result, expected)

    def test_merge_with_unnamed_series(self):
        """Merging with a Series that has no name raises a ValueError."""
        frame = DataFrame({"key": ["a", "b", "c"], "value": [1, 2, 3]})
        unnamed = Series([10, 20, 30])  # deliberately left without a name

        with pytest.raises(
            ValueError, match="Cannot merge a Series without a name"
        ):
            pd.merge(frame, unnamed, left_index=True, right_index=True)

0 commit comments

Comments
 (0)