diff --git a/pandas/core/reshape/merge.py b/pandas/core/reshape/merge.py index 604181214ad44..14509f92fee55 100644 --- a/pandas/core/reshape/merge.py +++ b/pandas/core/reshape/merge.py @@ -3029,10 +3029,36 @@ def _validate_operand(obj: DataFrame | Series) -> DataFrame: return obj.to_frame() else: raise TypeError( - f"Can only merge Series or DataFrame objects, a {type(obj)} was passed" + _get_merge_error_message(obj) ) +def _get_merge_error_message(obj: object) -> str: + """Generate a helpful error message for invalid merge arguments.""" + obj_type_name = type(obj).__name__ + obj_module_name = type(obj).__module__ + + # Special handling for known DataFrame-like libraries + if obj_module_name == "polars.dataframe.frame" and obj_type_name == "DataFrame": + return ( + "Can only merge Series or DataFrame objects, received " + "polars.DataFrame. Please convert the polars DataFrame to a " + "pandas DataFrame using `.to_pandas()` or pass it to " + "pd.DataFrame()." + ) + elif "polars" in obj_module_name.lower(): + return ( + f"Can only merge Series or DataFrame objects, received " + f"{obj_module_name}.{obj_type_name} (a polars object). " + "Please convert to a pandas DataFrame using `.to_pandas()`." + ) + + return ( + f"Can only merge Series or DataFrame objects, received " + f"{obj_module_name}.{obj_type_name}. Expected a pandas Series or DataFrame." + ) + + def _items_overlap_with_suffix( left: Index, right: Index, suffixes: Suffixes ) -> tuple[Index, Index]: diff --git a/test_issue_61434_repro.py b/test_issue_61434_repro.py new file mode 100644 index 0000000000000..bb07bd612bd1e --- /dev/null +++ b/test_issue_61434_repro.py @@ -0,0 +1,129 @@ +""" +Pandas Issue #61434 - Reproduction Test + +Issue: When attempting to merge a pandas DataFrame with a polars DataFrame, +the error message is unhelpful. 
import pandas as pd

# polars is optional: without it the polars reproduction reports a skip.
try:
    import polars as pl
except ImportError:
    POLARS_AVAILABLE = False
    print("Warning: polars not installed. Install with: pip install polars")
else:
    POLARS_AVAILABLE = True

_RULE = "=" * 70


def _report_type_error(err):
    """Print the caught ``TypeError`` and return whether its text is helpful.

    The message counts as helpful when it mentions both "polars" and
    "pandas", or when it contains "must be" or "expected" (case-insensitive).
    """
    text = str(err)
    print(f"\nError caught: {type(err).__name__}")
    print(f"Error message: {text}")

    lowered = text.lower()
    if "polars" in lowered and "pandas" in lowered:
        print("\n✓ GOOD: Error message mentions both polars and pandas")
        print("✓ GOOD: User knows what went wrong")
        return True
    if "must be" in lowered or "expected" in lowered:
        print("\n✓ GOOD: Error message explains what's expected")
        return True

    print("\n✗ BAD: Error message is not helpful enough")
    print(" Expected something like:")
    print(" 'other must be pandas.DataFrame, received: polars.DataFrame'")
    print(f" But got: {text}")
    return False


def test_merge_with_polars():
    """
    Merge a pandas DataFrame with a polars DataFrame and grade the error.

    Returns True when ``pd.merge`` raises a ``TypeError`` whose message is
    judged helpful, and False otherwise (polars missing, merge succeeding,
    an unhelpful message, or a different exception type).
    """
    if not POLARS_AVAILABLE:
        print("Skipping test - polars not available")
        return False

    for line in (
        _RULE,
        "Test: Merging pandas DataFrame with polars DataFrame",
        _RULE,
    ):
        print(line)

    pandas_frame = pd.DataFrame(
        {"key": ["a", "b", "c"], "value_x": [1, 2, 3]}
    )
    polars_frame = pl.DataFrame(
        {"key": ["a", "b", "c"], "value_y": [10, 20, 30]}
    )

    print(f"\nPandas DataFrame type: {type(pandas_frame)}")
    print(f"Polars DataFrame type: {type(polars_frame)}")
    print("\nAttempting merge...")

    try:
        merged = pd.merge(pandas_frame, polars_frame, on="key")
        print(f"✗ Unexpected: merge succeeded with result type {type(merged)}")
        return False
    except TypeError as err:
        return _report_type_error(err)
    except Exception as err:
        # Any other exception type is reported as a failed reproduction.
        print(f"\n✗ Unexpected error type: {type(err).__name__}")
        print(f" {err}")
        return False


def test_merge_pandas_baseline():
    """
    Merge two pandas DataFrames on ``key`` as a sanity check.

    Returns True when the merge completes and False when it raises.
    """
    print("\n" + _RULE)
    print("Test: Merging two pandas DataFrames (baseline)")
    print(_RULE)

    left = pd.DataFrame({"key": ["a", "b", "c"], "value_x": [1, 2, 3]})
    right = pd.DataFrame({"key": ["a", "b", "c"], "value_y": [10, 20, 30]})

    try:
        merged = pd.merge(left, right, on="key")
        print("✓ Merge succeeded")
        print(f" Result shape: {merged.shape}")
        print(f" Result columns: {list(merged.columns)}")
        return True
    except Exception as err:
        print(f"✗ Baseline test failed: {err}")
        return False


if __name__ == "__main__":
    print("\n" + _RULE)
    print("PANDAS ISSUE #61434 - REPRODUCTION TEST")
    print(_RULE)
    print()

    baseline_ok = test_merge_pandas_baseline()
    polars_test_ok = test_merge_with_polars()

    baseline_label = "✓ PASS" if baseline_ok else "✗ FAIL"
    polars_label = "✓ GOOD" if polars_test_ok else "✗ NEEDS FIX"

    print("\n" + _RULE)
    print("SUMMARY")
    print(_RULE)
    print(f"Baseline (pandas merge): {baseline_label}")
    print(f"Polars test (error msg): {polars_label}")
    print()
Normal pandas merges still work correctly +""" + +import pytest +import pandas as pd +from pandas import DataFrame, Series +import pandas._testing as tm + + +class TestMergeIncompatibleTypes: + """Test merge error messages with incompatible DataFrame types.""" + + def test_merge_with_polars_dataframe(self): + """ + Test that merging with polars.DataFrame raises helpful TypeError. + + Regression test for issue #61434. + """ + pytest.importorskip("polars") + import polars as pl + + pdf = DataFrame({ + "key": ["a", "b", "c"], + "value_x": [1, 2, 3] + }) + + plf = pl.DataFrame({ + "key": ["a", "b", "c"], + "value_y": [10, 20, 30] + }) + + with pytest.raises(TypeError, match=".*polars.*pandas.*"): + pd.merge(pdf, plf, on="key") + + def test_merge_polars_to_pandas_conversion(self): + """ + Test that converting polars to pandas works. + + Shows the workaround mentioned in error message. + """ + pytest.importorskip("polars") + import polars as pl + + pdf = DataFrame({ + "key": ["a", "b", "c"], + "value_x": [1, 2, 3] + }) + + plf = pl.DataFrame({ + "key": ["a", "b", "c"], + "value_y": [10, 20, 30] + }) + + # Convert polars to pandas - this should work + plf_pd = plf.to_pandas() + result = pd.merge(pdf, plf_pd, on="key") + + expected = DataFrame({ + "key": ["a", "b", "c"], + "value_x": [1, 2, 3], + "value_y": [10, 20, 30] + }) + + tm.assert_frame_equal(result, expected) + + def test_merge_with_dict(self): + """Test that merging with dict raises TypeError with helpful message.""" + df = DataFrame({"key": ["a", "b"], "value": [1, 2]}) + + dict_obj = {"key": ["a", "b"], "value": [3, 4]} + + with pytest.raises(TypeError, match=".*dict.*"): + pd.merge(df, dict_obj, on="key") + + def test_merge_with_list(self): + """Test that merging with list raises TypeError with helpful message.""" + df = DataFrame({"key": ["a", "b"], "value": [1, 2]}) + + list_obj = [["a", 1], ["b", 2]] + + msg = "Can only merge Series or DataFrame objects" + + with pytest.raises(TypeError, match=msg): + 
pd.merge(df, list_obj, on="key") + + def test_merge_pandas_baseline(self): + """ + Test that normal pandas merge still works. + + Baseline test to ensure fix doesn't break existing functionality. + """ + df1 = DataFrame({ + "key": ["a", "b", "c"], + "value_x": [1, 2, 3] + }) + + df2 = DataFrame({ + "key": ["a", "b", "c"], + "value_y": [10, 20, 30] + }) + + result = pd.merge(df1, df2, on="key") + + expected = DataFrame({ + "key": ["a", "b", "c"], + "value_x": [1, 2, 3], + "value_y": [10, 20, 30] + }) + + tm.assert_frame_equal(result, expected) + + def test_merge_with_series_name(self): + """Test that merging with named Series works (baseline).""" + df = DataFrame({"key": ["a", "b", "c"], "value_x": [1, 2, 3]}) + s = Series([10, 20, 30], name="value_y") + + result = pd.merge(df, s, left_index=True, right_index=True) + + expected = DataFrame({ + "key": ["a", "b", "c"], + "value_x": [1, 2, 3], + "value_y": [10, 20, 30] + }) + + tm.assert_frame_equal(result, expected) + + def test_merge_with_unnamed_series(self): + """Test that merging with unnamed Series raises helpful error.""" + df = DataFrame({"key": ["a", "b", "c"], "value": [1, 2, 3]}) + s = Series([10, 20, 30]) # No name + + msg = "Cannot merge a Series without a name" + + with pytest.raises(ValueError, match=msg): + pd.merge(df, s, left_index=True, right_index=True) diff --git a/test_validation_61434.py b/test_validation_61434.py new file mode 100644 index 0000000000000..dd34c29510455 --- /dev/null +++ b/test_validation_61434.py @@ -0,0 +1,131 @@ +""" +Simple validation test for the fix - doesn't require full pandas installation +""" + +import sys +import ast + +def test_merge_py_syntax(): + """Verify merge.py has valid Python syntax.""" + with open('c:/noc_project/projects/pandas/pandas/core/reshape/merge.py', 'r') as f: + code = f.read() + + try: + ast.parse(code) + print("✓ merge.py syntax is valid") + return True + except SyntaxError as e: + print(f"✗ Syntax error in merge.py: {e}") + return False + + +def 
test_new_function_exists(): + """Verify the new _get_merge_error_message function exists.""" + with open('c:/noc_project/projects/pandas/pandas/core/reshape/merge.py', 'r') as f: + code = f.read() + + if 'def _get_merge_error_message' in code: + print("✓ Function _get_merge_error_message exists") + return True + else: + print("✗ Function _get_merge_error_message not found") + return False + + +def test_polars_special_handling(): + """Verify polars special handling exists.""" + with open('c:/noc_project/projects/pandas/pandas/core/reshape/merge.py', 'r') as f: + code = f.read() + + if 'polars.dataframe.frame' in code and 'to_pandas' in code: + print("✓ Polars special handling code found") + return True + else: + print("✗ Polars special handling not found") + return False + + +def test_error_message_improvement(): + """Verify the error message was improved.""" + with open('c:/noc_project/projects/pandas/pandas/core/reshape/merge.py', 'r') as f: + code = f.read() + + # Check that old generic message is gone + old_msg = 'f"Can only merge Series or DataFrame objects, a {type(obj)} was passed"' + + if old_msg in code: + print("✗ Old generic error message still present") + return False + + # Check that we're calling the new function + if '_get_merge_error_message(obj)' in code: + print("✓ Error message improved (now calls _get_merge_error_message)") + return True + else: + print("✗ Error message improvement not found") + return False + + +def test_regression_tests(): + """Verify regression test file has tests.""" + with open('c:/noc_project/projects/pandas/test_issue_61434_tests.py', 'r') as f: + code = f.read() + + required_tests = [ + 'test_merge_with_polars_dataframe', + 'test_merge_pandas_baseline', + 'test_merge_with_dict', + ] + + all_found = all(test in code for test in required_tests) + + if all_found: + print(f"✓ All required tests found: {required_tests}") + return True + else: + missing = [t for t in required_tests if t not in code] + print(f"✗ Missing tests: 
{missing}") + return False + + +if __name__ == "__main__": + print("=" * 70) + print("VALIDATION TESTS FOR PANDAS ISSUE #61434 FIX") + print("=" * 70) + print() + + tests = [ + ("Syntax validation", test_merge_py_syntax), + ("New function exists", test_new_function_exists), + ("Polars handling", test_polars_special_handling), + ("Error message improved", test_error_message_improvement), + ("Regression tests", test_regression_tests), + ] + + results = [] + for name, test_func in tests: + print(f"Testing: {name}") + try: + result = test_func() + results.append((name, result)) + except Exception as e: + print(f"✗ Test failed with error: {e}") + results.append((name, False)) + print() + + print("=" * 70) + print("SUMMARY") + print("=" * 70) + for name, result in results: + status = "✓ PASS" if result else "✗ FAIL" + print(f"{status}: {name}") + + all_passed = all(result for _, result in results) + print() + + if all_passed: + print("✅ All validation tests PASSED - Ready to commit!") + sys.exit(0) + else: + print("❌ Some validation tests FAILED - Fix needed") + sys.exit(1)