@@ -3101,24 +3101,23 @@ def test_merge_categorical_key_recursion():
 
 def test_merge_pyarrow_datetime_duplicates():
     # GH#61926
-    # Regression test for merge failing on pyarrow datetime columns with duplicates
     pytest.importorskip("pyarrow")
 
-    # Create datetime index
     t = pd.date_range("2025-07-06", periods=3, freq="h")
-
-    # Left dataframe: one row per timestamp
     df1 = DataFrame({"time": t, "val1": [1, 2, 3]})
     df1 = df1.convert_dtypes(dtype_backend="pyarrow")
 
-    # Right dataframe: two rows per timestamp (duplicates)
     df2 = DataFrame({"time": t.repeat(2), "val2": [10, 20, 30, 40, 50, 60]})
     df2 = df2.convert_dtypes(dtype_backend="pyarrow")
 
-    # This should work without raising ValueError
     result = merge(df1, df2, on="time", how="left")
 
-    # Should return 6 rows (df1's 3 timestamps x 2 matches each from df2)
-    assert len(result) == 6
-    assert result["val1"].tolist() == [1, 1, 2, 2, 3, 3]
-    assert result["val2"].tolist() == [10, 20, 30, 40, 50, 60]
+    expected = DataFrame(
+        {
+            "time": t.repeat(2),
+            "val1": [1, 1, 2, 2, 3, 3],
+            "val2": [10, 20, 30, 40, 50, 60],
+        }
+    )
+    expected = expected.convert_dtypes(dtype_backend="pyarrow")
+    tm.assert_frame_equal(result, expected)
0 commit comments