1+ #!/usr/bin/env python3
2+ """
3+ Working test script that reproduces the exact failing test cases
4+ """
5+
6+ import pandas as pd
7+ import pyarrow as pa
8+ from pandas .core .arrays import ArrowExtensionArray
9+
def test_timestamp():
    """Check whether an identity ``map`` keeps a tz-aware ns timestamp dtype.

    Builds a two-element ``timestamp[ns, tz=US/Eastern]`` Arrow-backed array
    (``pd.NA`` followed by one timestamp), maps ``lambda x: x`` over it with
    ``na_action='ignore'``, prints the dtype details of the input and of the
    result, and compares the two dtypes.

    Returns:
        The value of ``data_missing.dtype == result.dtype``: truthy when the
        mapped result kept the original dtype.
    """
    print("=== Testing Timestamp Case ===")

    # Create timestamp
    timestamps = pd.to_datetime(["2020-01-01 01:01:01.000001"]).tz_localize(
        "US/Eastern"
    )

    # Create with nanosecond precision like in the failing test
    arrow_dtype = pd.ArrowDtype(pa.timestamp("ns", tz="US/Eastern"))
    data_missing = ArrowExtensionArray._from_sequence(
        [pd.NA, timestamps[0]], dtype=arrow_dtype
    )

    print("Original array:")
    print(f" dtype: {data_missing.dtype} ")
    print(f" pyarrow_dtype: {data_missing.dtype.pyarrow_dtype} ")
    print(f" unit: {data_missing.dtype.pyarrow_dtype.unit} ")
    print(f" timezone: {data_missing.dtype.pyarrow_dtype.tz} ")
    print(f" values: {data_missing} ")
    print()

    # Test the map operation that's failing
    print("Testing map operation:")
    result = data_missing.map(lambda x: x, na_action="ignore")

    # When the dtype is NOT preserved (the case this script diagnoses) the
    # result may lack ``pyarrow_dtype``, or its Arrow type may lack
    # ``unit``/``tz``. Read them defensively so the mismatch is reported by
    # the comparison below instead of surfacing as an AttributeError here.
    unavailable = "<unavailable>"
    result_pa_type = getattr(result.dtype, "pyarrow_dtype", None)
    pa_type_text = unavailable if result_pa_type is None else result_pa_type
    unit_text = getattr(result_pa_type, "unit", unavailable)
    tz_text = getattr(result_pa_type, "tz", unavailable)

    print("Result array:")
    print(f" dtype: {result.dtype} ")
    print(f" pyarrow_dtype: {pa_type_text} ")
    print(f" unit: {unit_text} ")
    print(f" timezone: {tz_text} ")
    print(f" values: {result} ")
    print()

    # Check if they're equal (this is what the test is checking)
    dtypes_equal = data_missing.dtype == result.dtype
    print(f"Timestamp dtypes equal: {dtypes_equal} ")

    if not dtypes_equal:
        print("❌ TIMESTAMP TEST WOULD FAIL!")
        print(f"Expected: {data_missing.dtype} ")
        print(f"Got: {result.dtype} ")
    else:
        print("✅ Timestamp test would pass!")

    return dtypes_equal
52+
def test_integer():
    """Check whether an identity ``map`` keeps an Arrow ``int64`` dtype.

    Builds a two-element Arrow-backed ``int64`` array (``pd.NA`` followed by
    ``1``), maps ``lambda x: x`` over it with ``na_action='ignore'``, prints
    the dtype details of the input and of the result, and compares the two
    dtypes.

    Returns:
        The value of ``data_missing.dtype == result.dtype``: truthy when the
        mapped result kept the original dtype.
    """
    print("\n === Testing Integer Case ===")

    # Create integer array like in the failing test
    int_dtype = pd.ArrowDtype(pa.int64())
    data_missing = ArrowExtensionArray._from_sequence([pd.NA, 1], dtype=int_dtype)

    print("Original array:")
    print(f" dtype: {data_missing.dtype} ")
    print(f" pyarrow_dtype: {data_missing.dtype.pyarrow_dtype} ")
    print(f" values: {data_missing} ")
    print(f" _pa_array.type: {data_missing._pa_array.type} ")
    print()

    # Test the map operation
    print("Testing map operation:")
    result = data_missing.map(lambda x: x, na_action="ignore")

    # When the dtype is NOT preserved (the case this script diagnoses) the
    # result's dtype may not expose ``pyarrow_dtype``. Read it defensively so
    # the mismatch is reported by the comparison below instead of surfacing
    # as an AttributeError here.
    result_pa_type = getattr(result.dtype, "pyarrow_dtype", None)
    pa_type_text = "<unavailable>" if result_pa_type is None else result_pa_type

    print("Result array:")
    print(f" dtype: {result.dtype} ")
    print(f" pyarrow_dtype: {pa_type_text} ")
    print(f" values: {result} ")
    print()

    # Check if they're equal
    dtypes_equal = data_missing.dtype == result.dtype
    print(f"Integer dtypes equal: {dtypes_equal} ")

    if not dtypes_equal:
        print("❌ INTEGER TEST WOULD FAIL!")
        print(f"Expected: {data_missing.dtype} ")
        print(f"Got: {result.dtype} ")
    else:
        print("✅ Integer test would pass!")

    return dtypes_equal
89+
def test_cast_pointwise_directly():
    """Call ``_cast_pointwise_result`` directly with a mismatched Arrow array.

    Two scenarios are printed in order: a ns-timestamp array handed a
    us-timestamp array, then an int64 array handed a float64 array. Any
    exception raised while casting or while reporting the outcome is caught
    and printed instead of being propagated. Returns ``None``.
    """
    print("\n === Testing _cast_pointwise_result directly ===")

    # Scenario 1: nanosecond source, microsecond candidate.
    print("Testing timestamp cast:")
    eastern = "US/Eastern"
    stamp = pd.to_datetime(["2020-01-01 01:01:01.000001"]).tz_localize(eastern)[0]
    ns_array = ArrowExtensionArray._from_sequence(
        [pd.NA, stamp], dtype=pd.ArrowDtype(pa.timestamp("ns", tz=eastern))
    )
    us_array = ArrowExtensionArray._from_sequence(
        [pd.NA, stamp], dtype=pd.ArrowDtype(pa.timestamp("us", tz=eastern))
    )

    print(f"Original (ns): {ns_array.dtype} ")
    print(f"Wrong (us): {us_array.dtype} ")

    # The reporting prints stay inside the try on purpose: a failure while
    # inspecting the cast output is reported the same way as a cast failure.
    try:
        recast = ns_array._cast_pointwise_result(us_array)
        print(f"Fixed result: {recast.dtype} ")
        print(f"Timestamp fix works: {ns_array.dtype == recast.dtype} ")
    except Exception as exc:
        print(f"Timestamp cast error: {exc} ")

    # Scenario 2: int64 source, float64 candidate.
    print("\n Testing integer cast:")
    int_array = ArrowExtensionArray._from_sequence(
        [pd.NA, 1], dtype=pd.ArrowDtype(pa.int64())
    )
    float_array = ArrowExtensionArray._from_sequence(
        [pd.NA, 1.0], dtype=pd.ArrowDtype(pa.float64())
    )

    print(f"Original (int64): {int_array.dtype} ")
    print(f"Wrong (double): {float_array.dtype} ")

    try:
        recast = int_array._cast_pointwise_result(float_array)
        print(f"Fixed result: {recast.dtype} ")
        print(f"Integer fix works: {int_array.dtype == recast.dtype} ")
    except Exception as exc:
        print(f"Integer cast error: {exc} ")
129+
def debug_pa_array_creation():
    """Print the Arrow type ``pa.array`` infers for three small inputs.

    Each input is converted with ``from_pandas=True``; the heading, the input
    list and the resulting Arrow type are printed. Returns ``None``.
    """
    print("\n === Debugging pa.array() behavior ===")

    # (heading, values) pairs, reported in this order.
    cases = (
        ("Testing pa.array with integer values:", [None, 1]),
        ("Testing pa.array with float values:", [None, 1.0]),
        # pd.NA mixed with an int (this might be the issue)
        ("Testing pa.array with mixed NA/int values:", [pd.NA, 1]),
    )

    for heading, values in cases:
        print(heading)
        converted = pa.array(values, from_pandas=True)
        print(f" Input: {values} ")
        print(f" Result type: {converted.type} ")
153+
if __name__ == "__main__":
    # Script entry point: print a banner, run every check in a fixed order,
    # then summarise the two checks that return a pass/fail value.
    rule = "=" * 60
    print("Testing Arrow dtype preservation issues...")
    print(rule)

    # The two dtype checks report a result; the remaining two only print.
    ts_pass = test_timestamp()
    int_pass = test_integer()
    test_cast_pointwise_directly()
    debug_pa_array_creation()

    ts_verdict = "✅ PASS" if ts_pass else "❌ FAIL"
    int_verdict = "✅ PASS" if int_pass else "❌ FAIL"

    print("\n " + rule)
    print("SUMMARY:")
    print(f"Timestamp test: {ts_verdict} ")
    print(f"Integer test: {int_verdict} ")
0 commit comments