1717)
1818
1919
20+ def na_val (dtype ):
21+ if dtype .storage == "pyarrow_numpy" :
22+ return np .nan
23+ else :
24+ return pd .NA
25+
26+
2027@pytest .fixture
2128def dtype (string_storage ):
2229 """Fixture giving StringDtype from parametrized 'string_storage'"""
@@ -31,26 +38,34 @@ def cls(dtype):
3138
3239def test_repr (dtype ):
3340 df = pd .DataFrame ({"A" : pd .array (["a" , pd .NA , "b" ], dtype = dtype )})
34- expected = " A\n 0 a\n 1 <NA>\n 2 b"
41+ if dtype .storage == "pyarrow_numpy" :
42+ expected = " A\n 0 a\n 1 NaN\n 2 b"
43+ else :
44+ expected = " A\n 0 a\n 1 <NA>\n 2 b"
3545 assert repr (df ) == expected
3646
37- expected = "0 a\n 1 <NA>\n 2 b\n Name: A, dtype: string"
47+ if dtype .storage == "pyarrow_numpy" :
48+ expected = "0 a\n 1 NaN\n 2 b\n Name: A, dtype: string"
49+ else :
50+ expected = "0 a\n 1 <NA>\n 2 b\n Name: A, dtype: string"
3851 assert repr (df .A ) == expected
3952
4053 if dtype .storage == "pyarrow" :
4154 arr_name = "ArrowStringArray"
55+ expected = f"<{ arr_name } >\n ['a', <NA>, 'b']\n Length: 3, dtype: string"
4256 elif dtype .storage == "pyarrow_numpy" :
4357 arr_name = "ArrowStringArrayNumpySemantics"
58+ expected = f"<{ arr_name } >\n ['a', nan, 'b']\n Length: 3, dtype: string"
4459 else :
4560 arr_name = "StringArray"
46- expected = f"<{ arr_name } >\n ['a', <NA>, 'b']\n Length: 3, dtype: string"
61+ expected = f"<{ arr_name } >\n ['a', <NA>, 'b']\n Length: 3, dtype: string"
4762 assert repr (df .A .array ) == expected
4863
4964
5065def test_none_to_nan (cls ):
5166 a = cls ._from_sequence (["a" , None , "b" ])
5267 assert a [1 ] is not None
53- assert a [1 ] is pd . NA
68+ assert a [1 ] is na_val ( a . dtype )
5469
5570
5671def test_setitem_validates (cls ):
@@ -205,13 +220,9 @@ def test_comparison_methods_scalar(comparison_op, dtype):
205220 other = "a"
206221 result = getattr (a , op_name )(other )
207222 if dtype .storage == "pyarrow_numpy" :
208- expected = np .array ([getattr (item , op_name )(other ) for item in a ], dtype = object )
209- expected = (
210- pd .array (expected , dtype = "boolean" )
211- .to_numpy (na_value = False )
212- .astype (np .bool_ )
213- )
214- tm .assert_numpy_array_equal (result , expected )
223+ expected = np .array ([getattr (item , op_name )(other ) for item in a ])
224+ expected [1 ] = False
225+ tm .assert_numpy_array_equal (result , expected .astype (np .bool_ ))
215226 else :
216227 expected_dtype = "boolean[pyarrow]" if dtype .storage == "pyarrow" else "boolean"
217228 expected = np .array ([getattr (item , op_name )(other ) for item in a ], dtype = object )
@@ -407,7 +418,7 @@ def test_min_max(method, skipna, dtype, request):
407418 expected = "a" if method == "min" else "c"
408419 assert result == expected
409420 else :
410- assert result is pd . NA
421+ assert result is na_val ( arr . dtype )
411422
412423
413424@pytest .mark .parametrize ("method" , ["min" , "max" ])
@@ -475,7 +486,7 @@ def test_arrow_roundtrip(dtype, string_storage2):
475486 expected = df .astype (f"string[{ string_storage2 } ]" )
476487 tm .assert_frame_equal (result , expected )
477488 # ensure the missing value is the dtype's NA sentinel (np.nan for "pyarrow_numpy" storage, pd.NA otherwise) and not None
478- assert result .loc [2 , "a" ] is pd . NA
489+ assert result .loc [2 , "a" ] is na_val ( result [ "a" ]. dtype )
479490
480491
481492def test_arrow_load_from_zero_chunks (dtype , string_storage2 ):
@@ -573,7 +584,7 @@ def test_astype_from_float_dtype(float_dtype, dtype):
573584def test_to_numpy_returns_pdna_default (dtype ):
574585 arr = pd .array (["a" , pd .NA , "b" ], dtype = dtype )
575586 result = np .array (arr )
576- expected = np .array (["a" , pd . NA , "b" ], dtype = object )
587+ expected = np .array (["a" , na_val ( dtype ) , "b" ], dtype = object )
577588 tm .assert_numpy_array_equal (result , expected )
578589
579590
@@ -613,7 +624,7 @@ def test_setitem_scalar_with_mask_validation(dtype):
613624 mask = np .array ([False , True , False ])
614625
615626 ser [mask ] = None
616- assert ser .array [1 ] is pd . NA
627+ assert ser .array [1 ] is na_val ( ser . dtype )
617628
618629 # for other non-string we should also raise an error
619630 ser = pd .Series (["a" , "b" , "c" ], dtype = dtype )
0 commit comments