66import numpy as np
77import pytest
88
9- from pandas ._config import using_string_dtype
10-
119import pandas as pd
1210from pandas import (
1311 DataFrame ,
@@ -30,7 +28,6 @@ def mix_abc() -> dict[str, list[float | str]]:
3028
3129
3230class TestDataFrameReplace :
33- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
3431 def test_replace_inplace (self , datetime_frame , float_string_frame ):
3532 datetime_frame .loc [datetime_frame .index [:5 ], "A" ] = np .nan
3633 datetime_frame .loc [datetime_frame .index [- 5 :], "A" ] = np .nan
@@ -46,7 +43,9 @@ def test_replace_inplace(self, datetime_frame, float_string_frame):
4643 mf .iloc [- 10 :, mf .columns .get_loc ("A" )] = np .nan
4744
4845 result = float_string_frame .replace (np .nan , 0 )
49- expected = float_string_frame .fillna (value = 0 )
46+ expected = float_string_frame .copy ()
47+ expected ["foo" ] = expected ["foo" ].astype (object )
48+ expected = expected .fillna (value = 0 )
5049 tm .assert_frame_equal (result , expected )
5150
5251 tsframe = datetime_frame .copy ()
@@ -298,20 +297,22 @@ def test_regex_replace_dict_nested_non_first_character(
298297 tm .assert_frame_equal (result , expected )
299298
300299 def test_regex_replace_dict_nested_gh4115 (self ):
301- df = DataFrame ({"Type" : ["Q" , "T" , "Q" , "Q" , "T" ], "tmp" : 2 })
302- expected = DataFrame ({"Type" : [0 , 1 , 0 , 0 , 1 ], "tmp" : 2 })
300+ df = DataFrame (
301+ {"Type" : Series (["Q" , "T" , "Q" , "Q" , "T" ], dtype = object ), "tmp" : 2 }
302+ )
303+ expected = DataFrame ({"Type" : Series ([0 , 1 , 0 , 0 , 1 ], dtype = object ), "tmp" : 2 })
303304 msg = "Downcasting behavior in `replace`"
304305 with tm .assert_produces_warning (FutureWarning , match = msg ):
305306 result = df .replace ({"Type" : {"Q" : 0 , "T" : 1 }})
307+
306308 tm .assert_frame_equal (result , expected )
307309
308- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
309310 def test_regex_replace_list_to_scalar (self , mix_abc ):
310311 df = DataFrame (mix_abc )
311312 expec = DataFrame (
312313 {
313314 "a" : mix_abc ["a" ],
314- "b" : np . array ([np .nan ] * 4 ),
315+ "b" : Series ([np .nan ] * 4 , dtype = "str" ),
315316 "c" : [np .nan , np .nan , np .nan , "d" ],
316317 }
317318 )
@@ -334,7 +335,6 @@ def test_regex_replace_list_to_scalar(self, mix_abc):
334335 tm .assert_frame_equal (res2 , expec )
335336 tm .assert_frame_equal (res3 , expec )
336337
337- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
338338 def test_regex_replace_str_to_numeric (self , mix_abc ):
339339 # what happens when you try to replace a numeric value with a regex?
340340 df = DataFrame (mix_abc )
@@ -346,11 +346,12 @@ def test_regex_replace_str_to_numeric(self, mix_abc):
346346 return_value = res3 .replace (regex = r"\s*\.\s*" , value = 0 , inplace = True )
347347 assert return_value is None
348348 expec = DataFrame ({"a" : mix_abc ["a" ], "b" : ["a" , "b" , 0 , 0 ], "c" : mix_abc ["c" ]})
349+ # TODO(infer_string)
350+ expec ["c" ] = expec ["c" ].astype (object )
349351 tm .assert_frame_equal (res , expec )
350352 tm .assert_frame_equal (res2 , expec )
351353 tm .assert_frame_equal (res3 , expec )
352354
353- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
354355 def test_regex_replace_regex_list_to_numeric (self , mix_abc ):
355356 df = DataFrame (mix_abc )
356357 res = df .replace ([r"\s*\.\s*" , "b" ], 0 , regex = True )
@@ -566,21 +567,28 @@ def test_replace_convert(self):
566567 res = rep .dtypes
567568 tm .assert_series_equal (expec , res )
568569
569- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
570570 def test_replace_mixed (self , float_string_frame ):
571571 mf = float_string_frame
572572 mf .iloc [5 :20 , mf .columns .get_loc ("foo" )] = np .nan
573573 mf .iloc [- 10 :, mf .columns .get_loc ("A" )] = np .nan
574574
575575 result = float_string_frame .replace (np .nan , - 18 )
576- expected = float_string_frame .fillna (value = - 18 )
576+ expected = float_string_frame .copy ()
577+ expected ["foo" ] = expected ["foo" ].astype (object )
578+ expected = expected .fillna (value = - 18 )
577579 tm .assert_frame_equal (result , expected )
578- tm .assert_frame_equal (result .replace (- 18 , np .nan ), float_string_frame )
580+ expected2 = float_string_frame .copy ()
581+ expected2 ["foo" ] = expected2 ["foo" ].astype (object )
582+ tm .assert_frame_equal (result .replace (- 18 , np .nan ), expected2 )
579583
580584 result = float_string_frame .replace (np .nan , - 1e8 )
581- expected = float_string_frame .fillna (value = - 1e8 )
585+ expected = float_string_frame .copy ()
586+ expected ["foo" ] = expected ["foo" ].astype (object )
587+ expected = expected .fillna (value = - 1e8 )
582588 tm .assert_frame_equal (result , expected )
583- tm .assert_frame_equal (result .replace (- 1e8 , np .nan ), float_string_frame )
589+ expected2 = float_string_frame .copy ()
590+ expected2 ["foo" ] = expected2 ["foo" ].astype (object )
591+ tm .assert_frame_equal (result .replace (- 1e8 , np .nan ), expected2 )
584592
585593 def test_replace_mixed_int_block_upcasting (self ):
586594 # int block upcasting
@@ -641,7 +649,7 @@ def test_replace_mixed2(self, using_infer_string):
641649
642650 expected = DataFrame (
643651 {
644- "A" : Series (["foo" , "bar" ]),
652+ "A" : Series (["foo" , "bar" ], dtype = "object" ),
645653 "B" : Series ([0 , "foo" ], dtype = "object" ),
646654 }
647655 )
@@ -958,15 +966,16 @@ def test_replace_limit(self):
958966 # TODO
959967 pass
960968
961- def test_replace_dict_no_regex (self ):
969+ def test_replace_dict_no_regex (self , any_string_dtype ):
962970 answer = Series (
963971 {
964972 0 : "Strongly Agree" ,
965973 1 : "Agree" ,
966974 2 : "Neutral" ,
967975 3 : "Disagree" ,
968976 4 : "Strongly Disagree" ,
969- }
977+ },
978+ dtype = any_string_dtype ,
970979 )
971980 weights = {
972981 "Agree" : 4 ,
@@ -981,15 +990,16 @@ def test_replace_dict_no_regex(self):
981990 result = answer .replace (weights )
982991 tm .assert_series_equal (result , expected )
983992
984- def test_replace_series_no_regex (self ):
993+ def test_replace_series_no_regex (self , any_string_dtype ):
985994 answer = Series (
986995 {
987996 0 : "Strongly Agree" ,
988997 1 : "Agree" ,
989998 2 : "Neutral" ,
990999 3 : "Disagree" ,
9911000 4 : "Strongly Disagree" ,
992- }
1001+ },
1002+ dtype = any_string_dtype ,
9931003 )
9941004 weights = Series (
9951005 {
@@ -1087,16 +1097,15 @@ def test_nested_dict_overlapping_keys_replace_str(self):
10871097 expected = df .replace ({"a" : dict (zip (astr , bstr ))})
10881098 tm .assert_frame_equal (result , expected )
10891099
1090- @pytest .mark .xfail (using_string_dtype (), reason = "can't set float into string" )
1091- def test_replace_swapping_bug (self , using_infer_string ):
1100+ def test_replace_swapping_bug (self ):
10921101 df = DataFrame ({"a" : [True , False , True ]})
10931102 res = df .replace ({"a" : {True : "Y" , False : "N" }})
1094- expect = DataFrame ({"a" : ["Y" , "N" , "Y" ]})
1103+ expect = DataFrame ({"a" : ["Y" , "N" , "Y" ]}, dtype = object )
10951104 tm .assert_frame_equal (res , expect )
10961105
10971106 df = DataFrame ({"a" : [0 , 1 , 0 ]})
10981107 res = df .replace ({"a" : {0 : "Y" , 1 : "N" }})
1099- expect = DataFrame ({"a" : ["Y" , "N" , "Y" ]})
1108+ expect = DataFrame ({"a" : ["Y" , "N" , "Y" ]}, dtype = object )
11001109 tm .assert_frame_equal (res , expect )
11011110
11021111 def test_replace_period (self ):
@@ -1372,7 +1381,7 @@ def test_replace_commutative(self, df, to_replace, exp):
13721381 )
13731382 def test_replace_replacer_dtype (self , replacer ):
13741383 # GH26632
1375- df = DataFrame (["a" ])
1384+ df = DataFrame (["a" ], dtype = object )
13761385 msg = "Downcasting behavior in `replace` "
13771386 with tm .assert_produces_warning (FutureWarning , match = msg ):
13781387 result = df .replace ({"a" : replacer , "b" : replacer })
@@ -1489,6 +1498,7 @@ def test_replace_value_category_type(self):
14891498 input_df = input_df .replace ("obj1" , "obj9" )
14901499 result = input_df .replace ("cat2" , "catX" )
14911500
1501+ result = result .astype ({"col1" : "int64" , "col3" : "float64" , "col5" : "str" })
14921502 tm .assert_frame_equal (result , expected )
14931503
14941504 def test_replace_dict_category_type (self ):
@@ -1650,6 +1660,14 @@ def test_replace_regex_dtype_frame(self, regex):
16501660 expected_df2 = DataFrame ({"A" : [1 ], "B" : ["1" ]})
16511661 with tm .assert_produces_warning (FutureWarning , match = msg ):
16521662 result_df2 = df2 .replace (to_replace = "0" , value = 1 , regex = regex )
1663+
1664+ if regex :
1665+ # TODO(infer_string): both string columns get cast to object,
1666+ # while only needed for column A
1667+ expected_df2 = DataFrame ({"A" : [1 ], "B" : ["1" ]}, dtype = object )
1668+ else :
1669+ expected_df2 = DataFrame ({"A" : Series ([1 ], dtype = object ), "B" : ["1" ]})
1670+ result_df2 = df2 .replace (to_replace = "0" , value = 1 , regex = regex )
16531671 tm .assert_frame_equal (result_df2 , expected_df2 )
16541672
16551673 def test_replace_with_value_also_being_replaced (self ):
0 commit comments