11import numpy as np
22import pytest
33
4- from pandas ._config import using_string_dtype
5-
64from pandas .compat import HAS_PYARROW
75from pandas .errors import SettingWithCopyWarning
86
@@ -953,15 +951,19 @@ def test_head_tail(method, using_copy_on_write, warn_copy_on_write):
953951 tm .assert_frame_equal (df , df_orig )
954952
955953
956- @pytest .mark .xfail (using_string_dtype () and HAS_PYARROW , reason = "TODO(infer_string)" )
957- def test_infer_objects (using_copy_on_write ):
958- df = DataFrame ({"a" : [1 , 2 ], "b" : "c" , "c" : 1 , "d" : "x" })
954+ def test_infer_objects (using_copy_on_write , using_infer_string ):
955+ df = DataFrame (
956+ {"a" : [1 , 2 ], "b" : Series (["x" , "y" ], dtype = object ), "c" : 1 , "d" : "x" }
957+ )
959958 df_orig = df .copy ()
960959 df2 = df .infer_objects ()
961960
962961 if using_copy_on_write :
963962 assert np .shares_memory (get_array (df2 , "a" ), get_array (df , "a" ))
964- assert np .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
963+ if using_infer_string :
964+ assert not tm .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
965+ else :
966+ assert np .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
965967
966968 else :
967969 assert not np .shares_memory (get_array (df2 , "a" ), get_array (df , "a" ))
@@ -975,19 +977,16 @@ def test_infer_objects(using_copy_on_write):
975977 tm .assert_frame_equal (df , df_orig )
976978
977979
978- @pytest .mark .xfail (
979- using_string_dtype () and not HAS_PYARROW , reason = "TODO(infer_string)"
980- )
981- def test_infer_objects_no_reference (using_copy_on_write ):
980+ def test_infer_objects_no_reference (using_copy_on_write , using_infer_string ):
982981 df = DataFrame (
983982 {
984983 "a" : [1 , 2 ],
985- "b" : "c" ,
984+ "b" : Series ([ "x" , "y" ], dtype = object ) ,
986985 "c" : 1 ,
987986 "d" : Series (
988987 [Timestamp ("2019-12-31" ), Timestamp ("2020-12-31" )], dtype = "object"
989988 ),
990- "e" : "b" ,
989+ "e" : Series ([ "z" , "w" ], dtype = object ) ,
991990 }
992991 )
993992 df = df .infer_objects ()
@@ -1001,16 +1000,22 @@ def test_infer_objects_no_reference(using_copy_on_write):
10011000 df .iloc [0 , 3 ] = Timestamp ("2018-12-31" )
10021001 if using_copy_on_write :
10031002 assert np .shares_memory (arr_a , get_array (df , "a" ))
1004- # TODO(CoW): Block splitting causes references here
1005- assert not np .shares_memory (arr_b , get_array (df , "b" ))
1003+ if using_infer_string :
1004+ # note that the underlying memory of arr_b has been copied anyway
1005+ # because of the assignment, but the EA is updated inplace so still
1006+ # appears to share memory
1007+ assert tm .shares_memory (arr_b , get_array (df , "b" ))
1008+ else :
1009+ # TODO(CoW): Block splitting causes references here
1010+ assert not np .shares_memory (arr_b , get_array (df , "b" ))
10061011 assert np .shares_memory (arr_d , get_array (df , "d" ))
10071012
10081013
1009- def test_infer_objects_reference (using_copy_on_write ):
1014+ def test_infer_objects_reference (using_copy_on_write , using_infer_string ):
10101015 df = DataFrame (
10111016 {
10121017 "a" : [1 , 2 ],
1013- "b" : "c" ,
1018+ "b" : Series ([ "x" , "y" ], dtype = object ) ,
10141019 "c" : 1 ,
10151020 "d" : Series (
10161021 [Timestamp ("2019-12-31" ), Timestamp ("2020-12-31" )], dtype = "object"
@@ -1029,7 +1034,8 @@ def test_infer_objects_reference(using_copy_on_write):
10291034 df .iloc [0 , 3 ] = Timestamp ("2018-12-31" )
10301035 if using_copy_on_write :
10311036 assert not np .shares_memory (arr_a , get_array (df , "a" ))
1032- assert not np .shares_memory (arr_b , get_array (df , "b" ))
1037+ if not using_infer_string or HAS_PYARROW :
1038+ assert not np .shares_memory (arr_b , get_array (df , "b" ))
10331039 assert np .shares_memory (arr_d , get_array (df , "d" ))
10341040
10351041
@@ -1184,15 +1190,14 @@ def test_sort_values_inplace(using_copy_on_write, obj, kwargs, warn_copy_on_writ
11841190 assert np .shares_memory (get_array (obj , "a" ), get_array (view , "a" ))
11851191
11861192
1187- @pytest .mark .xfail (using_string_dtype () and HAS_PYARROW , reason = "TODO(infer_string)" )
11881193@pytest .mark .parametrize ("decimals" , [- 1 , 0 , 1 ])
11891194def test_round (using_copy_on_write , warn_copy_on_write , decimals ):
11901195 df = DataFrame ({"a" : [1 , 2 ], "b" : "c" })
11911196 df_orig = df .copy ()
11921197 df2 = df .round (decimals = decimals )
11931198
11941199 if using_copy_on_write :
1195- assert np .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
1200+ assert tm .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
11961201 # TODO: Make inplace by using out parameter of ndarray.round?
11971202 if decimals >= 0 :
11981203 # Ensure lazy copy if no-op
0 commit comments