11import numpy as np
22import pytest
33
4- from pandas ._config import using_string_dtype
5-
64from pandas .compat import HAS_PYARROW
75
86import pandas as pd
@@ -716,14 +714,18 @@ def test_head_tail(method):
716714 tm .assert_frame_equal (df , df_orig )
717715
718716
719- @pytest .mark .xfail (using_string_dtype () and HAS_PYARROW , reason = "TODO(infer_string)" )
720- def test_infer_objects ():
721- df = DataFrame ({"a" : [1 , 2 ], "b" : "c" , "c" : 1 , "d" : "x" })
717+ def test_infer_objects (using_infer_string ):
718+ df = DataFrame (
719+ {"a" : [1 , 2 ], "b" : Series (["x" , "y" ], dtype = object ), "c" : 1 , "d" : "x" }
720+ )
722721 df_orig = df .copy ()
723722 df2 = df .infer_objects ()
724723
725724 assert np .shares_memory (get_array (df2 , "a" ), get_array (df , "a" ))
726- assert np .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
725+ if using_infer_string and HAS_PYARROW :
726+ assert not tm .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
727+ else :
728+ assert np .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
727729
728730 df2 .iloc [0 , 0 ] = 0
729731 df2 .iloc [0 , 1 ] = "d"
@@ -732,19 +734,16 @@ def test_infer_objects():
732734 tm .assert_frame_equal (df , df_orig )
733735
734736
735- @pytest .mark .xfail (
736- using_string_dtype () and not HAS_PYARROW , reason = "TODO(infer_string)"
737- )
738- def test_infer_objects_no_reference ():
737+ def test_infer_objects_no_reference (using_infer_string ):
739738 df = DataFrame (
740739 {
741740 "a" : [1 , 2 ],
742- "b" : "c" ,
741+ "b" : Series ([ "x" , "y" ], dtype = object ) ,
743742 "c" : 1 ,
744743 "d" : Series (
745744 [Timestamp ("2019-12-31" ), Timestamp ("2020-12-31" )], dtype = "object"
746745 ),
747- "e" : "b" ,
746+ "e" : Series ([ "z" , "w" ], dtype = object ) ,
748747 }
749748 )
750749 df = df .infer_objects ()
@@ -757,16 +756,22 @@ def test_infer_objects_no_reference():
757756 df .iloc [0 , 1 ] = "d"
758757 df .iloc [0 , 3 ] = Timestamp ("2018-12-31" )
759758 assert np .shares_memory (arr_a , get_array (df , "a" ))
760- # TODO(CoW): Block splitting causes references here
761- assert not np .shares_memory (arr_b , get_array (df , "b" ))
759+ if using_infer_string and HAS_PYARROW :
760+ # note that the underlying memory of arr_b has been copied anyway
761+ # because of the assignment, but the EA is updated inplace so still
762+ # appears to share memory
763+ assert tm .shares_memory (arr_b , get_array (df , "b" ))
764+ else :
765+ # TODO(CoW): Block splitting causes references here
766+ assert not np .shares_memory (arr_b , get_array (df , "b" ))
762767 assert np .shares_memory (arr_d , get_array (df , "d" ))
763768
764769
765770def test_infer_objects_reference ():
766771 df = DataFrame (
767772 {
768773 "a" : [1 , 2 ],
769- "b" : "c" ,
774+ "b" : Series ([ "x" , "y" ], dtype = object ) ,
770775 "c" : 1 ,
771776 "d" : Series (
772777 [Timestamp ("2019-12-31" ), Timestamp ("2020-12-31" )], dtype = "object"
@@ -904,14 +909,13 @@ def test_sort_values_inplace(obj, kwargs):
904909 tm .assert_equal (view , obj_orig )
905910
906911
907- @pytest .mark .xfail (using_string_dtype () and HAS_PYARROW , reason = "TODO(infer_string)" )
908912@pytest .mark .parametrize ("decimals" , [- 1 , 0 , 1 ])
909913def test_round (decimals ):
910914 df = DataFrame ({"a" : [1 , 2 ], "b" : "c" })
911915 df_orig = df .copy ()
912916 df2 = df .round (decimals = decimals )
913917
914- assert np .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
918+ assert tm .shares_memory (get_array (df2 , "b" ), get_array (df , "b" ))
915919 # TODO: Make inplace by using out parameter of ndarray.round?
916920 if decimals >= 0 :
917921 # Ensure lazy copy if no-op
0 commit comments