33import numpy as np
44import pytest
55
6- from pandas ._config import using_string_dtype
7-
86import pandas as pd
97from pandas import (
108 DataFrame ,
2119def df ():
2220 res = DataFrame (
2321 np .random .default_rng (2 ).standard_normal ((10 , 4 )),
24- columns = Index (list ("ABCD" ), dtype = object ),
22+ columns = Index (list ("ABCD" )),
2523 index = date_range ("2000-01-01" , periods = 10 , freq = "B" ),
2624 )
2725 res ["id1" ] = (res ["A" ] > 0 ).astype (np .int64 )
@@ -83,7 +81,6 @@ def test_default_col_names(self, df):
8381 result2 = df .melt (id_vars = ["id1" , "id2" ])
8482 assert result2 .columns .tolist () == ["id1" , "id2" , "variable" , "value" ]
8583
86- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
8784 def test_value_vars (self , df ):
8885 result3 = df .melt (id_vars = ["id1" , "id2" ], value_vars = "A" )
8986 assert len (result3 ) == 10
@@ -100,7 +97,6 @@ def test_value_vars(self, df):
10097 )
10198 tm .assert_frame_equal (result4 , expected4 )
10299
103- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
104100 @pytest .mark .parametrize ("type_" , (tuple , list , np .array ))
105101 def test_value_vars_types (self , type_ , df ):
106102 # GH 15348
@@ -181,7 +177,6 @@ def test_tuple_vars_fail_with_multiindex(self, id_vars, value_vars, df1):
181177 with pytest .raises (ValueError , match = msg ):
182178 df1 .melt (id_vars = id_vars , value_vars = value_vars )
183179
184- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
185180 def test_custom_var_name (self , df , var_name ):
186181 result5 = df .melt (var_name = var_name )
187182 assert result5 .columns .tolist () == ["var" , "value" ]
@@ -209,7 +204,6 @@ def test_custom_var_name(self, df, var_name):
209204 )
210205 tm .assert_frame_equal (result9 , expected9 )
211206
212- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
213207 def test_custom_value_name (self , df , value_name ):
214208 result10 = df .melt (value_name = value_name )
215209 assert result10 .columns .tolist () == ["variable" , "val" ]
@@ -239,7 +233,6 @@ def test_custom_value_name(self, df, value_name):
239233 )
240234 tm .assert_frame_equal (result14 , expected14 )
241235
242- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
243236 def test_custom_var_and_value_name (self , df , value_name , var_name ):
244237 result15 = df .melt (var_name = var_name , value_name = value_name )
245238 assert result15 .columns .tolist () == ["var" , "val" ]
@@ -364,14 +357,15 @@ def test_melt_missing_columns_raises(self):
364357 with pytest .raises (KeyError , match = msg ):
365358 multi .melt (["A" ], ["F" ], col_level = 0 )
366359
367- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
368360 def test_melt_mixed_int_str_id_vars (self ):
369361 # GH 29718
370362 df = DataFrame ({0 : ["foo" ], "a" : ["bar" ], "b" : [1 ], "d" : [2 ]})
371363 result = melt (df , id_vars = [0 , "a" ], value_vars = ["b" , "d" ])
372364 expected = DataFrame (
373365 {0 : ["foo" ] * 2 , "a" : ["bar" ] * 2 , "variable" : list ("bd" ), "value" : [1 , 2 ]}
374366 )
367+ # the df's columns are mixed type and thus object -> preserves object dtype
368+ expected ["variable" ] = expected ["variable" ].astype (object )
375369 tm .assert_frame_equal (result , expected )
376370
377371 def test_melt_mixed_int_str_value_vars (self ):
@@ -1205,12 +1199,10 @@ def test_raise_of_column_name_value(self):
12051199 ):
12061200 df .melt (id_vars = "value" , value_name = "value" )
12071201
1208- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" , strict = False )
1209- @pytest .mark .parametrize ("dtype" , ["O" , "string" ])
1210- def test_missing_stubname (self , dtype ):
1202+ def test_missing_stubname (self , any_string_dtype ):
12111203 # GH46044
12121204 df = DataFrame ({"id" : ["1" , "2" ], "a-1" : [100 , 200 ], "a-2" : [300 , 400 ]})
1213- df = df .astype ({"id" : dtype })
1205+ df = df .astype ({"id" : any_string_dtype })
12141206 result = wide_to_long (
12151207 df ,
12161208 stubnames = ["a" , "b" ],
@@ -1226,12 +1218,13 @@ def test_missing_stubname(self, dtype):
12261218 {"a" : [100 , 200 , 300 , 400 ], "b" : [np .nan ] * 4 },
12271219 index = index ,
12281220 )
1229- new_level = expected .index .levels [0 ].astype (dtype )
1221+ new_level = expected .index .levels [0 ].astype (any_string_dtype )
1222+ if any_string_dtype == "object" :
1223+ new_level = expected .index .levels [0 ].astype ("str" )
12301224 expected .index = expected .index .set_levels (new_level , level = 0 )
12311225 tm .assert_frame_equal (result , expected )
12321226
12331227
1234- @pytest .mark .xfail (using_string_dtype (), reason = "TODO(infer_string)" )
12351228def test_wide_to_long_pyarrow_string_columns ():
12361229 # GH 57066
12371230 pytest .importorskip ("pyarrow" )
@@ -1250,7 +1243,7 @@ def test_wide_to_long_pyarrow_string_columns():
12501243 )
12511244 expected = DataFrame (
12521245 [[1 , 1 ], [1 , 1 ], [1 , 2 ]],
1253- columns = Index (["D" , "R" ], dtype = object ),
1246+ columns = Index (["D" , "R" ]),
12541247 index = pd .MultiIndex .from_arrays (
12551248 [
12561249 [1 , 1 , 1 ],
0 commit comments