@@ -640,7 +640,7 @@ def test_frame_multi_key_function_list():
640640 tm .assert_frame_equal (agged , expected )
641641
642642
643- def test_frame_multi_key_function_list_partial_failure ():
643+ def test_frame_multi_key_function_list_partial_failure (using_infer_string ):
644644 data = DataFrame (
645645 {
646646 "A" : [
@@ -691,6 +691,8 @@ def test_frame_multi_key_function_list_partial_failure():
691691 grouped = data .groupby (["A" , "B" ])
692692 funcs = ["mean" , "std" ]
693693 msg = re .escape ("agg function failed [how->mean,dtype->" )
694+ if using_infer_string :
695+ msg = "dtype 'str' does not support operation 'mean'"
694696 with pytest .raises (TypeError , match = msg ):
695697 grouped .agg (funcs )
696698
@@ -981,9 +983,11 @@ def test_groupby_multi_corner(df):
981983 tm .assert_frame_equal (agged , expected )
982984
983985
984- def test_raises_on_nuisance (df ):
986+ def test_raises_on_nuisance (df , using_infer_string ):
985987 grouped = df .groupby ("A" )
986988 msg = re .escape ("agg function failed [how->mean,dtype->" )
989+ if using_infer_string :
990+ msg = "dtype 'str' does not support operation 'mean'"
987991 with pytest .raises (TypeError , match = msg ):
988992 grouped .agg ("mean" )
989993 with pytest .raises (TypeError , match = msg ):
@@ -1026,15 +1030,18 @@ def test_keep_nuisance_agg(df, agg_function):
10261030 ["sum" , "mean" , "prod" , "std" , "var" , "sem" , "median" ],
10271031)
10281032@pytest .mark .parametrize ("numeric_only" , [True , False ])
1029- def test_omit_nuisance_agg (df , agg_function , numeric_only ):
1033+ def test_omit_nuisance_agg (df , agg_function , numeric_only , using_infer_string ):
10301034 # GH 38774, GH 38815
10311035 grouped = df .groupby ("A" )
10321036
10331037 no_drop_nuisance = ("var" , "std" , "sem" , "mean" , "prod" , "median" )
10341038 if agg_function in no_drop_nuisance and not numeric_only :
10351039 # Added numeric_only as part of GH#46560; these do not drop nuisance
10361040 # columns when numeric_only is False
1037- if agg_function in ("std" , "sem" ):
1041+ if using_infer_string :
1042+ msg = f"dtype 'str' does not support operation '{ agg_function } '"
1043+ klass = TypeError
1044+ elif agg_function in ("std" , "sem" ):
10381045 klass = ValueError
10391046 msg = "could not convert string to float: 'one'"
10401047 else :
@@ -1055,16 +1062,24 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only):
10551062 tm .assert_frame_equal (result , expected )
10561063
10571064
1058- def test_raise_on_nuisance_python_single (df ):
1065+ def test_raise_on_nuisance_python_single (df , using_infer_string ):
10591066 # GH 38815
10601067 grouped = df .groupby ("A" )
1061- with pytest .raises (ValueError , match = "could not convert" ):
1068+
1069+ err = ValueError
1070+ msg = "could not convert"
1071+ if using_infer_string :
1072+ err = TypeError
1073+ msg = "dtype 'str' does not support operation 'skew'"
1074+ with pytest .raises (err , match = msg ):
10621075 grouped .skew ()
10631076
10641077
1065- def test_raise_on_nuisance_python_multiple (three_group ):
1078+ def test_raise_on_nuisance_python_multiple (three_group , using_infer_string ):
10661079 grouped = three_group .groupby (["A" , "B" ])
10671080 msg = re .escape ("agg function failed [how->mean,dtype->" )
1081+ if using_infer_string :
1082+ msg = "dtype 'str' does not support operation 'mean'"
10681083 with pytest .raises (TypeError , match = msg ):
10691084 grouped .agg ("mean" )
10701085 with pytest .raises (TypeError , match = msg ):
@@ -1102,12 +1117,16 @@ def test_nonsense_func():
11021117 df .groupby (lambda x : x + "foo" )
11031118
11041119
1105- def test_wrap_aggregated_output_multindex (multiindex_dataframe_random_data ):
1120+ def test_wrap_aggregated_output_multindex (
1121+ multiindex_dataframe_random_data , using_infer_string
1122+ ):
11061123 df = multiindex_dataframe_random_data .T
11071124 df ["baz" , "two" ] = "peekaboo"
11081125
11091126 keys = [np .array ([0 , 0 , 1 ]), np .array ([0 , 0 , 1 ])]
11101127 msg = re .escape ("agg function failed [how->mean,dtype->" )
1128+ if using_infer_string :
1129+ msg = "dtype 'str' does not support operation 'mean'"
11111130 with pytest .raises (TypeError , match = msg ):
11121131 df .groupby (keys ).agg ("mean" )
11131132 agged = df .drop (columns = ("baz" , "two" )).groupby (keys ).agg ("mean" )
@@ -1299,8 +1318,10 @@ def test_groupby_with_hier_columns():
12991318
13001319def test_grouping_ndarray (df ):
13011320 grouped = df .groupby (df ["A" ].values )
1321+ grouped2 = df .groupby (df ["A" ].rename (None ))
1322+
13021323 result = grouped .sum ()
1303- expected = df . groupby ( df [ "A" ]. rename ( None )) .sum ()
1324+ expected = grouped2 .sum ()
13041325 tm .assert_frame_equal (result , expected )
13051326
13061327
@@ -1793,8 +1814,8 @@ def test_no_dummy_key_names(df):
17931814 result = df .groupby (df ["A" ].values ).sum ()
17941815 assert result .index .name is None
17951816
1796- result = df .groupby ([df ["A" ].values , df ["B" ].values ]).sum ()
1797- assert result .index .names == (None , None )
1817+ result2 = df .groupby ([df ["A" ].values , df ["B" ].values ]).sum ()
1818+ assert result2 .index .names == (None , None )
17981819
17991820
18001821def test_groupby_sort_multiindex_series ():
@@ -2099,6 +2120,7 @@ def get_categorical_invalid_expected():
20992120 is_per = isinstance (df .dtypes .iloc [0 ], pd .PeriodDtype )
21002121 is_dt64 = df .dtypes .iloc [0 ].kind == "M"
21012122 is_cat = isinstance (values , Categorical )
2123+ is_str = isinstance (df .dtypes .iloc [0 ], pd .StringDtype )
21022124
21032125 if (
21042126 isinstance (values , Categorical )
@@ -2123,13 +2145,15 @@ def get_categorical_invalid_expected():
21232145
21242146 if op in ["prod" , "sum" , "skew" ]:
21252147 # ops that require more than just ordered-ness
2126- if is_dt64 or is_cat or is_per :
2148+ if is_dt64 or is_cat or is_per or ( is_str and op != "sum" ) :
21272149 # GH#41291
21282150 # datetime64 -> prod and sum are invalid
21292151 if is_dt64 :
21302152 msg = "datetime64 type does not support"
21312153 elif is_per :
21322154 msg = "Period type does not support"
2155+ elif is_str :
2156+ msg = f"dtype 'str' does not support operation '{ op } '"
21332157 else :
21342158 msg = "category type does not support"
21352159 if op == "skew" :
@@ -3083,7 +3107,7 @@ def test_obj_with_exclusions_duplicate_columns():
30833107def test_groupby_numeric_only_std_no_result (numeric_only ):
30843108 # GH 51080
30853109 dicts_non_numeric = [{"a" : "foo" , "b" : "bar" }, {"a" : "car" , "b" : "dar" }]
3086- df = DataFrame (dicts_non_numeric )
3110+ df = DataFrame (dicts_non_numeric , dtype = object )
30873111 dfgb = df .groupby ("a" , as_index = False , sort = False )
30883112
30893113 if numeric_only :
@@ -3142,10 +3166,14 @@ def test_grouping_with_categorical_interval_columns():
31423166def test_groupby_sum_on_nan_should_return_nan (bug_var ):
31433167 # GH 24196
31443168 df = DataFrame ({"A" : [bug_var , bug_var , bug_var , np .nan ]})
3169+ if isinstance (bug_var , str ):
3170+ df = df .astype (object )
31453171 dfgb = df .groupby (lambda x : x )
31463172 result = dfgb .sum (min_count = 1 )
31473173
3148- expected_df = DataFrame ([bug_var , bug_var , bug_var , None ], columns = ["A" ])
3174+ expected_df = DataFrame (
3175+ [bug_var , bug_var , bug_var , None ], columns = ["A" ], dtype = df ["A" ].dtype
3176+ )
31493177 tm .assert_frame_equal (result , expected_df )
31503178
31513179
0 commit comments