@@ -425,7 +425,7 @@ def test_frame_multi_key_function_list():
425425 tm .assert_frame_equal (agged , expected )
426426
427427
428- def test_frame_multi_key_function_list_partial_failure ():
428+ def test_frame_multi_key_function_list_partial_failure (using_infer_string ):
429429 data = DataFrame (
430430 {
431431 "A" : [
@@ -476,6 +476,8 @@ def test_frame_multi_key_function_list_partial_failure():
476476 grouped = data .groupby (["A" , "B" ])
477477 funcs = ["mean" , "std" ]
478478 msg = re .escape ("agg function failed [how->mean,dtype->" )
479+ if using_infer_string :
480+ msg = "dtype 'str' does not support operation 'mean'"
479481 with pytest .raises (TypeError , match = msg ):
480482 grouped .agg (funcs )
481483
@@ -662,9 +664,11 @@ def test_groupby_multi_corner(df):
662664 tm .assert_frame_equal (agged , expected )
663665
664666
665- def test_raises_on_nuisance (df ):
667+ def test_raises_on_nuisance (df , using_infer_string ):
666668 grouped = df .groupby ("A" )
667669 msg = re .escape ("agg function failed [how->mean,dtype->" )
670+ if using_infer_string :
671+ msg = "dtype 'str' does not support operation 'mean'"
668672 with pytest .raises (TypeError , match = msg ):
669673 grouped .agg ("mean" )
670674 with pytest .raises (TypeError , match = msg ):
@@ -699,15 +703,18 @@ def test_keep_nuisance_agg(df, agg_function):
699703 ["sum" , "mean" , "prod" , "std" , "var" , "sem" , "median" ],
700704)
701705@pytest .mark .parametrize ("numeric_only" , [True , False ])
702- def test_omit_nuisance_agg (df , agg_function , numeric_only ):
706+ def test_omit_nuisance_agg (df , agg_function , numeric_only , using_infer_string ):
703707 # GH 38774, GH 38815
704708 grouped = df .groupby ("A" )
705709
706710 no_drop_nuisance = ("var" , "std" , "sem" , "mean" , "prod" , "median" )
707711 if agg_function in no_drop_nuisance and not numeric_only :
708712 # Added numeric_only as part of GH#46560; these do not drop nuisance
709713 # columns when numeric_only is False
710- if agg_function in ("std" , "sem" ):
714+ if using_infer_string :
715+ msg = f"dtype 'str' does not support operation '{ agg_function } '"
716+ klass = TypeError
717+ elif agg_function in ("std" , "sem" ):
711718 klass = ValueError
712719 msg = "could not convert string to float: 'one'"
713720 else :
@@ -728,16 +735,24 @@ def test_omit_nuisance_agg(df, agg_function, numeric_only):
728735 tm .assert_frame_equal (result , expected )
729736
730737
731- def test_raise_on_nuisance_python_single (df ):
738+ def test_raise_on_nuisance_python_single (df , using_infer_string ):
732739 # GH 38815
733740 grouped = df .groupby ("A" )
734- with pytest .raises (ValueError , match = "could not convert" ):
741+
742+ err = ValueError
743+ msg = "could not convert"
744+ if using_infer_string :
745+ err = TypeError
746+ msg = "dtype 'str' does not support operation 'skew'"
747+ with pytest .raises (err , match = msg ):
735748 grouped .skew ()
736749
737750
738- def test_raise_on_nuisance_python_multiple (three_group ):
751+ def test_raise_on_nuisance_python_multiple (three_group , using_infer_string ):
739752 grouped = three_group .groupby (["A" , "B" ])
740753 msg = re .escape ("agg function failed [how->mean,dtype->" )
754+ if using_infer_string :
755+ msg = "dtype 'str' does not support operation 'mean'"
741756 with pytest .raises (TypeError , match = msg ):
742757 grouped .agg ("mean" )
743758 with pytest .raises (TypeError , match = msg ):
@@ -775,12 +790,16 @@ def test_nonsense_func():
775790 df .groupby (lambda x : x + "foo" )
776791
777792
778- def test_wrap_aggregated_output_multindex (multiindex_dataframe_random_data ):
793+ def test_wrap_aggregated_output_multindex (
794+ multiindex_dataframe_random_data , using_infer_string
795+ ):
779796 df = multiindex_dataframe_random_data .T
780797 df ["baz" , "two" ] = "peekaboo"
781798
782799 keys = [np .array ([0 , 0 , 1 ]), np .array ([0 , 0 , 1 ])]
783800 msg = re .escape ("agg function failed [how->mean,dtype->" )
801+ if using_infer_string :
802+ msg = "dtype 'str' does not support operation 'mean'"
784803 with pytest .raises (TypeError , match = msg ):
785804 df .groupby (keys ).agg ("mean" )
786805 agged = df .drop (columns = ("baz" , "two" )).groupby (keys ).agg ("mean" )
@@ -960,8 +979,10 @@ def test_groupby_with_hier_columns():
960979
961980def test_grouping_ndarray (df ):
962981 grouped = df .groupby (df ["A" ].values )
982+ grouped2 = df .groupby (df ["A" ].rename (None ))
983+
963984 result = grouped .sum ()
964- expected = df . groupby ( df [ "A" ]. rename ( None )) .sum ()
985+ expected = grouped2 .sum ()
965986 tm .assert_frame_equal (result , expected )
966987
967988
@@ -1457,8 +1478,8 @@ def test_no_dummy_key_names(df):
14571478 result = df .groupby (df ["A" ].values ).sum ()
14581479 assert result .index .name is None
14591480
1460- result = df .groupby ([df ["A" ].values , df ["B" ].values ]).sum ()
1461- assert result .index .names == (None , None )
1481+ result2 = df .groupby ([df ["A" ].values , df ["B" ].values ]).sum ()
1482+ assert result2 .index .names == (None , None )
14621483
14631484
14641485def test_groupby_sort_multiindex_series ():
@@ -1761,6 +1782,7 @@ def get_categorical_invalid_expected():
17611782 is_per = isinstance (df .dtypes .iloc [0 ], pd .PeriodDtype )
17621783 is_dt64 = df .dtypes .iloc [0 ].kind == "M"
17631784 is_cat = isinstance (values , Categorical )
1785+ is_str = isinstance (df .dtypes .iloc [0 ], pd .StringDtype )
17641786
17651787 if (
17661788 isinstance (values , Categorical )
@@ -1785,13 +1807,15 @@ def get_categorical_invalid_expected():
17851807
17861808 if op in ["prod" , "sum" , "skew" ]:
17871809 # ops that require more than just ordered-ness
1788- if is_dt64 or is_cat or is_per :
1810+ if is_dt64 or is_cat or is_per or ( is_str and op != "sum" ) :
17891811 # GH#41291
17901812 # datetime64 -> prod and sum are invalid
17911813 if is_dt64 :
17921814 msg = "datetime64 type does not support"
17931815 elif is_per :
17941816 msg = "Period type does not support"
1817+ elif is_str :
1818+ msg = f"dtype 'str' does not support operation '{ op } '"
17951819 else :
17961820 msg = "category type does not support"
17971821 if op == "skew" :
@@ -2714,7 +2738,7 @@ def test_obj_with_exclusions_duplicate_columns():
27142738def test_groupby_numeric_only_std_no_result (numeric_only ):
27152739 # GH 51080
27162740 dicts_non_numeric = [{"a" : "foo" , "b" : "bar" }, {"a" : "car" , "b" : "dar" }]
2717- df = DataFrame (dicts_non_numeric )
2741+ df = DataFrame (dicts_non_numeric , dtype = object )
27182742 dfgb = df .groupby ("a" , as_index = False , sort = False )
27192743
27202744 if numeric_only :
@@ -2773,10 +2797,14 @@ def test_grouping_with_categorical_interval_columns():
27732797def test_groupby_sum_on_nan_should_return_nan (bug_var ):
27742798 # GH 24196
27752799 df = DataFrame ({"A" : [bug_var , bug_var , bug_var , np .nan ]})
2800+ if isinstance (bug_var , str ):
2801+ df = df .astype (object )
27762802 dfgb = df .groupby (lambda x : x )
27772803 result = dfgb .sum (min_count = 1 )
27782804
2779- expected_df = DataFrame ([bug_var , bug_var , bug_var , None ], columns = ["A" ])
2805+ expected_df = DataFrame (
2806+ [bug_var , bug_var , bug_var , None ], columns = ["A" ], dtype = df ["A" ].dtype
2807+ )
27802808 tm .assert_frame_equal (result , expected_df )
27812809
27822810
0 commit comments