Skip to content

Commit e091b64

Browse files
committed
fix(dtypes): ensure consistent behavior of is_string_dtype for Categorical
- Handle Categorical Series and CategoricalDtype consistently in is_string_dtype
- Add tests to verify consistent results for various Categorical scenarios
1 parent 23aae9f commit e091b64

File tree

3 files changed

+62
-2
lines changed

3 files changed

+62
-2
lines changed

pandas/core/dtypes/common.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,6 +635,16 @@ def is_string_dtype(arr_or_dtype) -> bool:
635635
>>> is_string_dtype(pd.Series([1, 2], dtype=object))
636636
False
637637
"""
638+
# Handle Categorical series and CategoricalDtype consistently
639+
# - both should return False
640+
if hasattr(arr_or_dtype, "dtype") and isinstance(
641+
arr_or_dtype.dtype, CategoricalDtype
642+
):
643+
return False
644+
645+
if isinstance(arr_or_dtype, CategoricalDtype):
646+
return False
647+
638648
if hasattr(arr_or_dtype, "dtype") and _get_dtype(arr_or_dtype).kind == "O":
639649
return is_all_strings(arr_or_dtype)
640650

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import numpy as np
2+
3+
from pandas.core.dtypes.common import is_string_dtype
4+
from pandas.core.dtypes.dtypes import CategoricalDtype
5+
6+
import pandas as pd
7+
8+
9+
def test_is_string_dtype_categorical_consistency():
10+
"""Test that is_string_dtype returns consistent results for
11+
Categorical series and dtype."""
12+
# Test with CategoricalDtype directly
13+
categorical_dtype = CategoricalDtype()
14+
assert not is_string_dtype(categorical_dtype)
15+
16+
# Test with Series containing Categorical
17+
categorical_series = pd.Series(pd.Categorical(["a", "b", "c"]))
18+
assert not is_string_dtype(categorical_series)
19+
20+
# Test with ordered CategoricalDtype
21+
ordered_categorical_dtype = CategoricalDtype(ordered=True)
22+
assert not is_string_dtype(ordered_categorical_dtype)
23+
24+
# Test with Series containing ordered Categorical
25+
ordered_categorical_series = pd.Series(
26+
pd.Categorical(["a", "b", "c"], ordered=True)
27+
)
28+
assert not is_string_dtype(ordered_categorical_series)
29+
30+
# Test with CategoricalDtype with specific categories
31+
specific_categorical_dtype = CategoricalDtype(categories=["x", "y", "z"])
32+
assert not is_string_dtype(specific_categorical_dtype)
33+
34+
# Test with Series containing Categorical with specific categories
35+
specific_categorical_series = pd.Series(
36+
pd.Categorical(["x", "y", "z"], categories=["x", "y", "z"])
37+
)
38+
assert not is_string_dtype(specific_categorical_series)
39+
40+
# Test with empty Categorical
41+
empty_categorical = pd.Series(pd.Categorical([]))
42+
assert not is_string_dtype(empty_categorical)
43+
44+
# Test with Categorical containing NaN values
45+
nan_categorical = pd.Series(pd.Categorical([np.nan, "a", "b"]))
46+
assert not is_string_dtype(nan_categorical)
47+
48+
# Test with numeric Categorical
49+
numeric_categorical = pd.Series(pd.Categorical([1, 2, 3]))
50+
assert not is_string_dtype(numeric_categorical)

pandas/tests/frame/test_query_eval.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def test_query_duplicate_column_name(self, engine, parser):
168168
}
169169
).rename(columns={"B": "A"})
170170

171-
res = df.query('C == 1', engine=engine, parser=parser)
171+
res = df.query("C == 1", engine=engine, parser=parser)
172172

173173
expect = DataFrame(
174174
[[1, 1, 1]],
@@ -1411,7 +1411,7 @@ def test_expr_with_column_name_with_backtick_and_hash(self):
14111411
def test_expr_with_column_name_with_backtick(self):
14121412
# GH 59285
14131413
df = DataFrame({"a`b": (1, 2, 3), "ab": (4, 5, 6)})
1414-
result = df.query("`a``b` < 2") # noqa
1414+
result = df.query("`a``b` < 2")
14151415
# Note: Formatting checks may wrongly consider the above ``inline code``.
14161416
expected = df[df["a`b"] < 2]
14171417
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)