From 7bc3b08d0fc5f7889d2084b7bdf785a76167855a Mon Sep 17 00:00:00 2001 From: thanhlecongg Date: Tue, 11 Nov 2025 14:35:50 +1100 Subject: [PATCH 1/4] Add check against dtype of string in Categorical (#63045) --- pandas/core/arrays/categorical.py | 2 +- pandas/tests/arrays/categorical/test_repr.py | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c6e01096ad158..d40c9e8cbcde2 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2280,7 +2280,7 @@ def _repr_categories(self) -> list[str]: from pandas.io.formats import format as fmt formatter = None - if self.categories.dtype == "str": + if self.categories.dtype == "str" or self.categories.dtype == "string": # the extension array formatter defaults to boxed=True in format_array # override here to boxed=False to be consistent with QUOTE_NONNUMERIC formatter = cast(ExtensionArray, self.categories._values)._formatter( diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index 60af3bafb62b2..26f29f366a5c2 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -545,3 +545,10 @@ def test_categorical_str_repr(self): result = repr(Categorical([1, "2", 3, 4])) expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']" assert result == expected + + def test_categorical_with_pandas_series(self): + # GH 63045 + s = Series(["apple", "banana", "cherry", "cherry"], dtype="string") + result =repr(Categorical(s)) + expected = "['apple', 'banana', 'cherry', 'cherry']\nCategories (3, string): ['apple', 'banana', 'cherry']" + assert result == expected From f69cfe61a65a754c9646ef4355dd7baef9375b6c Mon Sep 17 00:00:00 2001 From: thanhlecongg Date: Tue, 11 Nov 2025 14:50:24 +1100 Subject: [PATCH 2/4] update docs and ignore E501 in added tests --- doc/source/whatsnew/v3.0.0.rst | 2 +- 
pandas/tests/arrays/categorical/test_repr.py | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 65982ecdb810c..a6d169ef77074 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1032,13 +1032,13 @@ Bug fixes Categorical ^^^^^^^^^^^ - Bug in :class:`Categorical` where constructing from a pandas :class:`Series` or :class:`Index` with ``dtype='object'`` did not preserve the categories' dtype as ``object``; now the ``categories.dtype`` is preserved as ``object`` for these cases, while numpy arrays and Python sequences with ``dtype='object'`` continue to infer the most specific dtype (for example, ``str`` if all elements are strings) (:issue:`61778`) +- Bug in :class:`pandas.Categorical` displaying string categories without quotes when constructed from a Series with dtype "string" (:issue:`63045`) - Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`) - Bug in :func:`bdate_range` raising ``ValueError`` with frequency ``freq="cbh"`` (:issue:`62849`) - Bug in :func:`testing.assert_index_equal` raising ``TypeError`` instead of ``AssertionError`` for incomparable ``CategoricalIndex`` when ``check_categorical=True`` and ``exact=False`` (:issue:`61935`) - Bug in :meth:`Categorical.astype` where ``copy=False`` would still trigger a copy of the codes (:issue:`62000`) - Bug in :meth:`DataFrame.pivot` and :meth:`DataFrame.set_index` raising an ``ArrowNotImplementedError`` for columns with pyarrow dictionary dtype (:issue:`53051`) - Bug in :meth:`Series.convert_dtypes` with ``dtype_backend="pyarrow"`` where empty :class:`CategoricalDtype` :class:`Series` raised an error or got converted to ``null[pyarrow]`` (:issue:`59934`) -- Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index 26f29f366a5c2..6b0c2bc228420 100644 --- 
a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -545,10 +545,11 @@ def test_categorical_str_repr(self): result = repr(Categorical([1, "2", 3, 4])) expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']" assert result == expected - + def test_categorical_with_pandas_series(self): # GH 63045 s = Series(["apple", "banana", "cherry", "cherry"], dtype="string") - result =repr(Categorical(s)) - expected = "['apple', 'banana', 'cherry', 'cherry']\nCategories (3, string): ['apple', 'banana', 'cherry']" + result = repr(Categorical(s)) + expected = "['apple', 'banana', 'cherry', 'cherry']\nCategories (3, string): ['apple', 'banana', 'cherry']" # noqa: E501 + assert result == expected From 37f2867984182b6b72d1e42ee300a1bd11faae1a Mon Sep 17 00:00:00 2001 From: thanhlecongg Date: Wed, 12 Nov 2025 22:22:55 +1100 Subject: [PATCH 3/4] update tests and docs to be more generalizable with all string like dtypes --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/tests/arrays/categorical/test_repr.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a6d169ef77074..847b567175a57 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -1032,7 +1032,7 @@ Bug fixes Categorical ^^^^^^^^^^^ - Bug in :class:`Categorical` where constructing from a pandas :class:`Series` or :class:`Index` with ``dtype='object'`` did not preserve the categories' dtype as ``object``; now the ``categories.dtype`` is preserved as ``object`` for these cases, while numpy arrays and Python sequences with ``dtype='object'`` continue to infer the most specific dtype (for example, ``str`` if all elements are strings) (:issue:`61778`) -- Bug in :class:`pandas.Categorical` displaying string categories without quotes when constructed from a Series with dtype "string" (:issue:`63045`) +- Bug in :class:`pandas.Categorical` displaying string 
categories without quotes when using "string" dtype (:issue:`63045`) - Bug in :func:`Series.apply` where ``nan`` was ignored for :class:`CategoricalDtype` (:issue:`59938`) - Bug in :func:`bdate_range` raising ``ValueError`` with frequency ``freq="cbh"`` (:issue:`62849`) - Bug in :func:`testing.assert_index_equal` raising ``TypeError`` instead of ``AssertionError`` for incomparable ``CategoricalIndex`` when ``check_categorical=True`` and ``exact=False`` (:issue:`61935`) diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index 6b0c2bc228420..9e955b4ac618a 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ b/pandas/tests/arrays/categorical/test_repr.py @@ -546,10 +546,12 @@ def test_categorical_str_repr(self): expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']" assert result == expected - def test_categorical_with_pandas_series(self): + def test_categorical_with_pandas_series(self, string_dtype_no_object): # GH 63045 - s = Series(["apple", "banana", "cherry", "cherry"], dtype="string") + s = Series( + ["apple", "banana", "cherry", "cherry"], dtype=string_dtype_no_object + ) result = repr(Categorical(s)) - expected = "['apple', 'banana', 'cherry', 'cherry']\nCategories (3, string): ['apple', 'banana', 'cherry']" # noqa: E501 + expected = f"['apple', 'banana', 'cherry', 'cherry']\nCategories (3, {string_dtype_no_object!s}): ['apple', 'banana', 'cherry']" # noqa: E501 assert result == expected From 8f845129b15bcf3267cb506444ef8ebb6bb73477 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 13 Nov 2025 11:53:41 +0100 Subject: [PATCH 4/4] Apply suggestion from @jorisvandenbossche --- pandas/tests/arrays/categorical/test_repr.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_repr.py b/pandas/tests/arrays/categorical/test_repr.py index 9e955b4ac618a..ebbfbfd96d48d 100644 --- a/pandas/tests/arrays/categorical/test_repr.py +++ 
b/pandas/tests/arrays/categorical/test_repr.py @@ -546,8 +546,8 @@ def test_categorical_str_repr(self): expected = "[1, '2', 3, 4]\nCategories (4, object): [1, 3, 4, '2']" assert result == expected - def test_categorical_with_pandas_series(self, string_dtype_no_object): - # GH 63045 + def test_categorical_with_string_dtype(self, string_dtype_no_object): + # GH 63045 - ensure categories are quoted for string dtypes s = Series( ["apple", "banana", "cherry", "cherry"], dtype=string_dtype_no_object )