From 261209fc57751adc71b36931fb0ada50a39ac806 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Sat, 26 Jul 2025 11:52:27 +0200
Subject: [PATCH 1/7] BUG: fix .str.isdigit to honor unicode superscript for
 older pyarrow

---
 doc/source/whatsnew/v2.3.2.rst             | 3 ++-
 pandas/core/arrays/_arrow_string_mixins.py | 7 +++++++
 pandas/tests/strings/test_strings.py       | 7 ++++---
 3 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v2.3.2.rst b/doc/source/whatsnew/v2.3.2.rst
index faa61cf4bd3bc..88bd63d8942ea 100644
--- a/doc/source/whatsnew/v2.3.2.rst
+++ b/doc/source/whatsnew/v2.3.2.rst
@@ -22,7 +22,8 @@ become the default string dtype in pandas 3.0. See
 
 Bug fixes
 ^^^^^^^^^
--
+- Fix :meth:`~Series.str.isdigit` to correctly recognize unicode superscript
+  characters as digits for :class:`StringDtype` backed by PyArrow (:issue:`61466`)
 
 .. ---------------------------------------------------------------------------
 .. _whatsnew_232.contributors:
diff --git a/pandas/core/arrays/_arrow_string_mixins.py b/pandas/core/arrays/_arrow_string_mixins.py
index 07cbf489cfe1c..ad91d60aae922 100644
--- a/pandas/core/arrays/_arrow_string_mixins.py
+++ b/pandas/core/arrays/_arrow_string_mixins.py
@@ -15,6 +15,7 @@
     HAS_PYARROW,
     pa_version_under13p0,
     pa_version_under17p0,
+    pa_version_under21p0,
 )
 
 if HAS_PYARROW:
@@ -261,6 +262,12 @@ def _str_isdecimal(self):
         return self._convert_bool_result(result)
 
     def _str_isdigit(self):
+        if pa_version_under21p0:
+            # https://github.com/pandas-dev/pandas/issues/61466
+            res_list = self._apply_elementwise(str.isdigit)
+            return self._convert_bool_result(
+                pa.chunked_array(res_list, type=pa.bool_())
+            )
         result = pc.utf8_is_digit(self._pa_array)
         return self._convert_bool_result(result)
 
diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
index 025f837982595..2ed00703212ca 100644
--- a/pandas/tests/strings/test_strings.py
+++ b/pandas/tests/strings/test_strings.py
@@ -240,8 +240,9 @@ def test_ismethods(method, expected, any_string_dtype):
 @pytest.mark.parametrize(
     "method, expected",
     [
-        ("isnumeric", [False, True, True, False, True, True, False]),
-        ("isdecimal", [False, True, False, False, False, True, False]),
+        ("isnumeric", [False, True, True, True, False, True, True, False]),
+        ("isdecimal", [False, True, False, False, False, False, True, False]),
+        ("isdigit", [False, True, True, False, False, False, True, False]),
     ],
 )
 def test_isnumeric_unicode(method, expected, any_string_dtype):
@@ -250,7 +251,7 @@ def test_isnumeric_unicode(method, expected, any_string_dtype):
     # 0x1378: ፸ ETHIOPIC NUMBER SEVENTY
     # 0xFF13: ３ Em 3  # noqa: RUF003
     ser = Series(
-        ["A", "3", "¼", "★", "፸", "３", "four"],  # noqa: RUF001
+        ["A", "3", "³", "¼", "★", "፸", "３", "four"],  # noqa: RUF001
         dtype=any_string_dtype,
     )
     expected_dtype = (

From cf26a930329248a8856d105bfa1291030d85711d Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 13 Aug 2025 22:09:25 +0200
Subject: [PATCH 2/7] TST: adjust isdigit expectation for pyarrow >= 21

---
 pandas/tests/strings/test_strings.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
index 6c751be0e31b5..932dc187932b0 100644
--- a/pandas/tests/strings/test_strings.py
+++ b/pandas/tests/strings/test_strings.py
@@ -6,12 +6,15 @@
 import numpy as np
 import pytest
 
+from pandas.compat import pa_version_under21p0
+
 from pandas import (
     NA,
     DataFrame,
     Index,
     MultiIndex,
     Series,
+    StringDtype,
     option_context,
 )
 import pandas._testing as tm
@@ -264,6 +267,16 @@ def test_isnumeric_unicode(method, expected, any_string_dtype):
         "bool" if is_object_or_nan_string_dtype(any_string_dtype) else "boolean"
     )
     expected = Series(expected, dtype=expected_dtype)
+    if (
+        method == "isdigit"
+        and isinstance(ser.dtype, StringDtype)
+        and ser.dtype.storage == "pyarrow"
+        and not pa_version_under21p0
+    ):
+        # known difference in behavior between python and pyarrow unicode handling
+        # pyarrow 21+ considers ¼ as a digit, while python does not
+        expected.iloc[3] = True
+
     result = getattr(ser.str, method)()
     tm.assert_series_equal(result, expected)
 

From 8349551d191f0d54aff612ba193021a24c811c44 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Tue, 19 Aug 2025 20:52:39 +0200
Subject: [PATCH 3/7] TST: also expect Ethiopic numeral as digit for pyarrow >= 21

---
 pandas/tests/strings/test_strings.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
index 932dc187932b0..24a15c86375a7 100644
--- a/pandas/tests/strings/test_strings.py
+++ b/pandas/tests/strings/test_strings.py
@@ -274,8 +274,9 @@ def test_isnumeric_unicode(method, expected, any_string_dtype):
         and not pa_version_under21p0
     ):
         # known difference in behavior between python and pyarrow unicode handling
-        # pyarrow 21+ considers ¼ as a digit, while python does not
+        # pyarrow 21+ considers ¼ and ፸ as digits, while python does not
         expected.iloc[3] = True
+        expected.iloc[5] = True
 
     result = getattr(ser.str, method)()
     tm.assert_series_equal(result, expected)

From 7cd79a5ec48aeb5e2a59216a3ab5d4fadb8b54aa Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 20 Aug 2025 09:23:53 +0200
Subject: [PATCH 4/7] TST: compare with stdlib only for non-pyarrow storage

---
 pandas/tests/strings/test_strings.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
index 24a15c86375a7..036c3cc2d132a 100644
--- a/pandas/tests/strings/test_strings.py
+++ b/pandas/tests/strings/test_strings.py
@@ -282,8 +282,13 @@ def test_isnumeric_unicode(method, expected, any_string_dtype):
     tm.assert_series_equal(result, expected)
 
     # compare with standard library
-    expected = [getattr(item, method)() for item in ser]
-    assert list(result) == expected
+    # (only for non-pyarrow storage given the above differences)
+    if any_string_dtype == "object" or (
+        isinstance(any_string_dtype, StringDtype)
+        and any_string_dtype.storage == "python"
+    ):
+        expected = [getattr(item, method)() for item in ser]
+        assert list(result) == expected
 
 
 @pytest.mark.parametrize(

From 71223d32e7007d66f4abd60fc16b4bc7e77e0bd3 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 24 Sep 2025 22:47:29 +0200
Subject: [PATCH 5/7] update docstring and add note about differences in
 behaviour

---
 pandas/core/strings/accessor.py      | 13 +++++++++++--
 pandas/tests/strings/test_strings.py |  6 ++++--
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 21e6e2efbe778..45f5c3cb533a8 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -3610,10 +3610,19 @@ def casefold(self):
     >>> s3 = pd.Series(['23', '³', '⅕', ''])
     >>> s3.str.isdigit()
     0     True
-    1    False
-    2    False
+    1     True
+    2     True
     3    False
     dtype: bool
+
+    Notes
+    -----
+    The exact behavior of this method, i.e. which unicode characters are
+    considered as digits, depends on the backend used for string operations,
+    and there can be small differences.
+    For example, Python considers the ³ superscript character as a digit, but
+    not the ⅕ fraction character, while PyArrow considers both as digits. For
+    simple (ascii) decimal numbers, the behaviour is consistent.
     """
 
     _shared_docs["isspace"] = """
diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
index fc0dd23334706..20dbb6068ae08 100644
--- a/pandas/tests/strings/test_strings.py
+++ b/pandas/tests/strings/test_strings.py
@@ -7,8 +7,10 @@
 import numpy as np
 import pytest
 
-from pandas.compat import pa_version_under21p0
-from pandas.errors import Pandas4Warning
+from pandas.compat import (
+    Pandas4Warning,
+    pa_version_under21p0,
+)
 
 from pandas import (
     NA,

From c2318fb3ccf9a1515b19443129aaaebfc932cf8e Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Wed, 24 Sep 2025 23:00:46 +0200
Subject: [PATCH 6/7] fixup merge

---
 pandas/tests/strings/test_strings.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/strings/test_strings.py b/pandas/tests/strings/test_strings.py
index 20dbb6068ae08..fc0dd23334706 100644
--- a/pandas/tests/strings/test_strings.py
+++ b/pandas/tests/strings/test_strings.py
@@ -7,10 +7,8 @@
 import numpy as np
 import pytest
 
-from pandas.compat import (
-    Pandas4Warning,
-    pa_version_under21p0,
-)
+from pandas.compat import pa_version_under21p0
+from pandas.errors import Pandas4Warning
 
 from pandas import (
     NA,

From 8220a4a60ed107464ab4dd9c84cfaaa6631febf5 Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche <jorisvandenbossche@gmail.com>
Date: Thu, 25 Sep 2025 14:28:59 +0200
Subject: [PATCH 7/7] switch order of docstring sections

---
 pandas/core/strings/accessor.py | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index 45f5c3cb533a8..b78ea3a9bf883 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -3602,11 +3602,21 @@ def casefold(self):
     Series.str.isupper : Check whether all characters are uppercase.
     Series.str.istitle : Check whether all characters are titlecase.
 
-    Examples
-    --------
+    Notes
+    -----
     Similar to ``str.isdecimal`` but also includes special digits, like
     superscripted and subscripted digits in unicode.
 
+    The exact behavior of this method, i.e. which unicode characters are
+    considered as digits, depends on the backend used for string operations,
+    and there can be small differences.
+    For example, Python considers the ³ superscript character as a digit, but
+    not the ⅕ fraction character, while PyArrow considers both as digits. For
+    simple (ASCII) decimal numbers, the behavior is consistent.
+
+    Examples
+    --------
+
     >>> s3 = pd.Series(['23', '³', '⅕', ''])
     >>> s3.str.isdigit()
     0     True
@@ -3614,15 +3624,6 @@ def casefold(self):
     2     True
     3    False
     dtype: bool
-
-    Notes
-    -----
-    The exact behavior of this method, i.e. which unicode characters are
-    considered as digits, depends on the backend used for string operations,
-    and there can be small differences.
-    For example, Python considers the ³ superscript character as a digit, but
-    not the ⅕ fraction character, while PyArrow considers both as digits. For
-    simple (ascii) decimal numbers, the behaviour is consistent.
     """
 
     _shared_docs["isspace"] = """