Add a dedicated `_str_zfill` array method and route `Series.str.zfill` through it (GH 61485).

`StringMethods.zfill` used to build a lambda and call `_str_map` on the backing
array directly. Per the whatsnew entry in this patch, that raised
``AttributeError`` for `ArrowDtype`. The accessor now calls `_str_zfill`, which
is implemented in three places:

  * pandas/core/arrays/arrow/array.py   - element-wise fallback until Arrow
                                          provides a native zfill kernel
  * pandas/core/arrays/string_.py       - delegates to `_str_map`
  * pandas/core/strings/object_array.py - delegates to `_str_map`

The Arrow implementation returns through `self._from_pyarrow_array`, the same
helper used by the neighbouring `_str_wrap` and `_dt_days` shown as context in
that hunk, rather than calling `type(self)(...)` directly.

NOTE(review): the post-image blob hash on the arrow/array.py `index` line was
generated before that one-line change; regenerate the patch if exact hashes
matter.

diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst
index ae4be412c056f..293f1cb6f5e79 100644
--- a/doc/source/whatsnew/v3.0.0.rst
+++ b/doc/source/whatsnew/v3.0.0.rst
@@ -1008,8 +1008,8 @@ Conversion
 
 Strings
 ^^^^^^^
+- Bug in :meth:`Series.str.zfill` raising ``AttributeError`` for :class:`ArrowDtype` (:issue:`61485`)
 - Bug in :meth:`Series.value_counts` would not respect ``sort=False`` for series having ``string`` dtype (:issue:`55224`)
--
 
 Interval
 ^^^^^^^^
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 921621388e74a..7f0cd51e4fcc9 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -2808,6 +2808,13 @@ def _str_wrap(self, width: int, **kwargs) -> Self:
         result = self._apply_elementwise(predicate)
         return self._from_pyarrow_array(pa.chunked_array(result))
 
+    def _str_zfill(self, width: int) -> Self:
+        # TODO: Replace with pc.utf8_zfill when supported by arrow
+        # Arrow ENH - https://github.com/apache/arrow/issues/46683
+        predicate = lambda val: val.zfill(width)
+        result = self._apply_elementwise(predicate)
+        return self._from_pyarrow_array(pa.chunked_array(result))
+
     @property
     def _dt_days(self) -> Self:
         return self._from_pyarrow_array(
diff --git a/pandas/core/arrays/string_.py b/pandas/core/arrays/string_.py
index 1be9af6e3d3b4..95abd9a953e24 100644
--- a/pandas/core/arrays/string_.py
+++ b/pandas/core/arrays/string_.py
@@ -1134,3 +1134,6 @@ def _cmp_method(self, other, op):
             return res_arr
 
     _arith_method = _cmp_method
+
+    def _str_zfill(self, width: int) -> Self:
+        return self._str_map(lambda x: x.zfill(width))
diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py
index b78ea3a9bf883..b9f3cd137bf82 100644
--- a/pandas/core/strings/accessor.py
+++ b/pandas/core/strings/accessor.py
@@ -1912,8 +1912,8 @@ def zfill(self, width: int):
         if not is_integer(width):
             msg = f"width must be of integer type, not {type(width).__name__}"
             raise TypeError(msg)
-        f = lambda x: x.zfill(width)
-        result = self._data.array._str_map(f)
+
+        result = self._data.array._str_zfill(width)
         return self._wrap_result(result)
 
     def slice(self, start=None, stop=None, step=None):
diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py
index ba35542b7f112..2afa4eb8efb76 100644
--- a/pandas/core/strings/object_array.py
+++ b/pandas/core/strings/object_array.py
@@ -544,3 +544,6 @@ def f(x):
                 return empty_row
 
         return [f(val) for val in np.asarray(self)]
+
+    def _str_zfill(self, width: int):
+        return self._str_map(lambda x: x.zfill(width))
diff --git a/pandas/tests/strings/test_string_array.py b/pandas/tests/strings/test_string_array.py
index c5414022e664b..11fc3034cf290 100644
--- a/pandas/tests/strings/test_string_array.py
+++ b/pandas/tests/strings/test_string_array.py
@@ -110,3 +110,19 @@ def test_string_array_extract(nullable_string_dtype):
 
     result = result.astype(object)
     tm.assert_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "values, width, expected",
+    [
+        (["a", "ab", "abc", None], 4, ["000a", "00ab", "0abc", None]),
+        (["1", "-1", "+1", None], 4, ["0001", "-001", "+001", None]),
+        (["1234", "-1234"], 3, ["1234", "-1234"]),
+    ],
+)
+def test_string_array_zfill(nullable_string_dtype, values, width, expected):
+    # GH #61485
+    s = Series(values, dtype=nullable_string_dtype)
+    result = s.str.zfill(width)
+    expected = Series(expected, dtype=nullable_string_dtype)
+    tm.assert_series_equal(result, expected)