From d3ed71e947232da7d55b9b63d4649cf1cf899668 Mon Sep 17 00:00:00 2001 From: road Date: Tue, 12 Nov 2024 11:02:12 -0500 Subject: [PATCH 1/3] BUG: fixed .convert_dtypes timezone strip from tz-aware pyarrow timestamp Series (pandas-dev#60237) --- doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/dtypes/dtypes.py | 12 ------------ pandas/tests/copy_view/test_astype.py | 11 +++++++++++ 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index de69166b8c196..03e64319ff791 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -649,6 +649,7 @@ Conversion - Bug in :meth:`DataFrame.update` bool dtype being converted to object (:issue:`55509`) - Bug in :meth:`Series.astype` might modify read-only array inplace when casting to a string dtype (:issue:`57212`) - Bug in :meth:`Series.reindex` not maintaining ``float32`` type when a ``reindex`` introduces a missing value (:issue:`45857`) +- Bug in :meth:`Series.convert_dtypes` stripping the timezone from an already timezone-aware pyarrow timestamp dtype (:issue:`60237`) Strings ^^^^^^^ diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 96b0aa16940a6..67c09884d03dd 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -2277,18 +2277,6 @@ def name(self) -> str: # type: ignore[override] @cache_readonly def numpy_dtype(self) -> np.dtype: """Return an instance of the related numpy dtype""" - if pa.types.is_timestamp(self.pyarrow_dtype): - # pa.timestamp(unit).to_pandas_dtype() returns ns units - # regardless of the pyarrow timestamp units. 
- # This can be removed if/when pyarrow addresses it: - # https://github.com/apache/arrow/issues/34462 - return np.dtype(f"datetime64[{self.pyarrow_dtype.unit}]") - if pa.types.is_duration(self.pyarrow_dtype): - # pa.duration(unit).to_pandas_dtype() returns ns units - # regardless of the pyarrow duration units - # This can be removed if/when pyarrow addresses it: - # https://github.com/apache/arrow/issues/34462 - return np.dtype(f"timedelta64[{self.pyarrow_dtype.unit}]") if pa.types.is_string(self.pyarrow_dtype) or pa.types.is_large_string( self.pyarrow_dtype ): diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index 91f5badeb9728..b3148d129a6a7 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -11,6 +11,7 @@ Series, Timestamp, date_range, + to_datetime, ) import pandas._testing as tm from pandas.tests.copy_view.util import get_array @@ -236,3 +237,13 @@ def test_convert_dtypes(using_infer_string): df2.iloc[0, 0] = "x" df2.iloc[0, 1] = 10 tm.assert_frame_equal(df, df_orig) + + +def test_convert_dtypes_pyarrow_timezone(): + # GH 60237 + expected = Series( + to_datetime(range(5), utc=True, unit="h"), + dtype="timestamp[ns, tz=UTC][pyarrow]", + ) + result = expected.convert_dtypes(dtype_backend="pyarrow") + tm.assert_series_equal(result, expected) From 29542b290de5defa7521ecb5f51c3c793328414b Mon Sep 17 00:00:00 2001 From: road Date: Tue, 12 Nov 2024 11:47:22 -0500 Subject: [PATCH 2/3] removed extra if statement --- pandas/core/dtypes/dtypes.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/core/dtypes/dtypes.py b/pandas/core/dtypes/dtypes.py index 67c09884d03dd..58b05286dba01 100644 --- a/pandas/core/dtypes/dtypes.py +++ b/pandas/core/dtypes/dtypes.py @@ -2277,11 +2277,6 @@ def name(self) -> str: # type: ignore[override] @cache_readonly def numpy_dtype(self) -> np.dtype: """Return an instance of the related numpy dtype""" - if pa.types.is_string(self.pyarrow_dtype) 
or pa.types.is_large_string( - self.pyarrow_dtype - ): - # pa.string().to_pandas_dtype() = object which we don't want - return np.dtype(str) try: return np.dtype(self.pyarrow_dtype.to_pandas_dtype()) except (NotImplementedError, TypeError): From 16e727fdc32c9c02bd228da38869693af683634d Mon Sep 17 00:00:00 2001 From: road Date: Tue, 12 Nov 2024 12:32:31 -0500 Subject: [PATCH 3/3] restructured test to avoid NameError: pa undefined --- pandas/tests/copy_view/test_astype.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index b3148d129a6a7..e055a20ab4ef0 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -11,7 +11,6 @@ Series, Timestamp, date_range, - to_datetime, ) import pandas._testing as tm from pandas.tests.copy_view.util import get_array @@ -242,7 +241,13 @@ def test_convert_dtypes(using_infer_string): def test_convert_dtypes_pyarrow_timezone(): # GH 60237 expected = Series( - to_datetime(range(5), utc=True, unit="h"), + [ + "1970-01-01 00:00:00+00:00", + "1970-01-01 01:00:00+00:00", + "1970-01-01 02:00:00+00:00", + "1970-01-01 03:00:00+00:00", + "1970-01-01 04:00:00+00:00", + ], dtype="timestamp[ns, tz=UTC][pyarrow]", ) result = expected.convert_dtypes(dtype_backend="pyarrow")