From 85d39a2e9cff29bc9f8aa3058c896ad1415dcdd1 Mon Sep 17 00:00:00 2001 From: Pooyan Razian Date: Sat, 16 Aug 2025 09:52:17 +0400 Subject: [PATCH 1/6] Add test --- pandas/tests/io/formats/test_to_csv.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 52f521d0d36eb..0a6f91bbd85c4 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -2,6 +2,7 @@ import os import sys from zipfile import ZipFile +from datetime import datetime, timezone from _csv import Error import numpy as np @@ -713,6 +714,30 @@ def test_to_csv_encoding_binary_handle(self, mode): assert handle.read().startswith(b'\xef\xbb\xbf""') + """ + tz-aware timestamps with/without microseconds should be written consistently + Checks if the .ffffff format is consistent, even when microseconds==0 + + GH 62111 + """ + def test_to_csv_tz_aware_consistent_microseconds_formatting_python_datetime(self): + df = DataFrame({"timestamp": [ + datetime(2025, 8, 14, 12, 34, 56, 0, tzinfo=timezone.utc), + datetime(2025, 8, 14, 12, 34, 56, 1, tzinfo=timezone.utc), + ]}) + with tm.ensure_clean("test.csv") as path: + df.to_csv(path, index=False, lineterminator="\n") + with open(path, encoding="utf-8") as f: + contents = f.read() + + # + expected = ( + "timestamp\n" + "2025-08-14 12:34:56.000000+00:00\n" + "2025-08-14 12:34:56.000001+00:00\n" + ) + assert contents == expected + def test_to_csv_iterative_compression_name(compression): # GH 38714 df = DataFrame( From 7b0baa9f292bc1be46a630060a76053383bbdf87 Mon Sep 17 00:00:00 2001 From: Pooyan Razian Date: Mon, 18 Aug 2025 11:47:32 +0400 Subject: [PATCH 2/6] Fixes inconsistent format --- pandas/io/formats/csvs.py | 57 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 1b9eb6303fe74..3934217fa642c 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -20,6 +20,11 @@ import numpy as np +from datetime import datetime as _pydatetime + +from pandas.core.dtypes.dtypes import DatetimeTZDtype +from pandas.core.dtypes.common import is_object_dtype + from pandas._libs import writers as libwriters from pandas._typing import SequenceNotStr from pandas.util._decorators import cache_readonly @@ -312,11 +317,63 @@ def _save_body(self) -> None: break self._save_chunk(start_i, end_i) + + # tz-aware CSV formatting helper + @staticmethod + def _csv_format_datetime_tz_ea(ser, na_rep: str): + """ + Consistent tz-aware formatting for ExtensionArray datetimes: + 'YYYY-MM-DD HH:MM:SS.ffffff+HH:MM' + """ + # +HHMM → +HH:MM + s = ser.dt.strftime("%Y-%m-%d %H:%M:%S.%f%z") + s = s.str.replace(r"([+-]\d{2})(\d{2})$", r"\1:\2", regex=True) + return s.fillna(na_rep) + + # tz-aware CSV formatting helper + @staticmethod + def _csv_format_py_tz_aware_obj(ser, na_rep: str): + """ + For object-dtype Series containing stdlib tz-aware datetimes, render + with microseconds and colonized offset. Leave other objects untouched. + """ + if ser.empty: + return ser.astype(str) + + vals = ser.to_numpy(object, copy=False) + + def _is_tzaware_dt(x: object) -> bool: + return ( + isinstance(x, _pydatetime) + and getattr(x, "tzinfo", None) is not None + and x.tzinfo.utcoffset(x) is not None + ) + + mask = np.fromiter((_is_tzaware_dt(x) for x in vals), dtype=bool, count=len(vals)) + if mask.any(): + out = vals.copy() + # isoformat gives 'YYYY-MM-DD HH:MM:SS.ffffff+HH:MM' + out[mask] = [x.isoformat(sep=" ", timespec="microseconds") for x in out[mask]] + ser = ser._constructor(out, index=ser.index, name=ser.name) + + return ser.fillna(na_rep) + + def _save_chunk(self, start_i: int, end_i: int) -> None: # create the data for a chunk slicer = slice(start_i, end_i) df = self.obj.iloc[slicer] + # If user didn't set date_format, normalize tz-aware datetimes to a + # single canonical string form for CSV (GH 62111). + if self.date_format is None: + for col in df.columns: + col_dtype = df.dtypes[col] + if isinstance(col_dtype, DatetimeTZDtype): + df[col] = self._csv_format_datetime_tz_ea(df[col], self.na_rep) + elif is_object_dtype(col_dtype): + df[col] = self._csv_format_py_tz_aware_obj(df[col], self.na_rep) + res = df._get_values_for_csv(**self._number_format) data = list(res._iter_column_arrays()) From 102339db33c82c7fa5d612d8afca8b931b07ccbc Mon Sep 17 00:00:00 2001 From: Pooyan Razian Date: Mon, 18 Aug 2025 11:53:11 +0400 Subject: [PATCH 3/6] TST: add coverage for tz-aware CSV formatting consistency (GH#62111) --- pandas/tests/io/formats/test_to_csv.py | 59 +++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index 0a6f91bbd85c4..dc58e324c04e7 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -2,7 +2,7 @@ import os import sys from zipfile import ZipFile -from datetime import datetime, timezone +from datetime import datetime, timezone, timedelta from _csv import Error import numpy as np @@ -738,6 +738,63 @@ def test_to_csv_tz_aware_consistent_microseconds_formatting_python_datetime(self ) assert contents == expected + + def test_to_csv_tz_aware_consistent_microseconds_formatting_timestamp(self): + df = DataFrame({"timestamp": [ + pd.Timestamp("2025-08-14 12:34:56+00:00"), + pd.Timestamp("2025-08-14 12:34:56.000001+00:00"), + ]}) + with tm.ensure_clean("test.csv") as path: + df.to_csv(path, index=False, lineterminator="\n") + with open(path, encoding="utf-8") as f: + contents = f.read() + + expected = ( + "timestamp\n" + "2025-08-14 12:34:56.000000+00:00\n" + "2025-08-14 12:34:56.000001+00:00\n" + ) + assert contents == expected + + + def test_to_csv_tz_aware_respects_date_format_python_datetime(self): + # No microseconds in date_format; %z produces +0000 (no colon) by design. + df = DataFrame({"timestamp": [ + datetime(2025, 8, 14, 12, 34, 56, 0, tzinfo=timezone.utc), + datetime(2025, 8, 14, 12, 34, 56, 1, tzinfo=timezone.utc), + ]}) + with tm.ensure_clean("test.csv") as path: + df.to_csv(path, index=False, lineterminator="\n", date_format="%Y-%m-%d %H:%M:%S%z") + with open(path, encoding="utf-8") as f: + contents = f.read() + + expected = ( + "timestamp\n" + "2025-08-14 12:34:56+0000\n" + "2025-08-14 12:34:56+0000\n" + ) + assert contents == expected + + + def test_to_csv_tz_aware_consistent_microseconds_non_utc_offset_python_datetime(self): + ist = timezone(timedelta(hours=5, minutes=30)) # +05:30 + df = DataFrame({"timestamp": [ + datetime(2025, 8, 14, 12, 34, 56, 0, tzinfo=ist), + datetime(2025, 8, 14, 12, 34, 56, 1, tzinfo=ist), + ]}) + with tm.ensure_clean("test.csv") as path: + df.to_csv(path, index=False, lineterminator="\n") + with open(path, encoding="utf-8") as f: + contents = f.read() + + expected = ( + "timestamp\n" + "2025-08-14 12:34:56.000000+05:30\n" + "2025-08-14 12:34:56.000001+05:30\n" + ) + assert contents == expected + + def test_to_csv_iterative_compression_name(compression): # GH 38714 df = DataFrame( From d20aefaa12243d9616142fb2897609592cab65e1 Mon Sep 17 00:00:00 2001 From: Pooyan Razian Date: Mon, 18 Aug 2025 12:18:40 +0400 Subject: [PATCH 4/6] Fix formatting errors --- pandas/io/formats/csvs.py | 24 ++++---- pandas/tests/io/formats/test_to_csv.py | 83 ++++++++++++++++---------- 2 files changed, 63 insertions(+), 44 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 3934217fa642c..cc292a92c9cc7 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -11,6 +11,7 @@ Sequence, ) import csv as csvlib +from datetime import datetime as _pydatetime import os from typing import ( TYPE_CHECKING, @@ -20,15 +21,12 @@ import numpy as np -from datetime import datetime as _pydatetime - -from pandas.core.dtypes.dtypes import DatetimeTZDtype -from pandas.core.dtypes.common import is_object_dtype - from pandas._libs import writers as libwriters from pandas._typing import SequenceNotStr from pandas.util._decorators import cache_readonly +from pandas.core.dtypes.common import is_object_dtype +from pandas.core.dtypes.dtypes import DatetimeTZDtype from pandas.core.dtypes.generic import ( ABCDatetimeIndex, ABCIndex, @@ -317,7 +315,6 @@ def _save_body(self) -> None: break self._save_chunk(start_i, end_i) - # tz-aware CSV formatting helper @staticmethod def _csv_format_datetime_tz_ea(ser, na_rep: str): @@ -344,21 +341,24 @@ def _csv_format_py_tz_aware_obj(ser, na_rep: str): def _is_tzaware_dt(x: object) -> bool: return ( - isinstance(x, _pydatetime) - and getattr(x, "tzinfo", None) is not None - and x.tzinfo.utcoffset(x) is not None + isinstance(x, _pydatetime) + and getattr(x, "tzinfo", None) is not None + and x.tzinfo.utcoffset(x) is not None ) - mask = np.fromiter((_is_tzaware_dt(x) for x in vals), dtype=bool, count=len(vals)) + mask = np.fromiter( + (_is_tzaware_dt(x) for x in vals), dtype=bool, count=len(vals) + ) if mask.any(): out = vals.copy() # isoformat gives 'YYYY-MM-DD HH:MM:SS.ffffff+HH:MM' - out[mask] = [x.isoformat(sep=" ", timespec="microseconds") for x in out[mask]] + out[mask] = [ + x.isoformat(sep=" ", timespec="microseconds") for x in out[mask] + ] ser = ser._constructor(out, index=ser.index, name=ser.name) return ser.fillna(na_rep) - def _save_chunk(self, start_i: int, end_i: int) -> None: # create the data for a chunk slicer = slice(start_i, end_i) diff --git a/pandas/tests/io/formats/test_to_csv.py b/pandas/tests/io/formats/test_to_csv.py index dc58e324c04e7..e2c8f55188154 100644 --- a/pandas/tests/io/formats/test_to_csv.py +++ b/pandas/tests/io/formats/test_to_csv.py @@ -1,8 +1,12 @@ +from datetime import ( + datetime, + timedelta, + timezone, +) import io import os import sys from zipfile import ZipFile -from datetime import datetime, timezone, timedelta from _csv import Error import numpy as np @@ -713,24 +717,27 @@ def test_to_csv_encoding_binary_handle(self, mode): handle.seek(0) assert handle.read().startswith(b'\xef\xbb\xbf""') - """ tz-aware timestamps with/without microseconds should be written consistently Checks if the .ffffff format is consistent, even when microseconds==0 GH 62111 """ + def test_to_csv_tz_aware_consistent_microseconds_formatting_python_datetime(self): - df = DataFrame({"timestamp": [ - datetime(2025, 8, 14, 12, 34, 56, 0, tzinfo=timezone.utc), - datetime(2025, 8, 14, 12, 34, 56, 1, tzinfo=timezone.utc), - ]}) + df = DataFrame( + { + "timestamp": [ + datetime(2025, 8, 14, 12, 34, 56, 0, tzinfo=timezone.utc), + datetime(2025, 8, 14, 12, 34, 56, 1, tzinfo=timezone.utc), + ] + } + ) with tm.ensure_clean("test.csv") as path: df.to_csv(path, index=False, lineterminator="\n") with open(path, encoding="utf-8") as f: contents = f.read() - # expected = ( "timestamp\n" "2025-08-14 12:34:56.000000+00:00\n" @@ -738,12 +745,15 @@ def test_to_csv_tz_aware_consistent_microseconds_formatting_python_datetime(self ) assert contents == expected - def test_to_csv_tz_aware_consistent_microseconds_formatting_timestamp(self): - df = DataFrame({"timestamp": [ - pd.Timestamp("2025-08-14 12:34:56+00:00"), - pd.Timestamp("2025-08-14 12:34:56.000001+00:00"), - ]}) + df = DataFrame( + { + "timestamp": [ + pd.Timestamp("2025-08-14 12:34:56+00:00"), + pd.Timestamp("2025-08-14 12:34:56.000001+00:00"), + ] + } + ) with tm.ensure_clean("test.csv") as path: df.to_csv(path, index=False, lineterminator="\n") with open(path, encoding="utf-8") as f: @@ -756,32 +766,41 @@ def test_to_csv_tz_aware_consistent_microseconds_formatting_timestamp(self): ) assert contents == expected - def test_to_csv_tz_aware_respects_date_format_python_datetime(self): # No microseconds in date_format; %z produces +0000 (no colon) by design. - df = DataFrame({"timestamp": [ - datetime(2025, 8, 14, 12, 34, 56, 0, tzinfo=timezone.utc), - datetime(2025, 8, 14, 12, 34, 56, 1, tzinfo=timezone.utc), - ]}) + df = DataFrame( + { + "timestamp": [ + datetime(2025, 8, 14, 12, 34, 56, 0, tzinfo=timezone.utc), + datetime(2025, 8, 14, 12, 34, 56, 1, tzinfo=timezone.utc), + ] + } + ) with tm.ensure_clean("test.csv") as path: - df.to_csv(path, index=False, lineterminator="\n", date_format="%Y-%m-%d %H:%M:%S%z") + df.to_csv( + path, + index=False, + lineterminator="\n", + date_format="%Y-%m-%d %H:%M:%S%z", + ) with open(path, encoding="utf-8") as f: contents = f.read() - expected = ( - "timestamp\n" - "2025-08-14 12:34:56+0000\n" - "2025-08-14 12:34:56+0000\n" - ) + expected = "timestamp\n2025-08-14 12:34:56+0000\n2025-08-14 12:34:56+0000\n" assert contents == expected - - def test_to_csv_tz_aware_consistent_microseconds_non_utc_offset_python_datetime(self): - ist = timezone(timedelta(hours=5, minutes=30)) # +05:30 - df = DataFrame({"timestamp": [ - datetime(2025, 8, 14, 12, 34, 56, 0, tzinfo=ist), - datetime(2025, 8, 14, 12, 34, 56, 1, tzinfo=ist), - ]}) + def test_to_csv_tz_aware_consistent_microseconds_non_utc_offset_python_datetime( + self, + ): + am_tz = timezone(timedelta(hours=4)) # +04:00 (Armenia / Asia/Yerevan) + df = DataFrame( + { + "timestamp": [ + datetime(2025, 8, 14, 12, 34, 56, 0, tzinfo=am_tz), + datetime(2025, 8, 14, 12, 34, 56, 1, tzinfo=am_tz), + ] + } + ) with tm.ensure_clean("test.csv") as path: df.to_csv(path, index=False, lineterminator="\n") with open(path, encoding="utf-8") as f: @@ -789,8 +808,8 @@ def test_to_csv_tz_aware_consistent_microseconds_non_utc_offset_python_datetime( expected = ( "timestamp\n" - "2025-08-14 12:34:56.000000+05:30\n" - "2025-08-14 12:34:56.000001+05:30\n" + "2025-08-14 12:34:56.000000+04:00\n" + "2025-08-14 12:34:56.000001+04:00\n" ) assert contents == expected From 794ed5a14aadba3f540cd252d227785a1b9bc0b9 Mon Sep 17 00:00:00 2001 From: Pooyan Razian Date: Mon, 18 Aug 2025 17:42:41 +0400 Subject: [PATCH 5/6] Add missing types --- pandas/io/formats/csvs.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index cc292a92c9cc7..76180878d0c51 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -50,6 +50,8 @@ npt, ) + from pandas.core.series import Series + from pandas.io.formats.format import DataFrameFormatter @@ -317,7 +319,7 @@ def _save_body(self) -> None: # tz-aware CSV formatting helper @staticmethod - def _csv_format_datetime_tz_ea(ser, na_rep: str): + def _csv_format_datetime_tz_ea(ser: Series, na_rep: str) -> Series: """ Consistent tz-aware formatting for ExtensionArray datetimes: 'YYYY-MM-DD HH:MM:SS.ffffff+HH:MM' @@ -329,7 +331,7 @@ def _csv_format_datetime_tz_ea(ser, na_rep: str): # tz-aware CSV formatting helper @staticmethod - def _csv_format_py_tz_aware_obj(ser, na_rep: str): + def _csv_format_py_tz_aware_obj(ser: Series, na_rep: str) -> Series: """ For object-dtype Series containing stdlib tz-aware datetimes, render with microseconds and colonized offset. Leave other objects untouched. @@ -340,11 +342,10 @@ def _csv_format_py_tz_aware_obj(ser, na_rep: str): vals = ser.to_numpy(object, copy=False) def _is_tzaware_dt(x: object) -> bool: - return ( - isinstance(x, _pydatetime) - and getattr(x, "tzinfo", None) is not None - and x.tzinfo.utcoffset(x) is not None - ) + if not isinstance(x, _pydatetime): + return False + tz = getattr(x, "tzinfo", None) + return tz is not None and tz.utcoffset(x) is not None mask = np.fromiter( (_is_tzaware_dt(x) for x in vals), dtype=bool, count=len(vals) From a5fa547f8e3a9ddd615fb9ed7d5dcd9d71e7d9c4 Mon Sep 17 00:00:00 2001 From: Pooyan Razian Date: Mon, 18 Aug 2025 17:56:03 +0400 Subject: [PATCH 6/6] Add an entry in the latest whatsnew rst doc file --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index b94d82f3c9783..1e392c9bbcf8c 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -953,6 +953,7 @@ I/O - Bug in :meth:`DataFrame._repr_html_` which ignored the ``"display.float_format"`` option (:issue:`59876`) - Bug in :meth:`DataFrame.from_records` ignoring ``columns`` and ``index`` parameters when ``data`` is an empty iterator and ``nrows=0``. (:issue:`61140`) - Bug in :meth:`DataFrame.from_records` where ``columns`` parameter with numpy structured array was not reordering and filtering out the columns (:issue:`59717`) +- Bug in :meth:`DataFrame.to_csv` / :meth:`Series.to_csv` formatting tz-aware datetimes (microseconds dropped, offset lacked colon); now consistently emits ``YYYY-MM-DD HH:MM:SS.ffffff+HH:MM`` when ``date_format=None``. (:issue:`62111`) - Bug in :meth:`DataFrame.to_dict` raises unnecessary ``UserWarning`` when columns are not unique and ``orient='tight'``. (:issue:`58281`) - Bug in :meth:`DataFrame.to_excel` when writing empty :class:`DataFrame` with :class:`MultiIndex` on both axes (:issue:`57696`) - Bug in :meth:`DataFrame.to_excel` where the :class:`MultiIndex` index with a period level was not a date (:issue:`60099`)