
Commit 93fc6dc

Merge branch 'main' into fix-latex-multiindex

2 parents: 63bd28c + 1c986d6

File tree: 11 files changed, +93 −27 lines

ci/code_checks.sh

Lines changed: 0 additions & 6 deletions
@@ -73,7 +73,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.Period.freq GL08" \
         -i "pandas.Period.ordinal GL08" \
         -i "pandas.RangeIndex.from_range PR01,SA01" \
-        -i "pandas.Series.dt.freq GL08" \
         -i "pandas.Series.dt.unit GL08" \
         -i "pandas.Series.pad PR01,SA01" \
         -i "pandas.Timedelta.max PR02" \
@@ -92,15 +91,11 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.indices SA01" \
-        -i "pandas.core.groupby.DataFrameGroupBy.nth PR02" \
         -i "pandas.core.groupby.DataFrameGroupBy.nunique SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.plot PR02" \
         -i "pandas.core.groupby.DataFrameGroupBy.sem SA01" \
         -i "pandas.core.groupby.SeriesGroupBy.get_group RT03,SA01" \
         -i "pandas.core.groupby.SeriesGroupBy.indices SA01" \
-        -i "pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing SA01" \
-        -i "pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing SA01" \
-        -i "pandas.core.groupby.SeriesGroupBy.nth PR02" \
         -i "pandas.core.groupby.SeriesGroupBy.plot PR02" \
         -i "pandas.core.groupby.SeriesGroupBy.sem SA01" \
         -i "pandas.core.resample.Resampler.get_group RT03,SA01" \
@@ -114,7 +109,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.core.resample.Resampler.std SA01" \
         -i "pandas.core.resample.Resampler.transform PR01,RT03,SA01" \
         -i "pandas.core.resample.Resampler.var SA01" \
-        -i "pandas.errors.AttributeConflictWarning SA01" \
         -i "pandas.errors.ChainedAssignmentError SA01" \
         -i "pandas.errors.DuplicateLabelError SA01" \
         -i "pandas.errors.IntCastingNaNError SA01" \

doc/source/whatsnew/v3.0.0.rst

Lines changed: 2 additions & 0 deletions
@@ -54,6 +54,7 @@ Other enhancements
 - :meth:`Series.cummin` and :meth:`Series.cummax` now support :class:`CategoricalDtype` (:issue:`52335`)
 - :meth:`Series.plot` now correctly handles the ``ylabel`` parameter for pie charts, allowing explicit control over the y-axis label (:issue:`58239`)
 - :meth:`DataFrame.plot.scatter` argument ``c`` now accepts a column of strings, where rows with the same string are colored identically (:issue:`16827` and :issue:`16485`)
+- :func:`read_parquet` accepts ``to_pandas_kwargs``, which is forwarded to :meth:`pyarrow.Table.to_pandas`; this enables passing additional keywords to customize the conversion to pandas, such as ``maps_as_pydicts`` to read the Parquet map data type as python dictionaries (:issue:`56842`)
 - :meth:`DataFrameGroupBy.transform`, :meth:`SeriesGroupBy.transform`, :meth:`DataFrameGroupBy.agg`, :meth:`SeriesGroupBy.agg`, :meth:`RollingGroupby.apply`, :meth:`ExpandingGroupby.apply`, :meth:`Rolling.apply`, :meth:`Expanding.apply`, :meth:`DataFrame.apply` with ``engine="numba"`` now support positional arguments passed as kwargs (:issue:`58995`)
 - :meth:`Series.map` can now accept kwargs to pass on to ``func`` (:issue:`59814`)
 - :meth:`pandas.concat` will raise a ``ValueError`` when ``ignore_index=True`` and ``keys`` is not ``None`` (:issue:`59274`)
@@ -626,6 +627,7 @@ Datetimelike
 - Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow-backed :class:`Series` (:issue:`59154`)
 - Bug in :meth:`to_datetime` not respecting ``dayfirst`` if an uncommon date string was passed (:issue:`58859`)
 - Bug in :meth:`to_datetime` reporting an incorrect index in case of any failure scenario (:issue:`58298`)
+- Bug in :meth:`to_datetime` wrongly converting when ``arg`` is a ``np.datetime64`` object with unit of ``ps`` (:issue:`60341`)
 - Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)

 Timedelta
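A quick illustration of the to_datetime fix recorded above (a sketch, not part of the commit; the exact Timestamp repr assumes a post-fix pandas 3.0 build):

import numpy as np
import pandas as pd

# 1 hour, 1 minute, 1 second, 1 microsecond and 1 picosecond after the
# epoch, expressed as a picosecond-unit datetime64.
val = np.datetime64(3_661_000_001_000_001, "ps")

# Before the fix, the time-of-day fields were computed with nanosecond-scale
# divisors, producing a wrong timestamp; it now converts correctly,
# truncating the sub-nanosecond remainder:
pd.to_datetime(val)
# Timestamp('1970-01-01 01:01:01.000001')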

pandas/_libs/src/vendored/numpy/datetime/np_datetime.c

Lines changed: 6 additions & 5 deletions
@@ -660,11 +660,12 @@ void pandas_datetime_to_datetimestruct(npy_datetime dt, NPY_DATETIMEUNIT base,
     perday = 24LL * 60 * 60 * 1000 * 1000 * 1000 * 1000;

     set_datetimestruct_days(extract_unit(&dt, perday), out);
-    out->hour = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 60 * 60);
-    out->min = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 60);
-    out->sec = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000);
-    out->us = (npy_int32)extract_unit(&dt, 1000LL);
-    out->ps = (npy_int32)(dt * 1000);
+    out->hour =
+        (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 60 * 60);
+    out->min = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000 * 60);
+    out->sec = (npy_int32)extract_unit(&dt, 1000LL * 1000 * 1000 * 1000);
+    out->us = (npy_int32)extract_unit(&dt, 1000LL * 1000);
+    out->ps = (npy_int32)(dt);
     break;

   case NPY_FR_fs:
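For readers checking the arithmetic: this is the NPY_FR_ps branch, where dt counts picoseconds, so every divisor needs an extra factor of 1000 relative to the nanosecond branch; the old code omitted it. A minimal Python sketch of the corrected extraction (extract_unit here is a hypothetical stand-in for the C helper, which returns the quotient and leaves the remainder behind):

def extract_unit(value: int, unit: int) -> tuple[int, int]:
    # Quotient plus remainder, mirroring the C helper for this sketch.
    return value // unit, value % unit

def ps_to_fields(dt: int) -> dict[str, int]:
    # Break a picosecond count (already reduced modulo one day) into fields.
    hour, dt = extract_unit(dt, 1_000_000_000_000 * 60 * 60)  # ps per hour
    minute, dt = extract_unit(dt, 1_000_000_000_000 * 60)     # ps per minute
    sec, dt = extract_unit(dt, 1_000_000_000_000)             # ps per second
    us, dt = extract_unit(dt, 1_000_000)                      # ps per microsecond
    return {"hour": hour, "min": minute, "sec": sec, "us": us, "ps": dt}

print(ps_to_fields(3_661_000_001_000_001))
# {'hour': 1, 'min': 1, 'sec': 1, 'us': 1, 'ps': 1}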

pandas/core/groupby/generic.py

Lines changed: 10 additions & 0 deletions
@@ -1443,6 +1443,11 @@ def is_monotonic_increasing(self) -> Series:
         -------
         Series

+        See Also
+        --------
+        SeriesGroupBy.is_monotonic_decreasing : Return whether each group's values
+            are monotonically decreasing.
+
         Examples
         --------
         >>> s = pd.Series([2, 1, 3, 4], index=["Falcon", "Falcon", "Parrot", "Parrot"])
@@ -1462,6 +1467,11 @@ def is_monotonic_decreasing(self) -> Series:
         -------
         Series

+        See Also
+        --------
+        SeriesGroupBy.is_monotonic_increasing : Return whether each group's values
+            are monotonically increasing.
+
         Examples
         --------
         >>> s = pd.Series([2, 1, 3, 4], index=["Falcon", "Falcon", "Parrot", "Parrot"])
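The two cross-referenced properties are complementary on the same grouped data; a short usage sketch (outputs match the docstring example above):

import pandas as pd

s = pd.Series([2, 1, 3, 4], index=["Falcon", "Falcon", "Parrot", "Parrot"])

s.groupby(level=0).is_monotonic_increasing
# Falcon    False
# Parrot     True
# dtype: bool

s.groupby(level=0).is_monotonic_decreasing
# Falcon     True
# Parrot    False
# dtype: bool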

pandas/core/groupby/groupby.py

Lines changed: 0 additions & 13 deletions
@@ -3983,19 +3983,6 @@ def nth(self) -> GroupByNthSelector:
         'all' or 'any'; this is equivalent to calling dropna(how=dropna)
         before the groupby.

-        Parameters
-        ----------
-        n : int, slice or list of ints and slices
-            A single nth value for the row or a list of nth values or slices.
-
-            .. versionchanged:: 1.4.0
-                Added slice and lists containing slices.
-                Added index notation.
-
-        dropna : {'any', 'all', None}, default None
-            Apply the specified dropna operation before counting which row is
-            the nth row. Only supported if n is an int.
-
         Returns
         -------
         Series or DataFrame
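The removed Parameters block described arguments that the nth property itself does not take: since nth became a selector object, n and dropna are supplied through the call or index notation instead. A brief usage sketch (output shown assumes pandas >= 2.0, where nth returns rows with their original index):

import pandas as pd

df = pd.DataFrame({"A": ["a", "a", "b"], "B": [1, 2, 3]})
g = df.groupby("A")

g.nth(0)       # first row of each group
#    A  B
# 0  a  1
# 2  b  3

g.nth[0, -1]   # index notation: first and last row of each group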

pandas/core/indexes/accessors.py

Lines changed: 22 additions & 0 deletions
@@ -373,6 +373,28 @@ def to_pydatetime(self) -> Series:

     @property
     def freq(self):
+        """
+        Tries to return a string representing a frequency generated by infer_freq.
+
+        Returns None if it can't autodetect the frequency.
+
+        See Also
+        --------
+        Series.dt.to_period : Cast to PeriodArray/PeriodIndex at a particular
+            frequency.
+
+        Examples
+        --------
+        >>> ser = pd.Series(["2024-01-01", "2024-01-02", "2024-01-03", "2024-01-04"])
+        >>> ser = pd.to_datetime(ser)
+        >>> ser.dt.freq
+        'D'
+
+        >>> ser = pd.Series(["2022-01-01", "2024-01-01", "2026-01-01", "2028-01-01"])
+        >>> ser = pd.to_datetime(ser)
+        >>> ser.dt.freq
+        '2YS-JAN'
+        """
         return self._get_values().inferred_freq

     def isocalendar(self) -> DataFrame:

pandas/errors/__init__.py

Lines changed: 6 additions & 0 deletions
@@ -672,6 +672,12 @@ class AttributeConflictWarning(Warning):
     name than the existing index on an HDFStore or attempting to append an index with a
     different frequency than the existing index on an HDFStore.

+    See Also
+    --------
+    HDFStore : Dict-like IO interface for storing pandas objects in PyTables.
+    DataFrame.to_hdf : Write the contained data to an HDF5 file using HDFStore.
+    read_hdf : Read from an HDF5 file into a DataFrame.
+
     Examples
     --------
     >>> idx1 = pd.Index(["a", "b"], name="name1")
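The warning itself is easiest to see with a table-format append whose index name changes between writes; a sketch (assumes PyTables is installed; the file name is illustrative):

import pandas as pd

df1 = pd.DataFrame({"x": [1, 2]}, index=pd.Index(["a", "b"], name="name1"))
df2 = pd.DataFrame({"x": [3, 4]}, index=pd.Index(["c", "d"], name="name2"))

df1.to_hdf("store.h5", key="data", format="table")
# Appending with a conflicting index name emits AttributeConflictWarning
# and resets the stored index-name attribute.
df2.to_hdf("store.h5", key="data", format="table", append=True)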

pandas/io/_util.py

Lines changed: 4 additions & 1 deletion
@@ -60,9 +60,12 @@ def arrow_table_to_pandas(
     table: pyarrow.Table,
     dtype_backend: DtypeBackend | Literal["numpy"] | lib.NoDefault = lib.no_default,
     null_to_int64: bool = False,
+    to_pandas_kwargs: dict | None = None,
 ) -> pd.DataFrame:
     pa = import_optional_dependency("pyarrow")

+    to_pandas_kwargs = {} if to_pandas_kwargs is None else to_pandas_kwargs
+
     types_mapper: type[pd.ArrowDtype] | None | Callable
     if dtype_backend == "numpy_nullable":
         mapping = _arrow_dtype_mapping()
@@ -80,5 +83,5 @@ def arrow_table_to_pandas(
     else:
         raise NotImplementedError

-    df = table.to_pandas(types_mapper=types_mapper)
+    df = table.to_pandas(types_mapper=types_mapper, **to_pandas_kwargs)
     return df
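What the pass-through enables at the pyarrow level, shown directly on a Table (a sketch assuming pyarrow >= 13.0, which added maps_as_pydicts):

import pyarrow as pa

table = pa.table(
    {
        "foo": pa.array(
            [[("A", 1)], [("B", 2)]],
            type=pa.map_(pa.string(), pa.int64()),
        )
    }
)

# The default conversion yields lists of (key, value) tuples; forwarding
# maps_as_pydicts through to_pandas_kwargs yields python dicts instead.
df = table.to_pandas(maps_as_pydicts="strict")
print(df["foo"].iloc[0])
# {'A': 1}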

pandas/io/parquet.py

Lines changed: 20 additions & 2 deletions
@@ -242,6 +242,7 @@ def read(
         dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
         storage_options: StorageOptions | None = None,
         filesystem=None,
+        to_pandas_kwargs: dict[str, Any] | None = None,
         **kwargs,
     ) -> DataFrame:
         kwargs["use_pandas_metadata"] = True
@@ -266,7 +267,11 @@ def read(
                 "make_block is deprecated",
                 DeprecationWarning,
             )
-            result = arrow_table_to_pandas(pa_table, dtype_backend=dtype_backend)
+            result = arrow_table_to_pandas(
+                pa_table,
+                dtype_backend=dtype_backend,
+                to_pandas_kwargs=to_pandas_kwargs,
+            )

             if pa_table.schema.metadata:
                 if b"PANDAS_ATTRS" in pa_table.schema.metadata:
@@ -347,6 +352,7 @@ def read(
         filters=None,
         storage_options: StorageOptions | None = None,
         filesystem=None,
+        to_pandas_kwargs: dict | None = None,
         **kwargs,
     ) -> DataFrame:
         parquet_kwargs: dict[str, Any] = {}
@@ -362,6 +368,10 @@ def read(
             raise NotImplementedError(
                 "filesystem is not implemented for the fastparquet engine."
             )
+        if to_pandas_kwargs is not None:
+            raise NotImplementedError(
+                "to_pandas_kwargs is not implemented for the fastparquet engine."
+            )
         path = stringify_path(path)
         handles = None
         if is_fsspec_url(path):
@@ -452,7 +462,7 @@ def to_parquet(
         .. versionadded:: 2.1.0

     kwargs
-        Additional keyword arguments passed to the engine
+        Additional keyword arguments passed to the engine.

     Returns
     -------
@@ -491,6 +501,7 @@ def read_parquet(
     dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
     filesystem: Any = None,
     filters: list[tuple] | list[list[tuple]] | None = None,
+    to_pandas_kwargs: dict | None = None,
     **kwargs,
 ) -> DataFrame:
     """
@@ -564,6 +575,12 @@ def read_parquet(

         .. versionadded:: 2.1.0

+    to_pandas_kwargs : dict | None, default None
+        Keyword arguments to pass through to :func:`pyarrow.Table.to_pandas`
+        when ``engine="pyarrow"``.
+
+        .. versionadded:: 3.0.0
+
     **kwargs
         Any additional kwargs are passed to the engine.

@@ -636,5 +653,6 @@ def read_parquet(
         storage_options=storage_options,
         dtype_backend=dtype_backend,
         filesystem=filesystem,
+        to_pandas_kwargs=to_pandas_kwargs,
         **kwargs,
     )
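End to end, the new keyword is used like this (a sketch; the file path is hypothetical, and the option requires the pyarrow engine):

import pandas as pd

df = pd.read_parquet(
    "data_with_map_column.parquet",  # hypothetical file containing a map column
    engine="pyarrow",
    to_pandas_kwargs={"maps_as_pydicts": "strict"},
)
# With the fastparquet engine the same keyword raises NotImplementedError,
# per the guard added above.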

pandas/tests/io/test_parquet.py

Lines changed: 14 additions & 0 deletions
@@ -1172,6 +1172,20 @@ def test_non_nanosecond_timestamps(self, temp_file):
         )
         tm.assert_frame_equal(result, expected)

+    def test_maps_as_pydicts(self, pa):
+        pyarrow = pytest.importorskip("pyarrow", "13.0.0")
+
+        schema = pyarrow.schema(
+            [("foo", pyarrow.map_(pyarrow.string(), pyarrow.int64()))]
+        )
+        df = pd.DataFrame([{"foo": {"A": 1}}, {"foo": {"B": 2}}])
+        check_round_trip(
+            df,
+            pa,
+            write_kwargs={"schema": schema},
+            read_kwargs={"to_pandas_kwargs": {"maps_as_pydicts": "strict"}},
+        )
+

 class TestParquetFastParquet(Base):
     def test_basic(self, fp, df_full, request):
