Skip to content

Commit 4b0a90c

Browse files
author
nli307
committed
Change the numpy dtype reported for pyarrow date32 and date64 to day and ms granularity
1 parent 829f8a0 commit 4b0a90c

File tree

2 files changed

+12
-23
lines changed

2 files changed

+12
-23
lines changed

pandas/core/dtypes/common.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1872,11 +1872,6 @@ def pandas_dtype(dtype) -> DtypeObj:
18721872
result = result()
18731873
return result
18741874

1875-
# try a pyarrow dtype
1876-
from pandas.core.dtypes.dtypes import ArrowDtype
1877-
if isinstance(dtype, ArrowDtype):
1878-
return ArrowDtype(dtype)
1879-
18801875
# try a numpy dtype
18811876
# raise a consistent TypeError if failed
18821877
try:

pandas/core/dtypes/dtypes.py

Lines changed: 12 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2277,12 +2277,18 @@ def name(self) -> str: # type: ignore[override]
22772277
@cache_readonly
22782278
def numpy_dtype(self) -> np.dtype:
22792279
"""Return an instance of the related numpy dtype"""
2280-
if pa.types.is_date32(self.pyarrow_dtype) or pa.types.is_date64(
2281-
self.pyarrow_dtype
2282-
):
2283-
# date32 and date64 are pyarrow timestamps but do not have a
2284-
# corresponding numpy dtype.
2285-
return np.dtype(object)
2280+
if pa.types.is_date32(self.pyarrow_dtype):
2281+
# date32 stores whole days since the UNIX epoch; map it to day-resolution
2282+
# datetime64 here rather than relying on pyarrow's to_pandas_dtype() units.
2283+
# This can be removed if/when pyarrow addresses it:
2284+
# https://github.com/apache/arrow/issues/34462
2285+
return np.dtype("datetime64[D]")
2286+
if pa.types.is_date64(self.pyarrow_dtype):
2287+
# date64 stores milliseconds since the UNIX epoch; map it to ms-resolution
2288+
# datetime64 here rather than relying on pyarrow's to_pandas_dtype() units.
2289+
# This can be removed if/when pyarrow addresses it:
2290+
# https://github.com/apache/arrow/issues/34462
2291+
return np.dtype("datetime64[ms]")
22862292
if pa.types.is_timestamp(self.pyarrow_dtype):
22872293
# pa.timestamp(unit).to_pandas_dtype() returns ns units
22882294
# regardless of the pyarrow timestamp units.
@@ -2459,18 +2465,6 @@ def _get_common_dtype(self, dtypes: list[DtypeObj]) -> DtypeObj | None:
24592465

24602466
null_dtype = type(self)(pa.null())
24612467

2462-
# Cover cases where numpy does not have a corresponding dtype, but
2463-
# only one non-null dtype is received, or all dtypes are null.
2464-
single_dtype = {
2465-
dtype
2466-
for dtype in dtypes
2467-
if dtype != null_dtype
2468-
}
2469-
if len(single_dtype) == 0:
2470-
return null_dtype
2471-
if len(single_dtype) == 1:
2472-
return single_dtype.pop()
2473-
24742468
new_dtype = find_common_type(
24752469
[
24762470
dtype.numpy_dtype if isinstance(dtype, ArrowDtype) else dtype

0 commit comments

Comments
 (0)