Skip to content

Commit c21ec52

Browse files
committed
ENH: resolution inference for array_to_timedelta64
1 parent 00a7c41 commit c21ec52

File tree

18 files changed

+166
-131
lines changed

18 files changed

+166
-131
lines changed

pandas/_libs/lib.pyx

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -106,7 +106,6 @@ from pandas._libs.tslibs.nattype cimport (
106106
)
107107
from pandas._libs.tslibs.offsets cimport is_offset_object
108108
from pandas._libs.tslibs.period cimport is_period_object
109-
from pandas._libs.tslibs.timedeltas cimport convert_to_timedelta64
110109
from pandas._libs.tslibs.timezones cimport tz_compare
111110

112111
# constants that will be compared to potentially arbitrarily large
@@ -2670,11 +2669,6 @@ def maybe_convert_objects(ndarray[object] objects,
26702669
elif is_timedelta(val):
26712670
if convert_non_numeric:
26722671
seen.timedelta_ = True
2673-
try:
2674-
convert_to_timedelta64(val, "ns")
2675-
except OutOfBoundsTimedelta:
2676-
seen.object_ = True
2677-
break
26782672
break
26792673
else:
26802674
seen.object_ = True

pandas/_libs/tslibs/conversion.pxd

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,6 @@ cdef int64_t get_datetime64_nanos(object val, NPY_DATETIMEUNIT reso) except? -1
4545

4646
cpdef datetime localize_pydatetime(datetime dt, tzinfo tz)
4747
cdef int64_t cast_from_unit(object ts, str unit, NPY_DATETIMEUNIT out_reso=*) except? -1
48-
cdef (int64_t, int) precision_from_unit(
49-
NPY_DATETIMEUNIT in_reso, NPY_DATETIMEUNIT out_reso=*
50-
)
5148

5249
cdef maybe_localize_tso(_TSObject obj, tzinfo tz, NPY_DATETIMEUNIT reso)
5350

pandas/_libs/tslibs/timedeltas.pxd

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ cpdef int64_t get_unit_for_round(freq, NPY_DATETIMEUNIT creso) except? -1
99
cpdef int64_t delta_to_nanoseconds(
1010
delta, NPY_DATETIMEUNIT reso=*, bint round_ok=*
1111
) except? -1
12-
cdef convert_to_timedelta64(object ts, str unit)
1312
cdef bint is_any_td_scalar(object obj)
1413

1514

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 114 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@ from pandas._libs.missing cimport checknull_with_nat_and_na
4141
from pandas._libs.tslibs.base cimport ABCTimestamp
4242
from pandas._libs.tslibs.conversion cimport (
4343
cast_from_unit,
44-
precision_from_unit,
4544
)
4645
from pandas._libs.tslibs.dtypes cimport (
4746
c_DEPR_UNITS,
@@ -289,68 +288,6 @@ cpdef int64_t delta_to_nanoseconds(
289288
) from err
290289

291290

292-
@cython.overflowcheck(True)
293-
cdef object ensure_td64ns(object ts):
294-
"""
295-
Overflow-safe implementation of td64.astype("m8[ns]")
296-
297-
Parameters
298-
----------
299-
ts : np.timedelta64
300-
301-
Returns
302-
-------
303-
np.timedelta64[ns]
304-
"""
305-
cdef:
306-
NPY_DATETIMEUNIT td64_unit
307-
int64_t td64_value, mult
308-
309-
td64_unit = get_datetime64_unit(ts)
310-
if (
311-
td64_unit != NPY_DATETIMEUNIT.NPY_FR_ns
312-
and td64_unit != NPY_DATETIMEUNIT.NPY_FR_GENERIC
313-
):
314-
315-
td64_value = cnp.get_timedelta64_value(ts)
316-
317-
mult = precision_from_unit(td64_unit)[0]
318-
try:
319-
# NB: cython#1381 this cannot be *=
320-
td64_value = td64_value * mult
321-
except OverflowError as err:
322-
raise OutOfBoundsTimedelta(ts) from err
323-
324-
return np.timedelta64(td64_value, "ns")
325-
326-
return ts
327-
328-
329-
cdef convert_to_timedelta64(object ts, str unit):
330-
"""
331-
Convert an incoming object to a timedelta64 if possible.
332-
Before calling, unit must be standardized to avoid repeated unit conversion
333-
334-
Handle these types of objects:
335-
- timedelta/Timedelta
336-
337-
Return an timedelta64[ns] object
338-
"""
339-
# Caller is responsible for checking unit not in ["Y", "y", "M"]
340-
if isinstance(ts, _Timedelta):
341-
# already in the proper format
342-
if ts._creso != NPY_FR_ns:
343-
ts = ts.as_unit("ns").asm8
344-
else:
345-
ts = np.timedelta64(ts._value, "ns")
346-
347-
elif PyDelta_Check(ts):
348-
ts = np.timedelta64(delta_to_nanoseconds(ts), "ns")
349-
elif not cnp.is_timedelta64_object(ts):
350-
raise TypeError(f"Invalid type for timedelta scalar: {type(ts)}")
351-
return ts.astype("timedelta64[ns]")
352-
353-
354291
cdef _numeric_to_td64ns(object item, str unit):
355292
# caller is responsible for checking
356293
# assert unit not in ["Y", "y", "M"]
@@ -369,10 +306,34 @@ cdef _numeric_to_td64ns(object item, str unit):
369306
return ts
370307

371308

309+
# TODO: de-duplicate with DatetimeParseState
310+
cdef class ResoState:  # Tracks the resolution adopted so far while items are processed, plus whether it was ever bumped.
311+
cdef:
312+
NPY_DATETIMEUNIT creso  # resolution currently in effect; NPY_FR_GENERIC is treated as "nothing adopted yet" by update_creso
313+
bint creso_ever_changed  # set to True once update_creso replaces an already-adopted resolution with a larger one
314+
315+
def __cinit__(self, NPY_DATETIMEUNIT creso):  # seed with the caller's starting resolution
316+
self.creso = creso
317+
self.creso_ever_changed = False  # nothing has been bumped yet
318+
319+
cdef bint update_creso(self, NPY_DATETIMEUNIT item_reso) noexcept:  # fold one item's resolution into the state
320+
# Return a bool indicating whether we bumped to a higher resolution
321+
if self.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
322+
self.creso = item_reso  # first resolution seen: adopt it; not counted as a change (falls through to return False)
323+
elif item_reso > self.creso:  # compares enum values; a larger value is what the comment above calls "higher resolution"
324+
self.creso = item_reso
325+
self.creso_ever_changed = True  # sticky flag: never reset to False by this method
326+
return True
327+
return False  # item_reso was adopted as the first value, or did not exceed the current one
328+
329+
372330
@cython.boundscheck(False)
373331
@cython.wraparound(False)
374332
def array_to_timedelta64(
375-
ndarray values, str unit=None, str errors="raise"
333+
ndarray values,
334+
str unit=None,
335+
str errors="raise",
336+
NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_GENERIC,
376337
) -> ndarray:
377338
# values is object-dtype, may be 2D
378339
"""
@@ -394,6 +355,10 @@ def array_to_timedelta64(
394355
cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, values)
395356
cnp.flatiter it
396357
str parsed_unit = parse_timedelta_unit(unit or "ns")
358+
NPY_DATETIMEUNIT item_reso
359+
ResoState state = ResoState(creso)
360+
bint infer_reso = creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC
361+
ndarray iresult = result.view("i8")
397362

398363
if values.descr.type_num != cnp.NPY_OBJECT:
399364
# raise here otherwise we segfault below
@@ -421,18 +386,58 @@ def array_to_timedelta64(
421386
ival = NPY_NAT
422387

423388
elif cnp.is_timedelta64_object(item):
424-
td64ns_obj = ensure_td64ns(item)
425-
ival = cnp.get_timedelta64_value(td64ns_obj)
389+
# TODO: de-duplicate this with Timedelta.__new__
390+
ival = cnp.get_timedelta64_value(item)
391+
dt64_reso = get_datetime64_unit(item)
392+
if not (
393+
is_supported_unit(dt64_reso) or
394+
dt64_reso in [
395+
NPY_DATETIMEUNIT.NPY_FR_m,
396+
NPY_DATETIMEUNIT.NPY_FR_h,
397+
NPY_DATETIMEUNIT.NPY_FR_D,
398+
NPY_DATETIMEUNIT.NPY_FR_W,
399+
NPY_DATETIMEUNIT.NPY_FR_GENERIC
400+
]
401+
):
402+
err = npy_unit_to_abbrev(dt64_reso)
403+
raise ValueError(
404+
f"Unit {err} is not supported. "
405+
"Only unambiguous timedelta values durations are supported. "
406+
"Allowed units are 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns'")
407+
408+
item_reso = get_supported_reso(dt64_reso)
409+
state.update_creso(item_reso)
410+
if infer_reso:
411+
creso = state.creso
412+
if dt64_reso != NPY_DATETIMEUNIT.NPY_FR_GENERIC:
413+
try:
414+
ival = convert_reso(
415+
ival,
416+
dt64_reso,
417+
creso,
418+
round_ok=True,
419+
)
420+
except (OverflowError, OutOfBoundsDatetime) as err:
421+
raise OutOfBoundsTimedelta(item) from err
422+
else:
423+
# e.g. NaT
424+
pass
426425

427426
elif isinstance(item, _Timedelta):
428-
if item._creso != NPY_FR_ns:
429-
ival = item.as_unit("ns")._value
430-
else:
431-
ival = item._value
427+
item_reso = item._creso
428+
state.update_creso(item_reso)
429+
if infer_reso:
430+
creso = state.creso
431+
432+
ival = (<_Timedelta>item)._as_creso(creso)._value
432433

433434
elif PyDelta_Check(item):
434435
# i.e. isinstance(item, timedelta)
435-
ival = delta_to_nanoseconds(item)
436+
item_reso = NPY_DATETIMEUNIT.NPY_FR_us
437+
state.update_creso(item_reso)
438+
if infer_reso:
439+
creso = state.creso
440+
ival = delta_to_nanoseconds(item, reso=creso)
436441

437442
elif isinstance(item, str):
438443
if (
@@ -443,13 +448,27 @@ def array_to_timedelta64(
443448
else:
444449
ival = parse_timedelta_string(item)
445450

451+
item_reso = NPY_FR_ns
452+
state.update_creso(item_reso)
453+
if infer_reso:
454+
creso = state.creso
455+
446456
elif is_tick_object(item):
447-
ival = item.nanos
457+
item_reso = get_supported_reso(item._creso)
458+
state.update_creso(item_reso)
459+
if infer_reso:
460+
creso = state.creso
461+
ival = delta_to_nanoseconds(item, reso=creso)
448462

449463
elif is_integer_object(item) or is_float_object(item):
450464
td64ns_obj = _numeric_to_td64ns(item, parsed_unit)
451465
ival = cnp.get_timedelta64_value(td64ns_obj)
452466

467+
item_reso = NPY_FR_ns
468+
state.update_creso(item_reso)
469+
if infer_reso:
470+
creso = state.creso
471+
453472
else:
454473
raise TypeError(f"Invalid type for timedelta scalar: {type(item)}")
455474

@@ -467,7 +486,29 @@ def array_to_timedelta64(
467486

468487
cnp.PyArray_MultiIter_NEXT(mi)
469488

470-
return result
489+
if infer_reso:
490+
if state.creso_ever_changed:
491+
# We encountered mismatched resolutions, need to re-parse with
492+
# the correct one.
493+
return array_to_timedelta64(
494+
values,
495+
unit=unit,
496+
errors=errors,
497+
creso=state.creso,
498+
)
499+
elif state.creso == NPY_DATETIMEUNIT.NPY_FR_GENERIC:
500+
# i.e. we never encountered anything non-NaT, default to "s". This
501+
# ensures that insert and concat-like operations with NaT
502+
# do not upcast units
503+
result = iresult.view("m8[s]")
504+
else:
505+
# Otherwise we can use the single reso that we encountered and avoid
506+
# a second pass.
507+
abbrev = npy_unit_to_abbrev(state.creso)
508+
result = iresult.view(f"m8[{abbrev}]")
509+
510+
abbrev = npy_unit_to_abbrev(creso)
511+
return result.view(f"m8[{abbrev}]")
471512

472513

473514
@cython.cpow(True)

pandas/core/arrays/timedeltas.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1251,7 +1251,7 @@ def _objects_to_td64ns(
12511251
values = np.asarray(data, dtype=np.object_)
12521252

12531253
result = array_to_timedelta64(values, unit=unit, errors=errors)
1254-
return result.view("timedelta64[ns]")
1254+
return result
12551255

12561256

12571257
def _validate_td64_dtype(dtype) -> DtypeObj:

pandas/tests/arithmetic/test_timedelta64.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -717,7 +717,7 @@ def test_tdi_add_overflow(self):
717717
)
718718

719719
# These should not overflow!
720-
exp = TimedeltaIndex([NaT])
720+
exp = TimedeltaIndex([NaT], dtype="m8[ns]")
721721
result = pd.to_timedelta([NaT]) - Timedelta("1 days")
722722
tm.assert_index_equal(result, exp)
723723

@@ -2216,7 +2216,7 @@ def test_float_series_rdiv_td64arr(self, box_with_array, names):
22162216

22172217
def test_td64arr_all_nat_div_object_dtype_numeric(self, box_with_array):
22182218
# GH#39750 make sure we infer the result as td64
2219-
tdi = TimedeltaIndex([NaT, NaT])
2219+
tdi = TimedeltaIndex([NaT, NaT], dtype="m8[ns]")
22202220

22212221
left = tm.box_expected(tdi, box_with_array)
22222222
right = np.array([2, 2.0], dtype=object)

pandas/tests/dtypes/test_inference.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -808,7 +808,7 @@ def test_maybe_convert_objects_datetime(self):
808808
tm.assert_numpy_array_equal(out, exp)
809809

810810
arr = np.array([pd.NaT, np.timedelta64(1, "s")], dtype=object)
811-
exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[ns]")
811+
exp = np.array([np.timedelta64("NaT"), np.timedelta64(1, "s")], dtype="m8[s]")
812812
out = lib.maybe_convert_objects(arr, convert_non_numeric=True)
813813
tm.assert_numpy_array_equal(out, exp)
814814

@@ -863,7 +863,7 @@ def test_maybe_convert_objects_datetime_overflow_safe(self, dtype):
863863
if dtype == "datetime64[ns]":
864864
expected = np.array(["2363-10-04"], dtype="M8[us]")
865865
else:
866-
expected = arr
866+
expected = arr.astype("m8[us]")
867867
tm.assert_numpy_array_equal(out, expected)
868868

869869
def test_maybe_convert_objects_mixed_datetimes(self):

pandas/tests/extension/test_arrow.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -591,14 +591,6 @@ def test_reduce_frame(self, data, all_numeric_reductions, skipna, request):
591591
if data.dtype._is_numeric:
592592
mark = pytest.mark.xfail(reason="skew not implemented")
593593
request.applymarker(mark)
594-
elif (
595-
op_name in ["std", "sem"]
596-
and pa.types.is_date64(data._pa_array.type)
597-
and skipna
598-
):
599-
# overflow
600-
mark = pytest.mark.xfail(reason="Cannot cast")
601-
request.applymarker(mark)
602594
return super().test_reduce_frame(data, all_numeric_reductions, skipna)
603595

604596
@pytest.mark.parametrize("typ", ["int64", "uint64", "float64"])

pandas/tests/frame/methods/test_dtypes.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,7 @@ def test_dtypes_timedeltas(self):
103103
)
104104
result = df.dtypes
105105
expected = Series(
106-
[np.dtype("datetime64[ns]"), np.dtype("timedelta64[ns]")], index=list("AB")
106+
[np.dtype("datetime64[ns]"), np.dtype("timedelta64[us]")], index=list("AB")
107107
)
108108
tm.assert_series_equal(result, expected)
109109

@@ -112,7 +112,7 @@ def test_dtypes_timedeltas(self):
112112
expected = Series(
113113
[
114114
np.dtype("datetime64[ns]"),
115-
np.dtype("timedelta64[ns]"),
115+
np.dtype("timedelta64[us]"),
116116
np.dtype("datetime64[ns]"),
117117
],
118118
index=list("ABC"),
@@ -125,7 +125,7 @@ def test_dtypes_timedeltas(self):
125125
expected = Series(
126126
[
127127
np.dtype("datetime64[ns]"),
128-
np.dtype("timedelta64[ns]"),
128+
np.dtype("timedelta64[us]"),
129129
np.dtype("datetime64[ns]"),
130130
np.dtype("int64"),
131131
],

0 commit comments

Comments
 (0)