Skip to content

Commit 3ec3d6e

Browse files
jbrockmendel authored and Lakshya-Upadhyaya committed
REF: inline array_to_datetime64 cases, update tests (pandas-dev#63015)
1 parent 5641979 commit 3ec3d6e

File tree

13 files changed

+115
-107
lines changed

13 files changed

+115
-107
lines changed

doc/source/whatsnew/v3.0.0.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -811,6 +811,7 @@ Other Deprecations
811811
- Deprecated slicing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` using a ``datetime.date`` object, explicitly cast to :class:`Timestamp` instead (:issue:`35830`)
812812
- Deprecated support for the Dataframe Interchange Protocol (:issue:`56732`)
813813
- Deprecated the 'inplace' keyword from :meth:`Resampler.interpolate`, as passing ``True`` raises ``AttributeError`` (:issue:`58690`)
814+
- Deprecated the ``verify_integrity`` parameter in :meth:`DataFrame.set_index`; use ``obj.index.is_unique`` to check for duplicate labels instead. (:issue:`62919`)
814815

815816
.. ---------------------------------------------------------------------------
816817
.. _whatsnew_300.prior_deprecations:

pandas/_libs/tslibs/timedeltas.pyx

Lines changed: 63 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -333,58 +333,39 @@ cdef convert_to_timedelta64(object ts, str unit):
333333
334334
Handle these types of objects:
335335
- timedelta/Timedelta
336-
- timedelta64
337-
- an offset
338-
- np.int64 (with unit providing a possible modifier)
339-
- None/NaT
340336
341-
Return an ns based int64
337+
Return a timedelta64[ns] object
342338
"""
343339
# Caller is responsible for checking unit not in ["Y", "y", "M"]
344-
if checknull_with_nat_and_na(ts):
345-
return np.timedelta64(NPY_NAT, "ns")
346-
elif isinstance(ts, _Timedelta):
340+
if isinstance(ts, _Timedelta):
347341
# already in the proper format
348342
if ts._creso != NPY_FR_ns:
349343
ts = ts.as_unit("ns").asm8
350344
else:
351345
ts = np.timedelta64(ts._value, "ns")
352-
elif cnp.is_timedelta64_object(ts):
353-
ts = ensure_td64ns(ts)
354-
elif is_integer_object(ts):
355-
if ts == NPY_NAT:
356-
return np.timedelta64(NPY_NAT, "ns")
357-
else:
358-
ts = _maybe_cast_from_unit(ts, unit)
359-
elif is_float_object(ts):
360-
ts = _maybe_cast_from_unit(ts, unit)
361-
elif isinstance(ts, str):
362-
if (len(ts) > 0 and ts[0] == "P") or (len(ts) > 1 and ts[:2] == "-P"):
363-
ts = parse_iso_format_string(ts)
364-
else:
365-
ts = parse_timedelta_string(ts)
366-
ts = np.timedelta64(ts, "ns")
367-
elif is_tick_object(ts):
368-
ts = np.timedelta64(ts.nanos, "ns")
369346

370-
if PyDelta_Check(ts):
347+
elif PyDelta_Check(ts):
371348
ts = np.timedelta64(delta_to_nanoseconds(ts), "ns")
372349
elif not cnp.is_timedelta64_object(ts):
373350
raise TypeError(f"Invalid type for timedelta scalar: {type(ts)}")
374351
return ts.astype("timedelta64[ns]")
375352

376353

377-
cdef _maybe_cast_from_unit(ts, str unit):
354+
cdef _numeric_to_td64ns(object item, str unit):
378355
# caller is responsible for checking
379356
# assert unit not in ["Y", "y", "M"]
357+
# assert is_integer_object(item) or is_float_object(item)
358+
if is_integer_object(item) and item == NPY_NAT:
359+
return np.timedelta64(NPY_NAT, "ns")
360+
380361
try:
381-
ts = cast_from_unit(ts, unit)
362+
item = cast_from_unit(item, unit)
382363
except OutOfBoundsDatetime as err:
383364
raise OutOfBoundsTimedelta(
384-
f"Cannot cast {ts} from {unit} to 'ns' without overflow."
365+
f"Cannot cast {item} from {unit} to 'ns' without overflow."
385366
) from err
386367

387-
ts = np.timedelta64(ts, "ns")
368+
ts = np.timedelta64(item, "ns")
388369
return ts
389370

390371

@@ -408,10 +389,11 @@ def array_to_timedelta64(
408389
cdef:
409390
Py_ssize_t i, n = values.size
410391
ndarray result = np.empty((<object>values).shape, dtype="m8[ns]")
411-
object item
392+
object item, td64ns_obj
412393
int64_t ival
413394
cnp.broadcast mi = cnp.PyArray_MultiIterNew2(result, values)
414395
cnp.flatiter it
396+
str parsed_unit = parse_timedelta_unit(unit or "ns")
415397

416398
if values.descr.type_num != cnp.NPY_OBJECT:
417399
# raise here otherwise we segfault below
@@ -431,70 +413,63 @@ def array_to_timedelta64(
431413
)
432414
cnp.PyArray_ITER_NEXT(it)
433415

434-
# Usually, we have all strings. If so, we hit the fast path.
435-
# If this path fails, we try conversion a different way, and
436-
# this is where all of the error handling will take place.
437-
try:
438-
for i in range(n):
439-
# Analogous to: item = values[i]
440-
item = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
416+
for i in range(n):
417+
item = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
441418

442-
ival = _item_to_timedelta64_fastpath(item)
419+
try:
420+
if checknull_with_nat_and_na(item):
421+
ival = NPY_NAT
443422

444-
# Analogous to: iresult[i] = ival
445-
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival
423+
elif cnp.is_timedelta64_object(item):
424+
td64ns_obj = ensure_td64ns(item)
425+
ival = cnp.get_timedelta64_value(td64ns_obj)
446426

447-
cnp.PyArray_MultiIter_NEXT(mi)
427+
elif isinstance(item, _Timedelta):
428+
if item._creso != NPY_FR_ns:
429+
ival = item.as_unit("ns")._value
430+
else:
431+
ival = item._value
432+
433+
elif PyDelta_Check(item):
434+
# i.e. isinstance(item, timedelta)
435+
ival = delta_to_nanoseconds(item)
436+
437+
elif isinstance(item, str):
438+
if (
439+
(len(item) > 0 and item[0] == "P")
440+
or (len(item) > 1 and item[:2] == "-P")
441+
):
442+
ival = parse_iso_format_string(item)
443+
else:
444+
ival = parse_timedelta_string(item)
448445

449-
except (TypeError, ValueError):
450-
cnp.PyArray_MultiIter_RESET(mi)
446+
elif is_tick_object(item):
447+
ival = item.nanos
451448

452-
parsed_unit = parse_timedelta_unit(unit or "ns")
453-
for i in range(n):
454-
item = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
449+
elif is_integer_object(item) or is_float_object(item):
450+
td64ns_obj = _numeric_to_td64ns(item, parsed_unit)
451+
ival = cnp.get_timedelta64_value(td64ns_obj)
455452

456-
ival = _item_to_timedelta64(item, parsed_unit, errors)
453+
else:
454+
raise TypeError(f"Invalid type for timedelta scalar: {type(item)}")
455+
456+
except ValueError as err:
457+
if errors == "coerce":
458+
ival = NPY_NAT
459+
elif "unit abbreviation w/o a number" in str(err):
460+
# re-raise with more pertinent message
461+
msg = f"Could not convert '{item}' to NumPy timedelta"
462+
raise ValueError(msg) from err
463+
else:
464+
raise
457465

458-
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival
466+
(<int64_t*>cnp.PyArray_MultiIter_DATA(mi, 0))[0] = ival
459467

460-
cnp.PyArray_MultiIter_NEXT(mi)
468+
cnp.PyArray_MultiIter_NEXT(mi)
461469

462470
return result
463471

464472

465-
cdef int64_t _item_to_timedelta64_fastpath(object item) except? -1:
466-
"""
467-
See array_to_timedelta64.
468-
"""
469-
if item is NaT:
470-
# we allow this check in the fast-path because NaT is a C-object
471-
# so this is an inexpensive check
472-
return NPY_NAT
473-
else:
474-
return parse_timedelta_string(item)
475-
476-
477-
cdef int64_t _item_to_timedelta64(
478-
object item,
479-
str parsed_unit,
480-
str errors
481-
) except? -1:
482-
"""
483-
See array_to_timedelta64.
484-
"""
485-
try:
486-
return cnp.get_timedelta64_value(convert_to_timedelta64(item, parsed_unit))
487-
except ValueError as err:
488-
if errors == "coerce":
489-
return NPY_NAT
490-
elif "unit abbreviation w/o a number" in str(err):
491-
# re-raise with more pertinent message
492-
msg = f"Could not convert '{item}' to NumPy timedelta"
493-
raise ValueError(msg) from err
494-
else:
495-
raise
496-
497-
498473
@cython.cpow(True)
499474
cdef int64_t parse_timedelta_string(str ts) except? -1:
500475
"""
@@ -2154,12 +2129,14 @@ class Timedelta(_Timedelta):
21542129
new_value = delta_to_nanoseconds(value, reso=new_reso)
21552130
return cls._from_value_and_reso(new_value, reso=new_reso)
21562131

2132+
elif checknull_with_nat_and_na(value):
2133+
return NaT
2134+
21572135
elif is_integer_object(value) or is_float_object(value):
21582136
# unit=None is de-facto 'ns'
21592137
unit = parse_timedelta_unit(unit)
2160-
value = convert_to_timedelta64(value, unit)
2161-
elif checknull_with_nat_and_na(value):
2162-
return NaT
2138+
value = _numeric_to_td64ns(value, unit)
2139+
21632140
else:
21642141
raise ValueError(
21652142
"Value must be Timedelta, string, integer, "

pandas/core/frame.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6362,6 +6362,16 @@ def set_index(
63626362
2013 84
63636363
2014 31
63646364
"""
6365+
6366+
if verify_integrity:
6367+
warnings.warn(
6368+
"The verify_integrity keyword in DataFrame.set_index is deprecated "
6369+
"and will be removed in future version. "
6370+
"Check uniqueness with obj.index.is_unique instead.",
6371+
Pandas4Warning,
6372+
stacklevel=find_stack_level(),
6373+
)
6374+
63656375
inplace = validate_bool_kwarg(inplace, "inplace")
63666376
self._check_inplace_and_allows_duplicate_labels(inplace)
63676377
if not isinstance(keys, list):

pandas/tests/arithmetic/test_datetime64.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1011,7 +1011,9 @@ def test_dt64arr_sub_timestamp_tzaware(self, box_with_array):
10111011

10121012
ser = tm.box_expected(ser, box_with_array)
10131013

1014-
delta_series = Series([np.timedelta64(0, "D"), np.timedelta64(1, "D")])
1014+
delta_series = Series(
1015+
[np.timedelta64(0, "D"), np.timedelta64(1, "D")], dtype="m8[ns]"
1016+
)
10151017
expected = tm.box_expected(delta_series, box_with_array)
10161018

10171019
tm.assert_equal(ser - ts, expected)

pandas/tests/arithmetic/test_period.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1641,7 +1641,9 @@ def test_pi_sub_period(self):
16411641
result = np.subtract(Period("2012-01", freq="M"), idx)
16421642
tm.assert_index_equal(result, exp)
16431643

1644-
exp = TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name="idx")
1644+
exp = TimedeltaIndex(
1645+
[np.nan, np.nan, np.nan, np.nan], name="idx", dtype="m8[ns]"
1646+
)
16451647
result = idx - Period("NaT", freq="M")
16461648
tm.assert_index_equal(result, exp)
16471649
assert result.freq == exp.freq
@@ -1655,7 +1657,7 @@ def test_pi_sub_pdnat(self):
16551657
idx = PeriodIndex(
16561658
["2011-01", "2011-02", "NaT", "2011-04"], freq="M", name="idx"
16571659
)
1658-
exp = TimedeltaIndex([pd.NaT] * 4, name="idx")
1660+
exp = TimedeltaIndex([pd.NaT] * 4, name="idx", dtype="m8[ns]")
16591661
tm.assert_index_equal(pd.NaT - idx, exp)
16601662
tm.assert_index_equal(idx - pd.NaT, exp)
16611663

@@ -1674,6 +1676,8 @@ def test_pi_sub_period_nat(self):
16741676
exp = pd.Index([12 * off, pd.NaT, 10 * off, 9 * off], name="idx")
16751677
tm.assert_index_equal(result, exp)
16761678

1677-
exp = TimedeltaIndex([np.nan, np.nan, np.nan, np.nan], name="idx")
1679+
exp = TimedeltaIndex(
1680+
[np.nan, np.nan, np.nan, np.nan], name="idx", dtype="m8[ns]"
1681+
)
16781682
tm.assert_index_equal(idx - Period("NaT", freq="M"), exp)
16791683
tm.assert_index_equal(Period("NaT", freq="M") - idx, exp)

pandas/tests/arithmetic/test_timedelta64.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -850,7 +850,7 @@ def test_operators_timedelta64(self):
850850
assert rs.dtype == "timedelta64[ns]"
851851

852852
df = DataFrame({"A": v1})
853-
td = Series([timedelta(days=i) for i in range(3)])
853+
td = Series([timedelta(days=i) for i in range(3)], dtype="m8[ns]")
854854
assert td.dtype == "timedelta64[ns]"
855855

856856
# series on the rhs
@@ -875,7 +875,9 @@ def test_operators_timedelta64(self):
875875

876876
# datetimes on rhs
877877
result = df["A"] - datetime(2001, 1, 1)
878-
expected = Series([timedelta(days=4017 + i) for i in range(3)], name="A")
878+
expected = Series(
879+
[timedelta(days=4017 + i) for i in range(3)], name="A", dtype="m8[ns]"
880+
)
879881
tm.assert_series_equal(result, expected)
880882
assert result.dtype == "m8[ns]"
881883

@@ -1559,7 +1561,7 @@ def test_tdi_rmul_arraylike(self, other, box_with_array):
15591561

15601562
def test_td64arr_mul_bool_scalar_raises(self, box_with_array):
15611563
# GH#58054
1562-
ser = Series(np.arange(5) * timedelta(hours=1))
1564+
ser = Series(np.arange(5) * timedelta(hours=1), dtype="m8[ns]")
15631565
obj = tm.box_expected(ser, box_with_array)
15641566

15651567
msg = r"Cannot multiply 'timedelta64\[ns\]' by bool"
@@ -1582,7 +1584,7 @@ def test_td64arr_mul_bool_scalar_raises(self, box_with_array):
15821584
)
15831585
def test_td64arr_mul_bool_raises(self, dtype, box_with_array):
15841586
# GH#58054
1585-
ser = Series(np.arange(5) * timedelta(hours=1))
1587+
ser = Series(np.arange(5) * timedelta(hours=1), dtype="m8[ns]")
15861588
obj = tm.box_expected(ser, box_with_array)
15871589

15881590
other = Series(np.arange(5) < 0.5, dtype=dtype)
@@ -1611,7 +1613,7 @@ def test_td64arr_mul_bool_raises(self, dtype, box_with_array):
16111613
],
16121614
)
16131615
def test_td64arr_mul_masked(self, dtype, box_with_array):
1614-
ser = Series(np.arange(5) * timedelta(hours=1))
1616+
ser = Series(np.arange(5) * timedelta(hours=1), dtype="m8[ns]")
16151617
obj = tm.box_expected(ser, box_with_array)
16161618

16171619
other = Series(np.arange(5), dtype=dtype)

pandas/tests/frame/methods/test_set_index.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
import numpy as np
1111
import pytest
1212

13+
from pandas.errors import Pandas4Warning
14+
1315
from pandas import (
1416
Categorical,
1517
CategoricalIndex,
@@ -542,6 +544,12 @@ def test_set_index_period(self):
542544
tm.assert_index_equal(df.index.get_level_values(1), idx2)
543545
tm.assert_index_equal(df.index.get_level_values(2), idx3)
544546

547+
def test_set_index_verify_integrity_deprecated(self):
548+
# GH#62919
549+
df = DataFrame({"A": [1, 2, 3], "B": [10, 20, 30]})
550+
with tm.assert_produces_warning(Pandas4Warning, match="verify_integrity"):
551+
df.set_index("A", verify_integrity=True)
552+
545553

546554
class TestSetIndexInvalid:
547555
def test_set_index_verify_integrity(self, frame_of_index_cols):

pandas/tests/frame/test_reductions.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -747,12 +747,14 @@ def test_operators_timedelta64(self):
747747

748748
# works when only those columns are selected
749749
result = mixed[["A", "B"]].min(axis=1)
750-
expected = Series([timedelta(days=-1)] * 3)
750+
expected = Series([timedelta(days=-1)] * 3, dtype="m8[ns]")
751751
tm.assert_series_equal(result, expected)
752752

753753
result = mixed[["A", "B"]].min()
754754
expected = Series(
755-
[timedelta(seconds=5 * 60 + 5), timedelta(days=-1)], index=["A", "B"]
755+
[timedelta(seconds=5 * 60 + 5), timedelta(days=-1)],
756+
index=["A", "B"],
757+
dtype="m8[ns]",
756758
)
757759
tm.assert_series_equal(result, expected)
758760

pandas/tests/indexes/timedeltas/test_formats.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def test_repr_round_days_non_nano(self):
2222

2323
@pytest.mark.parametrize("method", ["__repr__", "__str__"])
2424
def test_representation(self, method):
25-
idx1 = TimedeltaIndex([], freq="D")
25+
idx1 = TimedeltaIndex([], freq="D", dtype="m8[ns]")
2626
idx2 = TimedeltaIndex(["1 days"], freq="D")
2727
idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D")
2828
idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D")
@@ -53,7 +53,7 @@ def test_representation(self, method):
5353

5454
# TODO: this is a Series.__repr__ test
5555
def test_representation_to_series(self):
56-
idx1 = TimedeltaIndex([], freq="D")
56+
idx1 = TimedeltaIndex([], freq="D", dtype="m8[ns]")
5757
idx2 = TimedeltaIndex(["1 days"], freq="D")
5858
idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D")
5959
idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D")
@@ -83,7 +83,7 @@ def test_representation_to_series(self):
8383

8484
def test_summary(self):
8585
# GH#9116
86-
idx1 = TimedeltaIndex([], freq="D")
86+
idx1 = TimedeltaIndex([], freq="D", dtype="m8[ns]")
8787
idx2 = TimedeltaIndex(["1 days"], freq="D")
8888
idx3 = TimedeltaIndex(["1 days", "2 days"], freq="D")
8989
idx4 = TimedeltaIndex(["1 days", "2 days", "3 days"], freq="D")

0 commit comments

Comments
 (0)