Skip to content

Commit 0815d44

Browse files
GH1409 Improve Series.to_numpy typing (#1476)
* GH1409 Improve Series.to_numpy typing
* GH1409 PR Feedback
* GH1409 PR Feedback
* GH1409 PR Feedback
1 parent 71d53e8 commit 0815d44

File tree

4 files changed

+164
-12
lines changed

4 files changed

+164
-12
lines changed

pandas-stubs/_typing.pyi

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -863,14 +863,17 @@ np_ndarray_num: TypeAlias = np_ndarray[ShapeT, NpNumT]
863863

864864
# Numpy arrays with known shape (Do not use as argument types, only as return types)
865865
# 1-D ndarray alias, generic over the element (scalar) type GenericT.
np_1darray: TypeAlias = np.ndarray[tuple[int], np.dtype[GenericT]]
866+
# Shorthands for np_1darray specialised to one NumPy scalar type each.
np_1darray_str: TypeAlias = np_1darray[np.str_]
867+
np_1darray_bytes: TypeAlias = np_1darray[np.bytes_]
868+
np_1darray_complex: TypeAlias = np_1darray[np.complexfloating]
869+
np_1darray_object: TypeAlias = np_1darray[np.object_]
866870
np_1darray_bool: TypeAlias = np_1darray[np.bool]
867871
np_1darray_intp: TypeAlias = np_1darray[np.intp]
868872
np_1darray_int64: TypeAlias = np_1darray[np.int64]
869873
np_1darray_anyint: TypeAlias = np_1darray[np.integer]
870874
np_1darray_float: TypeAlias = np_1darray[np.floating]
871875
np_1darray_dt: TypeAlias = np_1darray[np.datetime64]
872876
np_1darray_td: TypeAlias = np_1darray[np.timedelta64]
873-
874877
# 2-D counterpart of np_1darray (shape tuple[int, int]).
np_2darray: TypeAlias = np.ndarray[tuple[int, int], np.dtype[GenericT]]
875878

876879
DtypeNp = TypeVar("DtypeNp", bound=np.dtype[np.generic])

pandas-stubs/core/series.pyi

Lines changed: 58 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -220,10 +220,16 @@ from pandas._typing import (
220220
WriteBuffer,
221221
_T_co,
222222
np_1darray,
223+
np_1darray_anyint,
223224
np_1darray_bool,
225+
np_1darray_bytes,
226+
np_1darray_complex,
224227
np_1darray_dt,
228+
np_1darray_float,
225229
np_1darray_int64,
226230
np_1darray_intp,
231+
np_1darray_object,
232+
np_1darray_str,
227233
np_1darray_td,
228234
np_ndarray,
229235
np_ndarray_anyint,
@@ -4469,7 +4475,7 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
44694475
copy: bool = False,
44704476
na_value: Scalar = ...,
44714477
**kwargs: Any,
4472-
) -> np_1darray[np.object_]: ...
4478+
) -> np_1darray_object: ...
44734479
@overload
44744480
def to_numpy(
44754481
self: Series[Period],
@@ -4485,23 +4491,23 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
44854491
copy: bool = False,
44864492
na_value: Scalar = ...,
44874493
**kwargs: Any,
4488-
) -> np_1darray[np.object_]: ...
4494+
) -> np_1darray_object: ...
44894495
# Overload: Series[BaseOffset] with an explicit `type[np.bytes_]` dtype is
# typed as a 1-D bytes array (now spelled with the np_1darray_bytes alias).
@overload
44904496
def to_numpy(
44914497
self: Series[BaseOffset],
44924498
dtype: type[np.bytes_],
44934499
copy: bool = False,
44944500
na_value: Scalar = ...,
44954501
**kwargs: Any,
4496-
) -> np_1darray[np.bytes_]: ...
4502+
) -> np_1darray_bytes: ...
44974503
# Overload: Series[Interval] with dtype omitted, None, or `type[np.object_]`
# is typed as a 1-D object array (now spelled with the np_1darray_object alias).
@overload
44984504
def to_numpy(
44994505
self: Series[Interval],
45004506
dtype: type[np.object_] | None = None,
45014507
copy: bool = False,
45024508
na_value: Scalar = ...,
45034509
**kwargs: Any,
4504-
) -> np_1darray[np.object_]: ...
4510+
) -> np_1darray_object: ...
45054511
@overload
45064512
def to_numpy(
45074513
self: Series[Interval],
@@ -4511,6 +4517,54 @@ class Series(IndexOpsMixin[S1], ElementOpsMixin[S1], NDFrame):
45114517
**kwargs: Any,
45124518
) -> np_1darray[_T_INTERVAL_NP]: ...
45134519
# Overload: Series[int] is typed as returning np_1darray_anyint.
# NOTE(review): `dtype` accepts any DTypeLike, yet the return type is fixed;
# e.g. dtype=float would still be typed as an integer array - confirm intended.
@overload
4520+
def to_numpy(
4521+
self: Series[int],
4522+
dtype: DTypeLike | None = None,
4523+
copy: bool = False,
4524+
na_value: Scalar = ...,
4525+
**kwargs: Any,
4526+
) -> np_1darray_anyint: ...
4527+
# Overload: Series[float] is typed as returning np_1darray_float.
# NOTE(review): the return type does not depend on `dtype`; a call with a
# non-floating dtype would still be typed as a floating array - confirm intended.
@overload
4528+
def to_numpy(
4529+
self: Series[float],
4530+
dtype: DTypeLike | None = None,
4531+
copy: bool = False,
4532+
na_value: Scalar = ...,
4533+
**kwargs: Any,
4534+
) -> np_1darray_float: ...
4535+
# Overload: Series[complex] is typed as returning np_1darray_complex.
# NOTE(review): the return type does not depend on `dtype`, which accepts any
# DTypeLike - confirm that ignoring an explicit dtype here is intended.
@overload
4536+
def to_numpy(
4537+
self: Series[complex],
4538+
dtype: DTypeLike | None = None,
4539+
copy: bool = False,
4540+
na_value: Scalar = ...,
4541+
**kwargs: Any,
4542+
) -> np_1darray_complex: ...
4543+
# Overload: Series[bool] is typed as returning np_1darray_bool.
# NOTE(review): the return type does not depend on `dtype`, which accepts any
# DTypeLike - confirm that ignoring an explicit dtype here is intended.
@overload
4544+
def to_numpy(
4545+
self: Series[bool],
4546+
dtype: DTypeLike | None = None,
4547+
copy: bool = False,
4548+
na_value: Scalar = ...,
4549+
**kwargs: Any,
4550+
) -> np_1darray_bool: ...
4551+
# Overload: Series[_str] is typed as returning np_1darray_str.
# NOTE(review): the return type does not depend on `dtype`, which accepts any
# DTypeLike - confirm that ignoring an explicit dtype here is intended.
@overload
4552+
def to_numpy(
4553+
self: Series[_str],
4554+
dtype: DTypeLike | None = None,
4555+
copy: bool = False,
4556+
na_value: Scalar = ...,
4557+
**kwargs: Any,
4558+
) -> np_1darray_str: ...
4559+
# Overload: Series[bytes] is typed as returning np_1darray_bytes.
# NOTE(review): the return type does not depend on `dtype`, which accepts any
# DTypeLike - confirm that ignoring an explicit dtype here is intended.
@overload
4560+
def to_numpy(
4561+
self: Series[bytes],
4562+
dtype: DTypeLike | None = None,
4563+
copy: bool = False,
4564+
na_value: Scalar = ...,
4565+
**kwargs: Any,
4566+
) -> np_1darray_bytes: ...
4567+
@overload
45144568
def to_numpy( # pyright: ignore[reportIncompatibleMethodOverride]
45154569
self,
45164570
dtype: DTypeLike | None = None,

tests/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,14 @@
4747
np_1darray as np_1darray,
4848
np_1darray_anyint as np_1darray_anyint,
4949
np_1darray_bool as np_1darray_bool,
50+
np_1darray_bytes as np_1darray_bytes,
51+
np_1darray_complex as np_1darray_complex,
5052
np_1darray_dt as np_1darray_dt,
5153
np_1darray_float as np_1darray_float,
5254
np_1darray_int64 as np_1darray_int64,
5355
np_1darray_intp as np_1darray_intp,
56+
np_1darray_object as np_1darray_object,
57+
np_1darray_str as np_1darray_str,
5458
np_1darray_td as np_1darray_td,
5559
np_2darray as np_2darray,
5660
np_ndarray as np_ndarray,
@@ -67,7 +71,11 @@
6771
_S = TypeVar("_S", bound=tuple[int, ...])
6872
# Separately define here so pytest works
6973
# Runtime 1-D ndarray alias, generic over the scalar type _G.
np_1darray: TypeAlias = np.ndarray[tuple[int], np.dtype[_G]]
70-
np_1darray_bool: TypeAlias = np_1darray[np.bool]
74+
# NOTE(review): the five aliases below pass the scalar type directly as the
# second ndarray parameter, whereas np_1darray above wraps it in np.dtype[...]
# and the intp/int64/anyint aliases further down still subscript np_1darray.
# Presumably deliberate for the runtime checks - confirm against `check`.
np_1darray_bool: TypeAlias = np.ndarray[tuple[int], np.bool_]
75+
np_1darray_str: TypeAlias = np.ndarray[tuple[int], np.str_]
76+
np_1darray_bytes: TypeAlias = np.ndarray[tuple[int], np.bytes_]
77+
np_1darray_complex: TypeAlias = np.ndarray[tuple[int], np.complexfloating]
78+
np_1darray_object: TypeAlias = np.ndarray[tuple[int], np.object_]
7179
np_1darray_intp: TypeAlias = np_1darray[np.intp]
7280
np_1darray_int64: TypeAlias = np_1darray[np.int64]
7381
np_1darray_anyint: TypeAlias = np_1darray[np.integer]

tests/series/test_series.py

Lines changed: 93 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
)
4848
import xarray as xr
4949

50+
from pandas._libs.tslibs.offsets import Day
5051
from pandas._typing import (
5152
DtypeObj,
5253
Scalar,
@@ -61,7 +62,15 @@
6162
check,
6263
ensure_clean,
6364
np_1darray,
65+
np_1darray_anyint,
6466
np_1darray_bool,
67+
np_1darray_bytes,
68+
np_1darray_complex,
69+
np_1darray_dt,
70+
np_1darray_float,
71+
np_1darray_object,
72+
np_1darray_str,
73+
np_1darray_td,
6574
np_ndarray_num,
6675
pytest_warns_bounded,
6776
)
@@ -1994,16 +2003,94 @@ def test_dtype_type() -> None:
19942003

19952004
def test_types_to_numpy() -> None:
19962005
# A str-dtype Series: after this change every to_numpy call below is asserted
# to be typed as np_1darray[np.str_], whatever dtype/copy/na_value is passed.
# NOTE(review): np_1darray_str is imported in this file and used in
# test_to_numpy; it could replace np_1darray[np.str_] here for consistency.
s = pd.Series(["a", "b", "c"], dtype=str)
1997-
check(assert_type(s.to_numpy(), np_1darray), np_1darray)
1998-
check(assert_type(s.to_numpy(dtype="str", copy=True), np_1darray), np_1darray)
1999-
check(assert_type(s.to_numpy(na_value=0), np_1darray), np_1darray)
2000-
check(assert_type(s.to_numpy(na_value=np.int32(4)), np_1darray), np_1darray)
2001-
check(assert_type(s.to_numpy(na_value=np.float16(4)), np_1darray), np_1darray)
2002-
check(assert_type(s.to_numpy(na_value=np.complex128(4, 7)), np_1darray), np_1darray)
2006+
check(assert_type(s.to_numpy(), np_1darray[np.str_]), np_1darray)
2007+
check(
2008+
assert_type(s.to_numpy(dtype="str", copy=True), np_1darray[np.str_]), np_1darray
2009+
)
2010+
check(assert_type(s.to_numpy(na_value=0), np_1darray[np.str_]), np_1darray)
2011+
check(
2012+
assert_type(s.to_numpy(na_value=np.int32(4)), np_1darray[np.str_]), np_1darray
2013+
)
2014+
check(
2015+
assert_type(s.to_numpy(na_value=np.float16(4)), np_1darray[np.str_]), np_1darray
2016+
)
2017+
check(
2018+
assert_type(s.to_numpy(na_value=np.complex128(4, 7)), np_1darray[np.str_]),
2019+
np_1darray,
2020+
)
20032021

20042022
# A Series built with no data is asserted to give the unparameterised np_1darray.
check(assert_type(pd.Series().to_numpy(), np_1darray), np_1darray)
20052023

20062024

2025+
def test_to_numpy() -> None:
2026+
"""Check the array type of Series.to_numpy for Series of different element types."""
2027+
# NOTE(review): the third argument to check() is presumably the expected
# element type of the returned array - confirm against tests.check.
s_str = pd.Series(["a", "b", "c"], dtype=str)
2028+
check(assert_type(s_str.to_numpy(), np_1darray_str), np_1darray, str)
2029+
2030+
# Bytes Series obtained by casting a str Series with astype(bytes).
s_bytes = pd.Series(["a", "b", "c"]).astype(bytes)
2031+
check(assert_type(s_bytes.to_numpy(), np_1darray_bytes), np_1darray, np.bytes_)
2032+
2033+
s_bool = pd.Series([True, False])
2034+
check(assert_type(s_bool.to_numpy(), np_1darray_bool), np_1darray, np.bool_)
2035+
2036+
s_int = pd.Series([2, 3, 4])
2037+
check(assert_type(s_int.to_numpy(), np_1darray_anyint), np_1darray, np.integer)
2038+
2039+
s_float = pd.Series([2.0, 3.54, 4.84])
2040+
check(
2041+
assert_type(s_float.to_numpy(), np_1darray_float),
2042+
np_1darray,
2043+
np.floating,
2044+
)
2045+
2046+
s_complex = pd.Series([2.0 + 2j, 3.54 + 4j, 4.84])
2047+
check(
2048+
assert_type(s_complex.to_numpy(), np_1darray_complex),
2049+
np_1darray,
2050+
np.complexfloating,
2051+
)
2052+
2053+
# Shared datetime64[ns] Series used to build the Period and Interval cases.
dates = pd.Series(
2054+
[
2055+
pd.Timestamp("2020-01-01"),
2056+
pd.Timestamp("2020-01-15"),
2057+
pd.Timestamp("2020-02-01"),
2058+
],
2059+
dtype="datetime64[ns]",
2060+
)
2061+
# Period, Interval and offset (Day) Series are all asserted to be typed as
# object arrays.
s_period = pd.PeriodIndex(dates, freq="M").to_series()
2062+
check(assert_type(s_period.to_numpy(), np_1darray_object), np_1darray, pd.Period)
2063+
2064+
s_interval = pd.Series(
2065+
[
2066+
pd.Interval(date, date + pd.DateOffset(days=1), closed="left")
2067+
for date in dates
2068+
]
2069+
)
2070+
check(
2071+
assert_type(s_interval.to_numpy(), np_1darray_object), np_1darray, pd.Interval
2072+
)
2073+
2074+
s_day = pd.Series([Day(1)])
2075+
check(assert_type(s_day.to_numpy(), np_1darray_object), np_1darray, Day)
2076+
2077+
s_date = pd.Series(pd.date_range(start="2017-01-01", end="2017-02-01"))
2078+
check(
2079+
assert_type(s_date.to_numpy(), np_1darray_dt),
2080+
np_1darray,
2081+
np.datetime64,
2082+
)
2083+
2084+
# Timedelta case: the difference of a Series of two dates, asserted to be
# typed as a timedelta64 array.
# NOTE(review): the clock is read twice here; fixed dates would make the
# input deterministic - consider replacing.
s_timedelta = pd.Series(
2085+
[pd.Timestamp.now().date(), pd.Timestamp.now().date()]
2086+
).diff()
2087+
check(
2088+
assert_type(s_timedelta.to_numpy(), np_1darray_td),
2089+
np_1darray,
2090+
np.timedelta64,
2091+
)
2092+
2093+
20072094
def test_where() -> None:
20082095
s = pd.Series([1, 2, 3], dtype=int)
20092096

0 commit comments

Comments
 (0)