Skip to content

Commit a0fc10b

Browse files
authored
Merge branch 'pandas-dev:main' into test-fix-56853
2 parents 9e4fdc0 + b9da662 commit a0fc10b

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

62 files changed

+493
-384
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ repos:
121121
types: [python]
122122
stages: [manual]
123123
additional_dependencies: &pyright_dependencies
124-
- pyright@1.1.383
124+
- pyright@1.1.404
125125
- id: pyright
126126
# note: assumes python env is setup and activated
127127
name: pyright reportGeneralTypeIssues

asv_bench/benchmarks/strings.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
DataFrame,
99
Index,
1010
Series,
11+
StringDtype,
1112
)
1213
from pandas.arrays import StringArray
1314

@@ -290,10 +291,10 @@ def setup(self):
290291
self.series_arr_nan = np.concatenate([self.series_arr, np.array([NA] * 1000)])
291292

292293
def time_string_array_construction(self):
293-
StringArray(self.series_arr)
294+
StringArray(self.series_arr, dtype=StringDtype())
294295

295296
def time_string_array_with_nan_construction(self):
296-
StringArray(self.series_arr_nan)
297+
StringArray(self.series_arr_nan, dtype=StringDtype())
297298

298299
def peakmem_stringarray_construction(self):
299-
StringArray(self.series_arr)
300+
StringArray(self.series_arr, dtype=StringDtype())

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ dependencies:
7777

7878
# code checks
7979
- flake8=7.1.0 # run in subprocess over docstring examples
80-
- mypy=1.13.0 # pre-commit uses locally installed mypy
80+
- mypy=1.17.1 # pre-commit uses locally installed mypy
8181
- tokenize-rt # scripts/check_for_inconsistent_pandas_namespace.py
8282
- pre-commit>=4.2.0
8383

pandas/_libs/src/vendored/ujson/python/objToJSON.c

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ Numeric decoder derived from TCL library
5151
#include <numpy/ndarraytypes.h>
5252
#include <numpy/npy_math.h>
5353

54+
static const int CSTR_SIZE = 20;
55+
5456
npy_int64 get_nat(void) { return NPY_MIN_INT64; }
5557

5658
typedef const char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti,
@@ -106,7 +108,7 @@ typedef struct __TypeContext {
106108
double doubleValue;
107109
JSINT64 longValue;
108110

109-
const char *cStr;
111+
char *cStr;
110112
NpyArrContext *npyarr;
111113
PdBlockContext *pdblock;
112114
int transpose;
@@ -347,7 +349,8 @@ static const char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
347349
}
348350

349351
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
350-
return PyDateTimeToIso(obj, base, len);
352+
GET_TC(tc)->cStr = PyDateTimeToIso(obj, base, len);
353+
return GET_TC(tc)->cStr;
351354
}
352355

353356
static const char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc,
@@ -1007,16 +1010,24 @@ static const char *List_iterGetName(JSOBJ Py_UNUSED(obj),
10071010
//=============================================================================
10081011
static void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
10091012
GET_TC(tc)->index = 0;
1013+
GET_TC(tc)->cStr = PyObject_Malloc(CSTR_SIZE);
1014+
if (!GET_TC(tc)->cStr) {
1015+
PyErr_NoMemory();
1016+
}
10101017
}
10111018

10121019
static int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) {
10131020
const Py_ssize_t index = GET_TC(tc)->index;
10141021
Py_XDECREF(GET_TC(tc)->itemValue);
1022+
if (!GET_TC(tc)->cStr) {
1023+
return 0;
1024+
}
1025+
10151026
if (index == 0) {
1016-
GET_TC(tc)->cStr = "name";
1027+
strcpy(GET_TC(tc)->cStr, "name");
10171028
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
10181029
} else if (index == 1) {
1019-
GET_TC(tc)->cStr = "data";
1030+
strcpy(GET_TC(tc)->cStr, "data");
10201031
GET_TC(tc)->itemValue = get_values(obj);
10211032
if (!GET_TC(tc)->itemValue) {
10221033
return 0;
@@ -1049,19 +1060,27 @@ static void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
10491060
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
10501061
GET_TC(tc)->index = 0;
10511062
enc->outputFormat = VALUES; // for contained series
1063+
GET_TC(tc)->cStr = PyObject_Malloc(CSTR_SIZE);
1064+
if (!GET_TC(tc)->cStr) {
1065+
PyErr_NoMemory();
1066+
}
10521067
}
10531068

10541069
static int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) {
10551070
const Py_ssize_t index = GET_TC(tc)->index;
10561071
Py_XDECREF(GET_TC(tc)->itemValue);
1072+
if (!GET_TC(tc)->cStr) {
1073+
return 0;
1074+
}
1075+
10571076
if (index == 0) {
1058-
GET_TC(tc)->cStr = "name";
1077+
strcpy(GET_TC(tc)->cStr, "name");
10591078
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
10601079
} else if (index == 1) {
1061-
GET_TC(tc)->cStr = "index";
1080+
strcpy(GET_TC(tc)->cStr, "index");
10621081
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
10631082
} else if (index == 2) {
1064-
GET_TC(tc)->cStr = "data";
1083+
strcpy(GET_TC(tc)->cStr, "data");
10651084
GET_TC(tc)->itemValue = get_values(obj);
10661085
if (!GET_TC(tc)->itemValue) {
10671086
return 0;
@@ -1096,19 +1115,27 @@ static void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
10961115
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
10971116
GET_TC(tc)->index = 0;
10981117
enc->outputFormat = VALUES; // for contained series & index
1118+
GET_TC(tc)->cStr = PyObject_Malloc(CSTR_SIZE);
1119+
if (!GET_TC(tc)->cStr) {
1120+
PyErr_NoMemory();
1121+
}
10991122
}
11001123

11011124
static int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) {
11021125
const Py_ssize_t index = GET_TC(tc)->index;
11031126
Py_XDECREF(GET_TC(tc)->itemValue);
1127+
if (!GET_TC(tc)->cStr) {
1128+
return 0;
1129+
}
1130+
11041131
if (index == 0) {
1105-
GET_TC(tc)->cStr = "columns";
1132+
strcpy(GET_TC(tc)->cStr, "columns");
11061133
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns");
11071134
} else if (index == 1) {
1108-
GET_TC(tc)->cStr = "index";
1135+
strcpy(GET_TC(tc)->cStr, "index");
11091136
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
11101137
} else if (index == 2) {
1111-
GET_TC(tc)->cStr = "data";
1138+
strcpy(GET_TC(tc)->cStr, "data");
11121139
Py_INCREF(obj);
11131140
GET_TC(tc)->itemValue = obj;
11141141
} else {
@@ -1880,6 +1907,7 @@ static void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
18801907
GET_TC(tc)->rowLabels = NULL;
18811908
NpyArr_freeLabels(GET_TC(tc)->columnLabels, GET_TC(tc)->columnLabelsLen);
18821909
GET_TC(tc)->columnLabels = NULL;
1910+
PyObject_Free(GET_TC(tc)->cStr);
18831911
GET_TC(tc)->cStr = NULL;
18841912
PyObject_Free(tc->prv);
18851913
tc->prv = NULL;

pandas/_libs/tslibs/dtypes.pyi

Lines changed: 40 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -28,33 +28,35 @@ class PeriodDtypeBase:
2828
def _td64_unit(self) -> str: ...
2929

3030
class FreqGroup(Enum):
31-
FR_ANN: int
32-
FR_QTR: int
33-
FR_MTH: int
34-
FR_WK: int
35-
FR_BUS: int
36-
FR_DAY: int
37-
FR_HR: int
38-
FR_MIN: int
39-
FR_SEC: int
40-
FR_MS: int
41-
FR_US: int
42-
FR_NS: int
43-
FR_UND: int
31+
_value_: int
32+
FR_ANN = ...
33+
FR_QTR = ...
34+
FR_MTH = ...
35+
FR_WK = ...
36+
FR_BUS = ...
37+
FR_DAY = ...
38+
FR_HR = ...
39+
FR_MIN = ...
40+
FR_SEC = ...
41+
FR_MS = ...
42+
FR_US = ...
43+
FR_NS = ...
44+
FR_UND = ...
4445
@staticmethod
4546
def from_period_dtype_code(code: int) -> FreqGroup: ...
4647

4748
class Resolution(Enum):
48-
RESO_NS: int
49-
RESO_US: int
50-
RESO_MS: int
51-
RESO_SEC: int
52-
RESO_MIN: int
53-
RESO_HR: int
54-
RESO_DAY: int
55-
RESO_MTH: int
56-
RESO_QTR: int
57-
RESO_YR: int
49+
_value_: int
50+
RESO_NS = ...
51+
RESO_US = ...
52+
RESO_MS = ...
53+
RESO_SEC = ...
54+
RESO_MIN = ...
55+
RESO_HR = ...
56+
RESO_DAY = ...
57+
RESO_MTH = ...
58+
RESO_QTR = ...
59+
RESO_YR = ...
5860
def __lt__(self, other: Resolution) -> bool: ...
5961
def __ge__(self, other: Resolution) -> bool: ...
6062
@property
@@ -67,17 +69,18 @@ class Resolution(Enum):
6769
def attr_abbrev(self) -> str: ...
6870

6971
class NpyDatetimeUnit(Enum):
70-
NPY_FR_Y: int
71-
NPY_FR_M: int
72-
NPY_FR_W: int
73-
NPY_FR_D: int
74-
NPY_FR_h: int
75-
NPY_FR_m: int
76-
NPY_FR_s: int
77-
NPY_FR_ms: int
78-
NPY_FR_us: int
79-
NPY_FR_ns: int
80-
NPY_FR_ps: int
81-
NPY_FR_fs: int
82-
NPY_FR_as: int
83-
NPY_FR_GENERIC: int
72+
_value_: int
73+
NPY_FR_Y = ...
74+
NPY_FR_M = ...
75+
NPY_FR_W = ...
76+
NPY_FR_D = ...
77+
NPY_FR_h = ...
78+
NPY_FR_m = ...
79+
NPY_FR_s = ...
80+
NPY_FR_ms = ...
81+
NPY_FR_us = ...
82+
NPY_FR_ns = ...
83+
NPY_FR_ps = ...
84+
NPY_FR_fs = ...
85+
NPY_FR_as = ...
86+
NPY_FR_GENERIC = ...

pandas/_testing/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,8 +348,9 @@ class SubclassedDataFrame(DataFrame):
348348
def _constructor(self):
349349
return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs)
350350

351+
# error: Cannot override writeable attribute with read-only property
351352
@property
352-
def _constructor_sliced(self):
353+
def _constructor_sliced(self): # type: ignore[override]
353354
return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs)
354355

355356

pandas/_testing/_io.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,12 @@ def round_trip_pathlib(writer, reader, path: str | None = None):
8181
if path is None:
8282
path = "___pathlib___"
8383
with ensure_clean(path) as path:
84-
writer(Path(path)) # type: ignore[arg-type]
85-
obj = reader(Path(path)) # type: ignore[arg-type]
84+
writer(Path(path))
85+
obj = reader(Path(path))
8686
return obj
8787

8888

89-
def write_to_compressed(compression, path, data, dest: str = "test") -> None:
89+
def write_to_compressed(compression, path: str, data, dest: str = "test") -> None:
9090
"""
9191
Write data to a compressed file.
9292
@@ -138,5 +138,9 @@ def write_to_compressed(compression, path, data, dest: str = "test") -> None:
138138
else:
139139
raise ValueError(f"Unrecognized compression type: {compression}")
140140

141-
with compress_method(path, mode=mode) as f:
141+
# error: No overload variant of "ZipFile" matches argument types "str", "str"
142+
# error: No overload variant of "BZ2File" matches argument types "str", "str"
143+
# error: Argument "mode" to "TarFile" has incompatible type "str";
144+
# expected "Literal['r', 'a', 'w', 'x']
145+
with compress_method(path, mode=mode) as f: # type: ignore[call-overload, arg-type]
142146
getattr(f, method)(*args)

pandas/core/_numba/executor.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,11 @@ def column_looper(
8787
else:
8888

8989
@numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
90-
def column_looper(
90+
# error: Incompatible redefinition (redefinition with type
91+
# "Callable[[ndarray[Any, Any], ndarray[Any, Any], ndarray[Any, Any],
92+
# int, VarArg(Any)], Any]", original type "Callable[[ndarray[Any, Any],
93+
# ndarray[Any, Any], int, int, VarArg(Any)], Any]")
94+
def column_looper( # type: ignore[misc]
9195
values: np.ndarray,
9296
start: np.ndarray,
9397
end: np.ndarray,

pandas/core/algorithms.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def _reconstruct_data(
216216
values = cls._from_sequence(values, dtype=dtype) # type: ignore[assignment]
217217

218218
else:
219-
values = values.astype(dtype, copy=False) # type: ignore[assignment]
219+
values = values.astype(dtype, copy=False)
220220

221221
return values
222222

@@ -904,7 +904,10 @@ def value_counts_internal(
904904
.size()
905905
)
906906
result.index.names = values.names
907-
counts = result._values
907+
# error: Incompatible types in assignment (expression has type
908+
# "ndarray[Any, Any] | DatetimeArray | TimedeltaArray | PeriodArray | Any",
909+
# variable has type "ndarray[tuple[int, ...], dtype[Any]]")
910+
counts = result._values # type: ignore[assignment]
908911

909912
else:
910913
values = _ensure_arraylike(values, func_name="value_counts")
@@ -1311,7 +1314,7 @@ def searchsorted(
13111314
_diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"}
13121315

13131316

1314-
def diff(arr, n: int, axis: AxisInt = 0):
1317+
def diff(arr, n: int | float | np.integer | np.floating, axis: AxisInt = 0):
13151318
"""
13161319
difference of n between self,
13171320
analogous to s-s.shift(n)
@@ -1400,7 +1403,7 @@ def diff(arr, n: int, axis: AxisInt = 0):
14001403
if arr.dtype.name in _diff_special:
14011404
# TODO: can diff_2d dtype specialization troubles be fixed by defining
14021405
# out_arr inside diff_2d?
1403-
algos.diff_2d(arr, out_arr, n, axis, datetimelike=is_timedelta)
1406+
algos.diff_2d(arr, out_arr, int(n), axis, datetimelike=is_timedelta)
14041407
else:
14051408
# To keep mypy happy, _res_indexer is a list while res_indexer is
14061409
# a tuple, ditto for lag_indexer.

pandas/core/apply.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1645,7 +1645,7 @@ def agg_or_apply_dict_like(
16451645
assert op_name in ["agg", "apply"]
16461646

16471647
obj = self.obj
1648-
kwargs = {}
1648+
kwargs: dict[str, Any] = {}
16491649
if op_name == "apply":
16501650
by_row = "_compat" if self.by_row else False
16511651
kwargs.update({"by_row": by_row})
@@ -2012,7 +2012,8 @@ def _managle_lambda_list(aggfuncs: Sequence[Any]) -> Sequence[Any]:
20122012
for aggfunc in aggfuncs:
20132013
if com.get_callable_name(aggfunc) == "<lambda>":
20142014
aggfunc = partial(aggfunc)
2015-
aggfunc.__name__ = f"<lambda_{i}>"
2015+
# error: "partial[Any]" has no attribute "__name__"; maybe "__new__"?
2016+
aggfunc.__name__ = f"<lambda_{i}>" # type: ignore[attr-defined]
20162017
i += 1
20172018
mangled_aggfuncs.append(aggfunc)
20182019

0 commit comments

Comments
 (0)