Skip to content

Commit a0fc10b

Browse files
authored
Merge branch 'pandas-dev:main' into test-fix-56853
2 parents 9e4fdc0 + b9da662 commit a0fc10b

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

62 files changed

+493
-384
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ repos:
121121
types: [python]
122122
stages: [manual]
123123
additional_dependencies: &pyright_dependencies
124-
- pyright@1.1.383
124+
- pyright@1.1.404
125125
- id: pyright
126126
# note: assumes python env is setup and activated
127127
name: pyright reportGeneralTypeIssues

asv_bench/benchmarks/strings.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
DataFrame,
99
Index,
1010
Series,
11+
StringDtype,
1112
)
1213
from pandas.arrays import StringArray
1314

@@ -290,10 +291,10 @@ def setup(self):
290291
self.series_arr_nan = np.concatenate([self.series_arr, np.array([NA] * 1000)])
291292

292293
def time_string_array_construction(self):
293-
StringArray(self.series_arr)
294+
StringArray(self.series_arr, dtype=StringDtype())
294295

295296
def time_string_array_with_nan_construction(self):
296-
StringArray(self.series_arr_nan)
297+
StringArray(self.series_arr_nan, dtype=StringDtype())
297298

298299
def peakmem_stringarray_construction(self):
299-
StringArray(self.series_arr)
300+
StringArray(self.series_arr, dtype=StringDtype())

environment.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ dependencies:
7777

7878
# code checks
7979
- flake8=7.1.0 # run in subprocess over docstring examples
80-
- mypy=1.13.0 # pre-commit uses locally installed mypy
80+
- mypy=1.17.1 # pre-commit uses locally installed mypy
8181
- tokenize-rt # scripts/check_for_inconsistent_pandas_namespace.py
8282
- pre-commit>=4.2.0
8383

pandas/_libs/src/vendored/ujson/python/objToJSON.c

Lines changed: 38 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ Numeric decoder derived from TCL library
5151
#include <numpy/ndarraytypes.h>
5252
#include <numpy/npy_math.h>
5353

54+
static const int CSTR_SIZE = 20;
55+
5456
npy_int64 get_nat(void) { return NPY_MIN_INT64; }
5557

5658
typedef const char *(*PFN_PyTypeToUTF8)(JSOBJ obj, JSONTypeContext *ti,
@@ -106,7 +108,7 @@ typedef struct __TypeContext {
106108
double doubleValue;
107109
JSINT64 longValue;
108110

109-
const char *cStr;
111+
char *cStr;
110112
NpyArrContext *npyarr;
111113
PdBlockContext *pdblock;
112114
int transpose;
@@ -347,7 +349,8 @@ static const char *PyDateTimeToIsoCallback(JSOBJ obj, JSONTypeContext *tc,
347349
}
348350

349351
NPY_DATETIMEUNIT base = ((PyObjectEncoder *)tc->encoder)->datetimeUnit;
350-
return PyDateTimeToIso(obj, base, len);
352+
GET_TC(tc)->cStr = PyDateTimeToIso(obj, base, len);
353+
return GET_TC(tc)->cStr;
351354
}
352355

353356
static const char *PyTimeToJSON(JSOBJ _obj, JSONTypeContext *tc,
@@ -1007,16 +1010,24 @@ static const char *List_iterGetName(JSOBJ Py_UNUSED(obj),
10071010
//=============================================================================
10081011
static void Index_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
10091012
GET_TC(tc)->index = 0;
1013+
GET_TC(tc)->cStr = PyObject_Malloc(CSTR_SIZE);
1014+
if (!GET_TC(tc)->cStr) {
1015+
PyErr_NoMemory();
1016+
}
10101017
}
10111018

10121019
static int Index_iterNext(JSOBJ obj, JSONTypeContext *tc) {
10131020
const Py_ssize_t index = GET_TC(tc)->index;
10141021
Py_XDECREF(GET_TC(tc)->itemValue);
1022+
if (!GET_TC(tc)->cStr) {
1023+
return 0;
1024+
}
1025+
10151026
if (index == 0) {
1016-
GET_TC(tc)->cStr = "name";
1027+
strcpy(GET_TC(tc)->cStr, "name");
10171028
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
10181029
} else if (index == 1) {
1019-
GET_TC(tc)->cStr = "data";
1030+
strcpy(GET_TC(tc)->cStr, "data");
10201031
GET_TC(tc)->itemValue = get_values(obj);
10211032
if (!GET_TC(tc)->itemValue) {
10221033
return 0;
@@ -1049,19 +1060,27 @@ static void Series_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
10491060
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
10501061
GET_TC(tc)->index = 0;
10511062
enc->outputFormat = VALUES; // for contained series
1063+
GET_TC(tc)->cStr = PyObject_Malloc(CSTR_SIZE);
1064+
if (!GET_TC(tc)->cStr) {
1065+
PyErr_NoMemory();
1066+
}
10521067
}
10531068

10541069
static int Series_iterNext(JSOBJ obj, JSONTypeContext *tc) {
10551070
const Py_ssize_t index = GET_TC(tc)->index;
10561071
Py_XDECREF(GET_TC(tc)->itemValue);
1072+
if (!GET_TC(tc)->cStr) {
1073+
return 0;
1074+
}
1075+
10571076
if (index == 0) {
1058-
GET_TC(tc)->cStr = "name";
1077+
strcpy(GET_TC(tc)->cStr, "name");
10591078
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "name");
10601079
} else if (index == 1) {
1061-
GET_TC(tc)->cStr = "index";
1080+
strcpy(GET_TC(tc)->cStr, "index");
10621081
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
10631082
} else if (index == 2) {
1064-
GET_TC(tc)->cStr = "data";
1083+
strcpy(GET_TC(tc)->cStr, "data");
10651084
GET_TC(tc)->itemValue = get_values(obj);
10661085
if (!GET_TC(tc)->itemValue) {
10671086
return 0;
@@ -1096,19 +1115,27 @@ static void DataFrame_iterBegin(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
10961115
PyObjectEncoder *enc = (PyObjectEncoder *)tc->encoder;
10971116
GET_TC(tc)->index = 0;
10981117
enc->outputFormat = VALUES; // for contained series & index
1118+
GET_TC(tc)->cStr = PyObject_Malloc(CSTR_SIZE);
1119+
if (!GET_TC(tc)->cStr) {
1120+
PyErr_NoMemory();
1121+
}
10991122
}
11001123

11011124
static int DataFrame_iterNext(JSOBJ obj, JSONTypeContext *tc) {
11021125
const Py_ssize_t index = GET_TC(tc)->index;
11031126
Py_XDECREF(GET_TC(tc)->itemValue);
1127+
if (!GET_TC(tc)->cStr) {
1128+
return 0;
1129+
}
1130+
11041131
if (index == 0) {
1105-
GET_TC(tc)->cStr = "columns";
1132+
strcpy(GET_TC(tc)->cStr, "columns");
11061133
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "columns");
11071134
} else if (index == 1) {
1108-
GET_TC(tc)->cStr = "index";
1135+
strcpy(GET_TC(tc)->cStr, "index");
11091136
GET_TC(tc)->itemValue = PyObject_GetAttrString(obj, "index");
11101137
} else if (index == 2) {
1111-
GET_TC(tc)->cStr = "data";
1138+
strcpy(GET_TC(tc)->cStr, "data");
11121139
Py_INCREF(obj);
11131140
GET_TC(tc)->itemValue = obj;
11141141
} else {
@@ -1880,6 +1907,7 @@ static void Object_endTypeContext(JSOBJ Py_UNUSED(obj), JSONTypeContext *tc) {
18801907
GET_TC(tc)->rowLabels = NULL;
18811908
NpyArr_freeLabels(GET_TC(tc)->columnLabels, GET_TC(tc)->columnLabelsLen);
18821909
GET_TC(tc)->columnLabels = NULL;
1910+
PyObject_Free(GET_TC(tc)->cStr);
18831911
GET_TC(tc)->cStr = NULL;
18841912
PyObject_Free(tc->prv);
18851913
tc->prv = NULL;

pandas/_libs/tslibs/dtypes.pyi

Lines changed: 40 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -28,33 +28,35 @@ class PeriodDtypeBase:
2828
def _td64_unit(self) -> str: ...
2929

3030
class FreqGroup(Enum):
31-
FR_ANN: int
32-
FR_QTR: int
33-
FR_MTH: int
34-
FR_WK: int
35-
FR_BUS: int
36-
FR_DAY: int
37-
FR_HR: int
38-
FR_MIN: int
39-
FR_SEC: int
40-
FR_MS: int
41-
FR_US: int
42-
FR_NS: int
43-
FR_UND: int
31+
_value_: int
32+
FR_ANN = ...
33+
FR_QTR = ...
34+
FR_MTH = ...
35+
FR_WK = ...
36+
FR_BUS = ...
37+
FR_DAY = ...
38+
FR_HR = ...
39+
FR_MIN = ...
40+
FR_SEC = ...
41+
FR_MS = ...
42+
FR_US = ...
43+
FR_NS = ...
44+
FR_UND = ...
4445
@staticmethod
4546
def from_period_dtype_code(code: int) -> FreqGroup: ...
4647

4748
class Resolution(Enum):
48-
RESO_NS: int
49-
RESO_US: int
50-
RESO_MS: int
51-
RESO_SEC: int
52-
RESO_MIN: int
53-
RESO_HR: int
54-
RESO_DAY: int
55-
RESO_MTH: int
56-
RESO_QTR: int
57-
RESO_YR: int
49+
_value_: int
50+
RESO_NS = ...
51+
RESO_US = ...
52+
RESO_MS = ...
53+
RESO_SEC = ...
54+
RESO_MIN = ...
55+
RESO_HR = ...
56+
RESO_DAY = ...
57+
RESO_MTH = ...
58+
RESO_QTR = ...
59+
RESO_YR = ...
5860
def __lt__(self, other: Resolution) -> bool: ...
5961
def __ge__(self, other: Resolution) -> bool: ...
6062
@property
@@ -67,17 +69,18 @@ class Resolution(Enum):
6769
def attr_abbrev(self) -> str: ...
6870

6971
class NpyDatetimeUnit(Enum):
70-
NPY_FR_Y: int
71-
NPY_FR_M: int
72-
NPY_FR_W: int
73-
NPY_FR_D: int
74-
NPY_FR_h: int
75-
NPY_FR_m: int
76-
NPY_FR_s: int
77-
NPY_FR_ms: int
78-
NPY_FR_us: int
79-
NPY_FR_ns: int
80-
NPY_FR_ps: int
81-
NPY_FR_fs: int
82-
NPY_FR_as: int
83-
NPY_FR_GENERIC: int
72+
_value_: int
73+
NPY_FR_Y = ...
74+
NPY_FR_M = ...
75+
NPY_FR_W = ...
76+
NPY_FR_D = ...
77+
NPY_FR_h = ...
78+
NPY_FR_m = ...
79+
NPY_FR_s = ...
80+
NPY_FR_ms = ...
81+
NPY_FR_us = ...
82+
NPY_FR_ns = ...
83+
NPY_FR_ps = ...
84+
NPY_FR_fs = ...
85+
NPY_FR_as = ...
86+
NPY_FR_GENERIC = ...

pandas/_testing/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -348,8 +348,9 @@ class SubclassedDataFrame(DataFrame):
348348
def _constructor(self):
349349
return lambda *args, **kwargs: SubclassedDataFrame(*args, **kwargs)
350350

351+
# error: Cannot override writeable attribute with read-only property
351352
@property
352-
def _constructor_sliced(self):
353+
def _constructor_sliced(self): # type: ignore[override]
353354
return lambda *args, **kwargs: SubclassedSeries(*args, **kwargs)
354355

355356

pandas/_testing/_io.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -81,12 +81,12 @@ def round_trip_pathlib(writer, reader, path: str | None = None):
8181
if path is None:
8282
path = "___pathlib___"
8383
with ensure_clean(path) as path:
84-
writer(Path(path)) # type: ignore[arg-type]
85-
obj = reader(Path(path)) # type: ignore[arg-type]
84+
writer(Path(path))
85+
obj = reader(Path(path))
8686
return obj
8787

8888

89-
def write_to_compressed(compression, path, data, dest: str = "test") -> None:
89+
def write_to_compressed(compression, path: str, data, dest: str = "test") -> None:
9090
"""
9191
Write data to a compressed file.
9292
@@ -138,5 +138,9 @@ def write_to_compressed(compression, path, data, dest: str = "test") -> None:
138138
else:
139139
raise ValueError(f"Unrecognized compression type: {compression}")
140140

141-
with compress_method(path, mode=mode) as f:
141+
# error: No overload variant of "ZipFile" matches argument types "str", "str"
142+
# error: No overload variant of "BZ2File" matches argument types "str", "str"
143+
# error: Argument "mode" to "TarFile" has incompatible type "str";
144+
# expected "Literal['r', 'a', 'w', 'x']
145+
with compress_method(path, mode=mode) as f: # type: ignore[call-overload, arg-type]
142146
getattr(f, method)(*args)

pandas/core/_numba/executor.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,11 @@ def column_looper(
8787
else:
8888

8989
@numba.jit(nopython=nopython, nogil=nogil, parallel=parallel)
90-
def column_looper(
90+
# error: Incompatible redefinition (redefinition with type
91+
# "Callable[[ndarray[Any, Any], ndarray[Any, Any], ndarray[Any, Any],
92+
# int, VarArg(Any)], Any]", original type "Callable[[ndarray[Any, Any],
93+
# ndarray[Any, Any], int, int, VarArg(Any)], Any]")
94+
def column_looper( # type: ignore[misc]
9195
values: np.ndarray,
9296
start: np.ndarray,
9397
end: np.ndarray,

pandas/core/algorithms.py

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,7 @@ def _reconstruct_data(
216216
values = cls._from_sequence(values, dtype=dtype) # type: ignore[assignment]
217217

218218
else:
219-
values = values.astype(dtype, copy=False) # type: ignore[assignment]
219+
values = values.astype(dtype, copy=False)
220220

221221
return values
222222

@@ -904,7 +904,10 @@ def value_counts_internal(
904904
.size()
905905
)
906906
result.index.names = values.names
907-
counts = result._values
907+
# error: Incompatible types in assignment (expression has type
908+
# "ndarray[Any, Any] | DatetimeArray | TimedeltaArray | PeriodArray | Any",
909+
# variable has type "ndarray[tuple[int, ...], dtype[Any]]")
910+
counts = result._values # type: ignore[assignment]
908911

909912
else:
910913
values = _ensure_arraylike(values, func_name="value_counts")
@@ -1311,7 +1314,7 @@ def searchsorted(
13111314
_diff_special = {"float64", "float32", "int64", "int32", "int16", "int8"}
13121315

13131316

1314-
def diff(arr, n: int, axis: AxisInt = 0):
1317+
def diff(arr, n: int | float | np.integer | np.floating, axis: AxisInt = 0):
13151318
"""
13161319
difference of n between self,
13171320
analogous to s-s.shift(n)
@@ -1400,7 +1403,7 @@ def diff(arr, n: int, axis: AxisInt = 0):
14001403
if arr.dtype.name in _diff_special:
14011404
# TODO: can diff_2d dtype specialization troubles be fixed by defining
14021405
# out_arr inside diff_2d?
1403-
algos.diff_2d(arr, out_arr, n, axis, datetimelike=is_timedelta)
1406+
algos.diff_2d(arr, out_arr, int(n), axis, datetimelike=is_timedelta)
14041407
else:
14051408
# To keep mypy happy, _res_indexer is a list while res_indexer is
14061409
# a tuple, ditto for lag_indexer.

pandas/core/apply.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1645,7 +1645,7 @@ def agg_or_apply_dict_like(
16451645
assert op_name in ["agg", "apply"]
16461646

16471647
obj = self.obj
1648-
kwargs = {}
1648+
kwargs: dict[str, Any] = {}
16491649
if op_name == "apply":
16501650
by_row = "_compat" if self.by_row else False
16511651
kwargs.update({"by_row": by_row})
@@ -2012,7 +2012,8 @@ def _managle_lambda_list(aggfuncs: Sequence[Any]) -> Sequence[Any]:
20122012
for aggfunc in aggfuncs:
20132013
if com.get_callable_name(aggfunc) == "<lambda>":
20142014
aggfunc = partial(aggfunc)
2015-
aggfunc.__name__ = f"<lambda_{i}>"
2015+
# error: "partial[Any]" has no attribute "__name__"; maybe "__new__"?
2016+
aggfunc.__name__ = f"<lambda_{i}>" # type: ignore[attr-defined]
20162017
i += 1
20172018
mangled_aggfuncs.append(aggfunc)
20182019

0 commit comments

Comments
 (0)